In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Read the drug dataset from a CSV file located relative to the working directory.
csv_path = "drug200.csv"
data = pd.read_csv(csv_path)


In [3]:
# Quick look at the loaded frame: its dimensions, its column index and its
# first rows, each printed after a descriptive label.
for label, value in (
    ("Dataset Shape:", data.shape),
    ("Dataset Columns:", data.columns),
    ("Dataset Head:\n", data.head()),
):
    print(label, value)

Dataset Shape: (200, 6)
Dataset Columns: Index(['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K', 'Drug'], dtype='object')
Dataset Head:
    Age Sex      BP Cholesterol  Na_to_K   Drug
0   23   F    HIGH        HIGH   25.355  drugY
1   47   M     LOW        HIGH   13.093  drugC
2   47   M     LOW        HIGH   10.114  drugC
3   28   F  NORMAL        HIGH    7.798  drugX
4   61   F     LOW        HIGH   18.043  drugY


In [10]:
# One-hot encode the categorical feature columns.
#
# `data` is rebound to the encoded frame, so after this cell has run once the
# original 'Sex', 'BP' and 'Cholesterol' columns no longer exist. Passing them
# to get_dummies again would raise a KeyError, so only the columns that are
# still present are encoded. This keeps the cell safe to re-execute.
categorical_cols = [col for col in ['Sex', 'BP', 'Cholesterol'] if col in data.columns]
if categorical_cols:
    data = pd.get_dummies(data, columns=categorical_cols)

In [11]:
# Split the frame into the target column and the remaining predictor columns.
y = data.loc[:, 'Drug']
X = data.drop('Drug', axis=1)

In [12]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable.
split_parts = train_test_split(X, y, random_state=42, test_size=0.3)
X_train, X_test, y_train, y_test = split_parts

In [13]:
# Create the (not yet fitted) decision tree with a fixed seed.
tree_seed = 42
clf = DecisionTreeClassifier(random_state=tree_seed)


In [14]:
# Train the classifier on the training split.
clf.fit(X=X_train, y=y_train)

In [15]:
# Predict a drug label for every row of the held-out split.
y_pred = clf.predict(X=X_test)

In [16]:
# Compute each evaluation metric on the held-out split first, then print them.
accuracy = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy Score:", accuracy)
print("Confusion Matrix:\n", conf_mat)
print("Classification Report:\n", report)

Accuracy Score: 1.0
Confusion Matrix:
 [[ 7  0  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0  6  0  0]
 [ 0  0  0 18  0]
 [ 0  0  0  0 26]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         6
           3       1.00      1.00      1.00        18
           4       1.00      1.00      1.00        26

    accuracy                           1.00        60
   macro avg       1.00      1.00      1.00        60
weighted avg       1.00      1.00      1.00        60



In [17]:
# Wrap the predictions in a named Series that carries X_test's row labels.
#
# Without an explicit index the Series would be labelled 0..n-1, while X_test
# keeps the row labels of the original frame. pandas aligns on labels when a
# Series is assigned as a DataFrame column, so a default index would attach
# predictions to the wrong rows and leave NaN wherever a label is missing.
predicted_values = pd.Series(y_pred, index=X_test.index, name='Predicted_Drug')

In [18]:
# Work on a copy so X_test itself is left untouched.
test_data_with_predictions = X_test.copy()
# Assign the predictions positionally (row i of X_test gets prediction i).
# Assigning the Series itself would align on index labels, and X_test keeps the
# original frame's row labels, so a Series with any other index would end up
# on the wrong rows or as NaN.
test_data_with_predictions['Predicted_Drug'] = predicted_values.to_numpy()

In [19]:
# Print the held-out feature rows together with the predicted drug column.
heading = "Test Data with Predicted Values:\n"
print(heading, test_data_with_predictions)

Test Data with Predicted Values:
      Age  Na_to_K  Sex_F  Sex_M  BP_HIGH  BP_LOW  BP_NORMAL  Cholesterol_HIGH  \
95    36   11.424      0      1        0       1          0                 0   
15    16   15.516      1      0        1       0          0                 0   
30    18    8.750      1      0        0       0          1                 0   
158   59   10.444      1      0        0       1          0                 1   
128   47   33.542      0      1        0       1          0                 0   
115   51   18.295      0      1        1       0          0                 1   
69    18   24.276      1      0        1       0          0                 0   
170   28   12.879      1      0        0       0          1                 1   
174   42   12.766      0      1        1       0          0                 0   
45    66    8.107      1      0        0       0          1                 0   
66    29   12.856      0      1        1       0          0                