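# Decision Tree Classifier

This notebook trains a scikit-learn `DecisionTreeClassifier` to predict the prescribed `Drug` from patient attributes (`Age`, `Sex`, `BP`, `Cholesterol`, `Na_to_K`) in the Drug200 dataset, and reports accuracy on a 20% hold-out split.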

## Import Libraries

In [68]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Load Dataset

In [2]:
# Location of the Drug200 CSV. Set the DRUG200_CSV environment variable to point
# at your own copy of the file; when it is unset, the original author's local
# path is used so existing setups keep working.
DATA_PATH = os.environ.get("DRUG200_CSV", r"C:\Users\ASUS\Desktop\Drug200.csv")
df = pd.read_csv(DATA_PATH)

In [3]:
# Display the loaded DataFrame to eyeball its columns and a sample of rows.
df

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY
...,...,...,...,...,...,...
195,56,F,LOW,HIGH,11.567,drugC
196,16,M,LOW,HIGH,12.006,drugC
197,52,M,NORMAL,HIGH,9.894,drugX
198,23,M,NORMAL,NORMAL,14.020,drugX


## Pre-Processing

In [4]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [8]:
# The label lives in the last column; everything before it is a feature.
target_pos = df.shape[1] - 1
Y = df.iloc[:, target_pos]
X = df.iloc[:, :target_pos]


In [9]:
# Pick out the object-dtype feature columns; these are the categorical inputs
# that must be encoded numerically before modelling.
cat_data = X.select_dtypes(include="object")
cat_data.columns

Index(['Sex', 'BP', 'Cholesterol'], dtype='object')

In [10]:
# One-hot encode the categorical columns. drop_first=True omits the first level
# of each feature, leaving k-1 indicator columns for a k-level feature.
dummies = pd.get_dummies(data=cat_data, drop_first=True)

In [11]:
# Inspect the indicator columns produced by the one-hot encoding.
dummies

Unnamed: 0,Sex_M,BP_LOW,BP_NORMAL,Cholesterol_NORMAL
0,0,0,0,0
1,1,1,0,0
2,1,1,0,0
3,0,0,1,0
4,0,1,0,0
...,...,...,...,...
195,0,1,0,0
196,1,1,0,0
197,1,0,1,0
198,1,0,1,1


In [12]:
# Remove the raw categorical columns from the feature frame. The labels are
# passed explicitly via cat_data.columns (instead of handing drop() the whole
# cat_data frame), and errors="ignore" keeps the cell safe to re-run once the
# columns are already gone.
X = X.drop(columns=cat_data.columns, errors="ignore")
X

Unnamed: 0,Age,Na_to_K
0,23,25.355
1,47,13.093
2,47,10.114
3,28,7.798
4,61,18.043
...,...,...
195,56,11.567
196,16,12.006
197,52,9.894
198,23,14.020


In [14]:
# Append the one-hot encoded columns to the remaining features. Dummy columns
# left over from an earlier run of this cell are dropped first, so re-running
# the cell does not duplicate them.
X = pd.concat([X.drop(columns=dummies.columns, errors="ignore"), dummies], axis=1)

In [15]:
# Show the feature frame now that it holds both numeric and encoded columns.
X

Unnamed: 0,Age,Na_to_K,Sex_M,BP_LOW,BP_NORMAL,Cholesterol_NORMAL
0,23,25.355,0,0,0,0
1,47,13.093,1,1,0,0
2,47,10.114,1,1,0,0
3,28,7.798,0,0,1,0
4,61,18.043,0,1,0,0
...,...,...,...,...,...,...
195,56,11.567,0,1,0,0
196,16,12.006,1,1,0,0
197,52,9.894,1,0,1,0
198,23,14.020,1,0,1,1


In [17]:
# Select the continuous features by dtype. Only int64/float columns match, which
# is what separates them from the one-hot indicator columns in this frame.
numeric_kinds = ["int64", "float"]
num_data = X.select_dtypes(include=numeric_kinds)
num_data.columns

Index(['Age', 'Na_to_K'], dtype='object')

In [18]:
from sklearn.preprocessing import StandardScaler

In [19]:
# Standardise the numeric columns with StandardScaler and write them back into X.
# NOTE(review): the scaler is fit on every row, before the train/test split, so
# test-set statistics feed into the transform. Decision-tree splits are not
# affected by this kind of per-column rescaling, but fit the scaler on the
# training rows only if a scale-sensitive model is ever swapped in.
scaler = StandardScaler()
numeric_cols = num_data.columns
X[numeric_cols] = scaler.fit_transform(X[numeric_cols])
X

Unnamed: 0,Age,Na_to_K,Sex_M,BP_LOW,BP_NORMAL,Cholesterol_NORMAL
0,-1.291591,1.286522,0,0,0,0
1,0.162699,-0.415145,1,1,0,0
2,0.162699,-0.828558,1,1,0,0
3,-0.988614,-1.149963,0,0,1,0
4,1.011034,0.271794,0,1,0,0
...,...,...,...,...,...,...
195,0.708057,-0.626917,0,1,0,0
196,-1.715759,-0.565995,1,1,0,0
197,0.465676,-0.859089,1,0,1,0
198,-1.291591,-0.286500,1,0,1,1


## Train-Test-Split

In [20]:
from sklearn.model_selection import train_test_split

In [32]:
# Hold out 20% of the rows for evaluation. random_state fixes the shuffle so the
# split (and every metric computed from it) is reproducible across kernel
# restarts; stratify=Y keeps the drug class proportions equal in both parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)

## Decision Tree Classifier

In [33]:
from sklearn.tree import DecisionTreeClassifier

In [63]:
# Decision tree with default hyperparameters. random_state pins the tie-breaking
# between equally good candidate splits, so refitting on the same data yields
# the same tree.
clf = DecisionTreeClassifier(random_state=42)
clf

DecisionTreeClassifier()

In [64]:
# Grow the tree on the training partition; the held-out rows are not used here.
clf.fit(X=X_train, y=Y_train)

DecisionTreeClassifier()

## Prediction

In [70]:
# Predict the drug for every held-out row, then print the predictions followed
# by the true labels for a manual comparison.
Y_pred = clf.predict(X_test)
print(Y_pred)
print(Y_test)

['drugX' 'drugY' 'drugX' 'drugA' 'drugY' 'drugY' 'drugA' 'drugA' 'drugY'
 'drugX' 'drugY' 'drugB' 'drugB' 'drugY' 'drugX' 'drugX' 'drugB' 'drugC'
 'drugX' 'drugC' 'drugX' 'drugX' 'drugY' 'drugY' 'drugA' 'drugB' 'drugY'
 'drugY' 'drugX' 'drugA' 'drugX' 'drugC' 'drugA' 'drugY' 'drugC' 'drugX'
 'drugX' 'drugX' 'drugA' 'drugY']
145    drugX
175    drugY
160    drugX
176    drugA
40     drugY
59     drugY
174    drugA
147    drugA
57     drugY
45     drugX
26     drugY
136    drugB
124    drugB
24     drugY
167    drugX
182    drugX
161    drugB
82     drugC
27     drugX
196    drugC
129    drugX
79     drugX
123    drugY
90     drugY
83     drugA
64     drugB
9      drugY
190    drugY
35     drugX
140    drugA
16     drugX
149    drugC
156    drugA
71     drugY
47     drugC
106    drugX
198    drugX
44     drugX
76     drugA
168    drugY
Name: Drug, dtype: object


In [71]:
from sklearn import metrics

# Fraction of held-out rows whose predicted drug matches the true label.
# NOTE(review): this is a single-split estimate on a small test set; consider
# cross-validation before drawing conclusions from it.
test_accuracy = metrics.accuracy_score(Y_test, clf.predict(X_test))
print("Accuracy:", test_accuracy)

Accuracy: 1.0
