# Imports

In [25]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Reading the data

In [26]:
# Load the drug dataset and preview its first ten rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably a saved
# row index -- consider index_col=0 if nothing downstream relies on it.
df = pd.read_csv('drug200.csv', sep=',')
df.head(10)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY
5,22,F,NORMAL,HIGH,8.607,drugX
6,49,F,NORMAL,HIGH,16.275,drugY
7,41,M,LOW,HIGH,11.037,drugC
8,60,M,NORMAL,HIGH,15.171,drugY
9,43,M,LOW,NORMAL,19.368,drugY


# Preprocess the data

### Separate the features (X) and the target (Y)

In [42]:
# Feature matrix: the five predictor columns as a NumPy array
# (mixed numeric/string columns, so the array has object dtype).
feature_columns = ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']
X = df[feature_columns].to_numpy()

# Target: the prescribed drug, kept as a pandas Series.
Y = df['Drug']

X[:5]

array([[23, 'F', 'HIGH', 'HIGH', 25.355],
       [47, 'M', 'LOW', 'HIGH', 13.093],
       [47, 'M', 'LOW', 'HIGH', 10.114],
       [28, 'F', 'NORMAL', 'HIGH', 7.798],
       [61, 'F', 'LOW', 'HIGH', 18.043]], dtype=object)

### Encode the categorical columns as numbers

In [43]:
from sklearn.preprocessing import LabelEncoder

# Replace the three string-valued feature columns of X with integer codes.
#
# Each encoder reads the raw strings from the untouched DataFrame column
# instead of from X itself. X is overwritten in place here, so reading from X
# would make this cell fail when re-run: the already-encoded integers are
# labels the encoders were never fitted on.
#
# LabelEncoder sorts its classes, so the codes are alphabetical:
#   Sex:         F=0, M=1
#   BP:          HIGH=0, LOW=1, NORMAL=2
#   Cholesterol: HIGH=0, NORMAL=1

le_sex = LabelEncoder()
le_sex.fit(['F', 'M'])
X[:, 1] = le_sex.transform(df['Sex'])

le_bp = LabelEncoder()
le_bp.fit(['LOW', 'NORMAL', 'HIGH'])
X[:, 2] = le_bp.transform(df['BP'])

le_chl = LabelEncoder()
le_chl.fit(['NORMAL', 'HIGH'])
X[:, 3] = le_chl.transform(df['Cholesterol'])

X[0:5]

array([[23, 0, 0, 0, 25.355],
       [47, 1, 1, 0, 13.093],
       [47, 1, 1, 0, 10.114],
       [28, 0, 2, 0, 7.798],
       [61, 0, 1, 0, 18.043]], dtype=object)

# Train/test split (70% train, 30% test)

In [44]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=4,
)

# Show the shapes of the feature partitions, then of the target partitions.
for train_part, test_part in ((X_train, X_test), (Y_train, Y_test)):
    print(train_part.shape, test_part.shape)

(140, 5) (60, 5)
(140,) (60,)


# Modelling

In [45]:
# Entropy-based decision tree limited to depth 4.
# random_state is pinned (same seed as the train/test split) because
# scikit-learn permutes the candidate features at every split, so an unseeded
# tree can break ties differently from one run to the next.
drug_tree = DecisionTreeClassifier(criterion='entropy', max_depth=4, random_state=4)

drug_tree.fit(X_train, Y_train)

predict_tree = drug_tree.predict(X_test)

# Compare the first five predictions with the first five true labels.
# Y_test keeps the original (shuffled) row labels as its index, so use .iloc
# to make it explicit that these are the first five rows by position.
print(predict_tree[0:5])
print(Y_test.iloc[0:5])

['drugY' 'drugY' 'drugY' 'drugY' 'drugC']
11     drugY
99     drugY
128    drugY
175    drugY
1      drugC
Name: Drug, dtype: object


# Evaluation