# Predict the class of an unknown patient

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Read Data!

In [None]:
# Load the raw dataset from 'drug.csv' (path is relative to the working directory).
df_patient = pd.read_csv('drug.csv')

In [None]:
# Preview the first five rows.
df_patient.head()

In [None]:
# Row count of the raw frame.
print('Number Of patient: ', len(df_patient))

In [None]:
# (rows, columns) of the raw frame.
print('Shape Of Data: ', df_patient.shape)

In [None]:
# Column dtypes, non-null counts and memory usage.
df_patient.info()

## Pre-processing

### column-wise distribution of null values

In [None]:
# Missing-value count for each column (isna is the same check as isnull).
df_patient.isna().sum()

### total number of missing values

In [None]:
# Missing-value count over the whole frame: sum the boolean mask as one array.
df_patient.isna().to_numpy().sum()

### total number of duplicated rows

In [None]:
# Number of rows that exactly repeat an earlier row.
df_patient.duplicated().sum()

### Categorical Data

In [None]:
# Inspect the column dtypes before encoding the categorical columns.
df_patient.dtypes

In [None]:
# Frequency of each distinct value in 'Sex'.
df_patient['Sex'].value_counts()

In [None]:
# Number of distinct (non-null) values in 'Sex'.
df_patient['Sex'].value_counts().count()

In [None]:
# Work on a copy so the raw frame df_patient is not modified by the encoding steps.
df_patient_edited = df_patient.copy()

df_patient_edited.head()

In [None]:
# Encode 'Sex' as 0 (F) / 1 (M).
# The result is assigned back to the frame rather than produced with
# replace(..., inplace=True) on the selected column: under pandas Copy-on-Write
# the selected column is a temporary object, so an in-place edit on it never
# reaches df_patient_edited.
# Reading from the raw frame keeps the cell re-runnable. Any value other than
# 'F'/'M' becomes NaN, so it shows up in the missing-value checks.
df_patient_edited['Sex'] = df_patient['Sex'].map({'F': 0, 'M': 1})

In [None]:
# Look at the first rows after encoding 'Sex'.
df_patient_edited.head()

In [None]:
# Re-check the column dtypes after encoding 'Sex'.
df_patient_edited.dtypes

In [None]:
from sklearn import preprocessing

In [None]:
df_patient_edited['BP'].value_counts()

In [None]:
LE_BP = preprocessing.LabelEncoder()

LE_BP.fit(['LOW', 'NORMAL', 'HIGH'])

df_patient_edited['BP'] = LE_BP.transform(df_patient_edited['BP'])

In [None]:
df_patient_edited['Cholesterol'].value_counts()

In [None]:
LE_Cholesterol = preprocessing.LabelEncoder()

LE_Cholesterol.fit(['NORMAL', 'HIGH'])

df_patient_edited['Cholesterol'] = LE_Cholesterol.transform(df_patient_edited['Cholesterol'])

In [None]:
# Look at the first rows after encoding 'BP' and 'Cholesterol'.
df_patient_edited.head()

In [None]:
# Re-check the column dtypes after the encoding steps.
df_patient_edited.dtypes

## Create Model

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X = df_patient_edited.iloc[:,:5].values

Y = df_patient_edited['Drug']

In [None]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=30, stratify=Y)

In [None]:
# Print the shape of each split; the labels are kept exactly as they were.
for _label, _part in (
    ("x_train.shape:", x_train),
    ("y_train.shape", y_train),
    ("x_test.shape", x_test),
    ("y_test.shape", y_test),
):
    print(_label, _part.shape)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
DT_model = DecisionTreeClassifier(criterion="entropy", max_depth=3)

In [None]:
DT_model.fit(x_train,y_train)

In [None]:
y_pred = DT_model.predict(x_test)

## Evaluation

In [None]:
from sklearn import metrics

In [None]:
# Accuracy, macro-averaged F1 and the per-class report for y_pred.
accuracy = metrics.accuracy_score(y_test, y_pred)
macro_f1 = metrics.f1_score(y_test, y_pred, average='macro')
report = metrics.classification_report(y_test, y_pred)

print("Accuracy: ", accuracy)
print("f1_score: ", macro_f1, '\n')
print(report)

In [None]:
# Sweep max_depth from 1 to 7 and record the accuracy on both splits.
# Each candidate is bound to its own name (depth_model) so the sweep does not
# overwrite DT_model, the depth-3 entropy tree fitted and evaluated earlier in
# the notebook.
# NOTE(review): the "validation" accuracy is measured on x_test / y_test, so a
# depth picked from this curve is tuned on the test split.
train_accuracy = []
validation_accuracy = []

for depth in range(1, 8):
    depth_model = DecisionTreeClassifier(max_depth=depth, random_state=10)
    depth_model.fit(x_train, y_train)

    train_accuracy.append(depth_model.score(x_train, y_train))
    validation_accuracy.append(depth_model.score(x_test, y_test))

In [None]:
df_depth = pd.DataFrame({'max_depth':range(1,8), 'train_acc':train_accuracy, 'valid_acc':validation_accuracy})

In [None]:
df_depth.head()

In [None]:
# Accuracy on both splits against tree depth, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 6))

for column in ('train_acc', 'valid_acc'):
    ax.plot(df_depth['max_depth'], df_depth[column], marker='o')

ax.set_xlabel('Depth of tree')
ax.set_ylabel('Performance')
ax.set_title('Accuracy')

ax.legend(['train_acc', 'valid_acc'], loc='upper left')

In [None]:
DT_model2 = DecisionTreeClassifier(max_depth=4)

In [None]:
DT_model2.fit(x_train, y_train)

In [None]:
y_pred2 = DT_model2.predict(x_test)

In [None]:
# Accuracy and macro-averaged F1 for y_pred2.
dt2_accuracy = metrics.accuracy_score(y_test, y_pred2)
dt2_macro_f1 = metrics.f1_score(y_test, y_pred2, average='macro')
print("DecisionTrees's Accuracy: ", dt2_accuracy)
print("DecisionTrees's f1_score: ", dt2_macro_f1)

In [None]:
from sklearn.tree import plot_tree

In [None]:
# Draw the fitted tree on a figure large enough to read, labelled with the
# feature columns and the fitted class labels. The trailing ';' hides the list
# of annotation objects that plot_tree returns.
fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    DT_model2,
    feature_names=list(df_patient_edited.columns[:5]),
    class_names=[str(label) for label in DT_model2.classes_],
    filled=True,
    ax=ax,
);

In [None]:
!pip install pydotplus
!pip install graphviz

In [None]:
from io import StringIO  # standard library; six.StringIO is this same class on Python 3
import pydotplus
import matplotlib.image as mpimg
from sklearn import tree
from graphviz import Digraph

# Export the fitted tree as DOT text, render it to a PNG with pydotplus,
# then display the image inline.
dot_data = StringIO()
filename = "drugtree.png"
# Plain lists are passed below: scikit-learn's parameter validation has
# rejected a pandas Index / NumPy array for these arguments in some releases.
featureNames = list(df_patient_edited.columns[0:5])
# classes_ lists the labels in the order the fitted tree numbers them, which is
# the order class_names must follow (order of appearance in the frame is not).
targetNames = [str(label) for label in DT_model2.classes_]
# export_graphviz returns None when out_file is given, so nothing is captured.
tree.export_graphviz(DT_model2, feature_names=featureNames, out_file=dot_data,
                     class_names=targetNames, filled=True,
                     special_characters=True, rotate=False)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png(filename)
img = mpimg.imread(filename)
plt.figure(figsize=(15,15))
plt.imshow(img, interpolation='nearest')
# Pixel-coordinate ticks carry no meaning for a rendered diagram.
plt.axis('off');


https://github.com/njiix/py4ds