In [42]:
import pandas as pd

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from sklearn.model_selection import train_test_split

from sklearn.impute import SimpleImputer

from sklearn.pipeline import Pipeline

from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, r2_score, confusion_matrix

In [2]:
df = pd.read_csv("./drug200.csv")
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


## Pre-Processing

In [10]:
mms = MinMaxScaler()

In [12]:
df["Age"] = mms.fit_transform(df[["Age"]])

In [23]:
df = pd.concat([df.drop("Sex", axis=1), pd.get_dummies(df["Sex"], prefix="sex")], axis=1)

In [25]:
df = pd.concat([df.drop("BP", axis=1), pd.get_dummies(df["BP"], prefix="blood_pressure")], axis=1)

In [26]:
df = pd.concat([df.drop("Cholesterol", axis=1), pd.get_dummies(df["Cholesterol"], prefix="cholesterol")], axis=1)

In [27]:
df["Na_to_K"] = mms.fit_transform(df[["Na_to_K"]])

In [29]:
df.head()

Unnamed: 0,Age,Na_to_K,Drug,sex_F,sex_M,blood_pressure_HIGH,blood_pressure_LOW,blood_pressure_NORMAL,cholesterol_HIGH,cholesterol_NORMAL
0,0.135593,0.596848,DrugY,1,0,1,0,0,1,0
1,0.542373,0.213397,drugC,0,1,0,1,0,1,0
2,0.542373,0.120239,drugC,0,1,0,1,0,1,0
3,0.220339,0.047814,drugX,1,0,0,0,1,1,0
4,0.779661,0.368191,DrugY,1,0,0,1,0,1,0


## Train & Test

In [30]:
X = df.drop("Drug", axis=1)
y = df["Drug"]

In [33]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

## Finding a model

In [35]:
rf = Pipeline([
    ("impute", SimpleImputer()),
    ("scaler", StandardScaler()),
    ("forest", RandomForestClassifier(random_state=42))
])


In [36]:
rf.fit(X_train, y_train)

In [41]:
y_pred = rf.predict(X_valid)

In [46]:
ac = accuracy_score(y_valid, y_pred)
cm = confusion_matrix(y_valid, y_pred)

In [55]:
print(f'Score: {ac*100:.2f}%\n')
print(cm)

Score: 100.00%

[[15  0  0  0  0]
 [ 0  6  0  0  0]
 [ 0  0  3  0  0]
 [ 0  0  0  5  0]
 [ 0  0  0  0 11]]


In [38]:
mlp = Pipeline([
    ("impute", SimpleImputer()),
    ("scaler", StandardScaler()),
    ("MLP", MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=300,activation = 'relu',solver='adam',random_state=42))
])

In [39]:
mlp.fit(X_train, y_train)

In [48]:
y_pred = mlp.predict(X_valid)

In [49]:
ac = accuracy_score(y_valid, y_pred)
cm = confusion_matrix(y_valid, y_pred)

In [54]:
print(f'Score: {ac*100:.2f}%\n')
print(cm)

Score: 100.00%

[[15  0  0  0  0]
 [ 0  6  0  0  0]
 [ 0  0  3  0  0]
 [ 0  0  0  5  0]
 [ 0  0  0  0 11]]
