In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import warnings 
warnings.filterwarnings("ignore")

In [2]:
drugs = pd.read_csv("Drugs Data.csv")
drugs.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY


In [3]:
feature_cols = ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']
x = drugs[feature_cols].values
y = drugs.Drug

In [4]:
from sklearn import preprocessing

cod_Sex = preprocessing.LabelEncoder()
cod_Sex.fit(['F', 'M'])
x[:, 1] = cod_Sex.transform(x[:, 1])

cod_Bp = preprocessing.LabelEncoder()
cod_Bp.fit(['HIGH', 'NORMAL', 'LOW'])
x[:, 2] = cod_Bp.transform(x[:, 2])

cod_Cholesterol = preprocessing.LabelEncoder()
cod_Cholesterol.fit(['NORMAL', 'HIGH'])
x[:, 3] = cod_Cholesterol.transform(x[:, 3])

In [5]:
# Creacion de grupos de entrenamiento y prueba
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 1)

# 01 Bosques aleatorios (Random Forest) 

In [7]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators = 100, random_state = 1)
rf.fit(x_train, y_train)

In [8]:
# Estadisticas de desempeño generales 
from sklearn.metrics import classification_report
y_pred = rf.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       drugA       1.00      1.00      1.00         4
       drugB       1.00      1.00      1.00         2
       drugC       1.00      0.75      0.86         4
       drugX       0.93      1.00      0.96        13
       drugY       1.00      1.00      1.00        17

    accuracy                           0.97        40
   macro avg       0.99      0.95      0.96        40
weighted avg       0.98      0.97      0.97        40



# 02 Gradient Boosted trees

In [10]:
from sklearn.ensemble import GradientBoostingClassifier
gbt = GradientBoostingClassifier(n_estimators = 100, random_state = 1)
gbt.fit(x_train, y_train)  

In [11]:
# Estadisticas de desempeño general
y_pred = gbt.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       drugA       1.00      1.00      1.00         4
       drugB       1.00      1.00      1.00         2
       drugC       1.00      1.00      1.00         4
       drugX       1.00      1.00      1.00        13
       drugY       1.00      1.00      1.00        17

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40



# 03 Gradient Boosted trees with depht = 2

In [13]:
gbt = GradientBoostingClassifier(n_estimators = 100, random_state = 1, max_depth = 2) 
gbt.fit(x_train, y_train)

In [14]:
# Estadisticas de desempeño general
y_pred = gbt.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       drugA       1.00      1.00      1.00         4
       drugB       1.00      1.00      1.00         2
       drugC       1.00      1.00      1.00         4
       drugX       1.00      1.00      1.00        13
       drugY       1.00      1.00      1.00        17

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40



# 04 AdaBoost Classifier with desicion tree

In [16]:
from sklearn.ensemble import AdaBoostClassifier
abc = AdaBoostClassifier(n_estimators = 50, learning_rate = 1)
abc.fit(x_train, y_train)

In [17]:
y_pred = abc.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       drugA       0.67      1.00      0.80         4
       drugB       0.00      0.00      0.00         2
       drugC       0.00      0.00      0.00         4
       drugX       0.76      1.00      0.87        13
       drugY       1.00      1.00      1.00        17

    accuracy                           0.85        40
   macro avg       0.49      0.60      0.53        40
weighted avg       0.74      0.85      0.79        40



# 05 AdaBoost Classifier with SVC

In [19]:
from sklearn.svm import SVC
svc = SVC(probability = True, kernel = 'linear')

In [20]:
abc = AdaBoostClassifier(n_estimators = 50, estimator = svc, learning_rate = 1)
abc.fit(x_train, y_train)

In [21]:
y_pred = abc.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       drugA       0.00      0.00      0.00         4
       drugB       0.00      0.00      0.00         2
       drugC       0.00      0.00      0.00         4
       drugX       0.00      0.00      0.00        13
       drugY       0.42      1.00      0.60        17

    accuracy                           0.42        40
   macro avg       0.08      0.20      0.12        40
weighted avg       0.18      0.42      0.25        40

