**DRUG CLASSIFICATION MODEL**

**Data Loading**

In [38]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import metrics

**Additional Metrics Calculation Function**

In [39]:
def calculate_additional_metrics(model_name, y_true, y_pred):
    precision = metrics.precision_score(y_true, y_pred,average='weighted')
    recall = metrics.recall_score(y_true, y_pred,average='weighted')
    f1_score = metrics.f1_score(y_true, y_pred,average='weighted')

    print(f"{model_name} - Additional Metrics:")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1_score}\n")

In [40]:
ds=pd.read_csv("drug200.csv")

In [41]:
print("Description:\n",ds.describe())

Description:
               Age     Na_to_K
count  200.000000  200.000000
mean    44.315000   16.084485
std     16.544315    7.223956
min     15.000000    6.269000
25%     31.000000   10.445500
50%     45.000000   13.936500
75%     58.000000   19.380000
max     74.000000   38.247000


**Data Exploration and Preprocessing**

In [42]:
print("null values:\n",ds.isnull().sum())

null values:
 Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64


In [43]:
print("data:\n",ds)

data:
      Age Sex      BP Cholesterol  Na_to_K   Drug
0     23   F    HIGH        HIGH   25.355  drugY
1     47   M     LOW        HIGH   13.093  drugC
2     47   M     LOW        HIGH   10.114  drugC
3     28   F  NORMAL        HIGH    7.798  drugX
4     61   F     LOW        HIGH   18.043  drugY
..   ...  ..     ...         ...      ...    ...
195   56   F     LOW        HIGH   11.567  drugC
196   16   M     LOW        HIGH   12.006  drugC
197   52   M  NORMAL        HIGH    9.894  drugX
198   23   M  NORMAL      NORMAL   14.020  drugX
199   40   F     LOW      NORMAL   11.349  drugX

[200 rows x 6 columns]


In [44]:
from sklearn import preprocessing
sex_encoding=preprocessing.LabelEncoder()
ds['Sex'] = sex_encoding.fit_transform(ds['Sex'])
BP_encoding=preprocessing.LabelEncoder()
ds['BP']=BP_encoding.fit_transform(ds['BP'])
Cholesterol_encoding=preprocessing.LabelEncoder()
ds['Cholesterol']=Cholesterol_encoding.fit_transform(ds['Cholesterol'])
#display the preprocessed dataset
print(ds)

     Age  Sex  BP  Cholesterol  Na_to_K   Drug
0     23    0   0            0   25.355  drugY
1     47    1   1            0   13.093  drugC
2     47    1   1            0   10.114  drugC
3     28    0   2            0    7.798  drugX
4     61    0   1            0   18.043  drugY
..   ...  ...  ..          ...      ...    ...
195   56    0   1            0   11.567  drugC
196   16    1   1            0   12.006  drugC
197   52    1   2            0    9.894  drugX
198   23    1   2            1   14.020  drugX
199   40    0   1            1   11.349  drugX

[200 rows x 6 columns]


**Train Test Split and Model Training**

In [45]:
xcols=[col for col in ds.columns if col not in ['Drug']]
x=ds[xcols]
y=ds['Drug']

In [46]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=3)

**Model Evaluation and Additional Metrics Calculation**

In [47]:
from sklearn.tree import DecisionTreeClassifier
dectree=DecisionTreeClassifier()
dectree.fit(x_train,y_train)
dr_prediction=dectree.predict(x_test)
print("DecisionTrees's Accuracy: ", metrics.accuracy_score(y_test, dr_prediction))
calculate_additional_metrics('Decision Tree', y_test, dr_prediction)

DecisionTrees's Accuracy:  0.9833333333333333
Decision Tree - Additional Metrics:
Precision: 0.9840579710144927
Recall: 0.9833333333333333
F1-Score: 0.9833152664859981



In [48]:
from sklearn.ensemble import RandomForestClassifier
ranforest = RandomForestClassifier()
ranforest.fit(x_train, y_train)
rf_prediction= ranforest.predict(x_test)
print("Random Forest's Accuracy:", metrics.accuracy_score(y_test, rf_prediction))
calculate_additional_metrics('Random Forest', y_test, rf_prediction)

Random Forest's Accuracy: 0.9833333333333333
Random Forest - Additional Metrics:
Precision: 0.9840579710144927
Recall: 0.9833333333333333
F1-Score: 0.9833152664859981



In [49]:
new_patient_data=pd.DataFrame({'Age':[30],'Sex':'F','BP':['NORMAL'],'Cholesterol':['HIGH'],'Na_to_K':[15.0]})

In [50]:
new_patient_data['Sex'] = sex_encoding.fit_transform(new_patient_data['Sex'])
new_patient_data['BP'] = BP_encoding.fit_transform(new_patient_data['BP'])
new_patient_data['Cholesterol'] = Cholesterol_encoding.fit_transform(new_patient_data['Cholesterol'])
print("New Patient Data:")
print(new_patient_data)

New Patient Data:
   Age  Sex  BP  Cholesterol  Na_to_K
0   30    0   0            0     15.0


In [51]:
dt_prediction=dectree.predict(new_patient_data)
rf_prediction = ranforest.predict(new_patient_data)
print("\nPredictions for the New Patient:")
print("Decision Tree Prediction:", dt_prediction[0])
print("Random Forest Prediction:", rf_prediction[0])


Predictions for the New Patient:
Decision Tree Prediction: drugY
Random Forest Prediction: drugY
