# <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#4F200D; font-size:140%; text-align:center;padding: 0px; border-bottom: 3px solid #4F200D">Importing Packages and Libraries</p>

In [1]:
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import OneHotEncoder,LabelEncoder,OrdinalEncoder,StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression,Lasso,Ridge,RidgeClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.tree import DecisionTreeClassifier
import pickle as pk

# <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#4F200D; font-size:140%; text-align:center;padding: 0px; border-bottom: 3px solid #4F200D">Data Loading</p>

In [2]:
data=pd.read_csv('/kaggle/input/drug-classification/drug200.csv')

# <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#4F200D; font-size:140%; text-align:center;padding: 0px; border-bottom: 3px solid #4F200D">Data Cleaning</p>

In [3]:
data.duplicated().sum()

0

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Age          200 non-null    int64  
 1   Sex          200 non-null    object 
 2   BP           200 non-null    object 
 3   Cholesterol  200 non-null    object 
 4   Na_to_K      200 non-null    float64
 5   Drug         200 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 9.5+ KB


# <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#4F200D; font-size:140%; text-align:center;padding: 0px; border-bottom: 3px solid #4F200D">Machine Learning</p>

<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Constructing Encoders For Data Model</p>
</div>


In [5]:
one=OneHotEncoder(handle_unknown='ignore',sparse=False)

In [6]:
le=LabelEncoder()

In [7]:
target=le.fit_transform(data['Drug'])

In [8]:
std=('standard',StandardScaler(),['Age','Na_to_K'])

In [9]:
data.drop(['Drug'],axis=1,inplace=True)

In [10]:
encode=('one',one,data.columns)

<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Splitting Data</p>
</div>


In [11]:
xtra,xtest,ytra,ytest=train_test_split(data,target,test_size=0.2,random_state=456)

In [12]:
col=ColumnTransformer(transformers=[std,encode])

<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Maachine Learning Models</p>
</div>


In [13]:
model1=RandomForestClassifier()
model2=SVC()
model3=LogisticRegression()
model4=KNeighborsClassifier()
model5=DecisionTreeClassifier()
la=Lasso()
ri=RidgeClassifier()

In [14]:
model6=GridSearchCV(ri,param_grid = {'alpha': np.logspace(-4, 4, 9)},cv=5,verbose=2,scoring='accuracy')

In [15]:
model7=GridSearchCV(ri,param_grid = {'alpha': np.logspace(-4, 4, 9)},cv=5,verbose=2,scoring='accuracy')

<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Constructing Pipelines</p>
</div>


In [16]:
pipe1=Pipeline(steps=[('col',col),('model1',model1)])
pipe2=Pipeline(steps=[('col',col),('model2',model2)])
pipe3=Pipeline(steps=[('col',col),('model3',model3)])
pipe4=Pipeline(steps=[('col',col),('model4',model4)])

pipe5=Pipeline(steps=[('col',col),('model5',model5)])
pipe6=Pipeline(steps=[('col',col),('model6',model6)])
pipe7=Pipeline(steps=[('col',col),('model7',model7)])

# <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#4F200D; font-size:140%; text-align:center;padding: 0px; border-bottom: 3px solid #4F200D">Results</p>

In [17]:
c1,c2,c3,c4,c5,c6,c7=cross_val_score(pipe1,data,target),cross_val_score(pipe2,xtest,ytest),cross_val_score(pipe3,xtest,ytest),cross_val_score(pipe4,xtest,ytest),cross_val_score(pipe5,xtest,ytest),cross_val_score(pipe6,xtest,ytest),cross_val_score(pipe7,xtest,ytest)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END .......................................alpha=0.0001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END ........................................alpha=0.001; total time=   0.0s
[CV] END .........................................alpha=0.01; total time=   0.0s
[CV] END ........................................

In [18]:
print('Cross_Val_Scores:\nRandomForestClassifier:{:.2f}\nSVC:{:.2f}\nLogisticRegression:{:.2f}\nKNeighborsClassifier:{:.2f}\nDecisionTreeClassifier:{:.2f}\nRigdeModel:{:.2f}\nLassoModel:{:.2f}'.format(c1.mean(),c2.mean(),c3.mean(),c4.mean(),c5.mean(),c6.mean(),c7.mean()))

Cross_Val_Scores:
RandomForestClassifier:0.97
SVC:0.65
LogisticRegression:0.78
KNeighborsClassifier:0.72
DecisionTreeClassifier:0.93
RigdeModel:0.68
LassoModel:0.68


In [19]:
models=['RandomForestClassifier','LogisticRegression','DecisionTreeClassifier','RidgeModel','LassoModel','SVC','KNeighborsClassifier']
modelcrossval=[c1,c3,c5,c6,c7,c2,c4]

# <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#4F200D; font-size:140%; text-align:center;padding: 0px; border-bottom: 3px solid #4F200D">HyperParameter Tuning</p>

<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Decision Tree Model Tuning</p>
</div>


In [20]:
newmodel=GridSearchCV(pipe5,param_grid = {
    'model5__max_depth': [None, 10, 20, 30],
    'model5__min_samples_split': [2, 5, 10],
    'model5__min_samples_leaf': [1, 2, 4],
    'model5__max_features': ['auto', 'sqrt', 'log2'],
},cv=5,verbose=2,scoring='f1')

In [21]:
newmodel.fit(xtra,ytra)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END model5__max_depth=None, model5__max_features=auto, model5__min_samples_leaf=1, model5__min_samples_split=2; total time=   0.0s
[CV] END model5__max_depth=None, model5__max_features=auto, model5__min_samples_leaf=1, model5__min_samples_split=2; total time=   0.0s
[CV] END model5__max_depth=None, model5__max_features=auto, model5__min_samples_leaf=1, model5__min_samples_split=2; total time=   0.0s
[CV] END model5__max_depth=None, model5__max_features=auto, model5__min_samples_leaf=1, model5__min_samples_split=2; total time=   0.0s
[CV] END model5__max_depth=None, model5__max_features=auto, model5__min_samples_leaf=1, model5__min_samples_split=2; total time=   0.0s
[CV] END model5__max_depth=None, model5__max_features=auto, model5__min_samples_leaf=1, model5__min_samples_split=5; total time=   0.0s
[CV] END model5__max_depth=None, model5__max_features=auto, model5__min_samples_leaf=1, model5__min_samples_split=5; tota

In [22]:
newmodel

In [23]:
model8=newmodel.best_estimator_

In [24]:
model8.score(xtest,ytest)

0.675

In [25]:
dttuned=cross_val_score(model8,data,target)

In [26]:
ypre=model8.predict(xtest)

<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Logistic Regression Model Tuning</p>
</div>


In [27]:
param_grid = {
    'model3__penalty': ['l2'],  # L1 or L2 regularization
    'model3__C': [0.001, 0.01, 0.1, 1, 10, 100],  # Regularization strength
    'model3__fit_intercept': [True, False],  # Whether to fit an intercept
    'model3__class_weight': [None, 'balanced'],  # Weights associated with classes
    'model3__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],  # Optimization algorithm
    'model3__max_iter': [100, 200, 300],  # Maximum number of iterations
}

In [28]:
newmodel2=GridSearchCV(pipe3,param_grid,cv=5,scoring='f1',verbose=2)

In [29]:
newmodel2.fit(xtra,ytra)

Fitting 5 folds for each of 360 candidates, totalling 1800 fits
[CV] END model3__C=0.001, model3__class_weight=None, model3__fit_intercept=True, model3__max_iter=100, model3__penalty=l2, model3__solver=newton-cg; total time=   0.0s
[CV] END model3__C=0.001, model3__class_weight=None, model3__fit_intercept=True, model3__max_iter=100, model3__penalty=l2, model3__solver=newton-cg; total time=   0.0s
[CV] END model3__C=0.001, model3__class_weight=None, model3__fit_intercept=True, model3__max_iter=100, model3__penalty=l2, model3__solver=newton-cg; total time=   0.0s
[CV] END model3__C=0.001, model3__class_weight=None, model3__fit_intercept=True, model3__max_iter=100, model3__penalty=l2, model3__solver=newton-cg; total time=   0.0s
[CV] END model3__C=0.001, model3__class_weight=None, model3__fit_intercept=True, model3__max_iter=100, model3__penalty=l2, model3__solver=newton-cg; total time=   0.0s
[CV] END model3__C=0.001, model3__class_weight=None, model3__fit_intercept=True, model3__max_ite

In [30]:
newmodel2

In [31]:
newmodel2_=newmodel2.best_estimator_

In [32]:
lrtuned=cross_val_score(newmodel2_,data,target)

<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Random Forest Model Tuning</p>
</div>


In [33]:
param_grid = {
    'model1__n_estimators': [50, 100, 200],  # Number of trees in the forest
    'model1__max_depth': [None, 10, 20, 30],  # Maximum depth of the tree
    'model1__min_samples_split': [2, 5, 10],  # The minimum number of samples required to split an internal node
    'model1__min_samples_leaf': [1, 2, 4],  # The minimum number of samples required to be at a leaf node
    'model1__max_features': ['auto', 'sqrt', 'log2'],  # The number of features to consider when looking for the best split
}

In [34]:
newmodel3=GridSearchCV(pipe1,param_grid,cv=5,scoring='f1',verbose=2)

In [35]:
newmodel3.fit(xtra,ytra)

Fitting 5 folds for each of 324 candidates, totalling 1620 fits
[CV] END model1__max_depth=None, model1__max_features=auto, model1__min_samples_leaf=1, model1__min_samples_split=2, model1__n_estimators=50; total time=   0.1s
[CV] END model1__max_depth=None, model1__max_features=auto, model1__min_samples_leaf=1, model1__min_samples_split=2, model1__n_estimators=50; total time=   0.1s
[CV] END model1__max_depth=None, model1__max_features=auto, model1__min_samples_leaf=1, model1__min_samples_split=2, model1__n_estimators=50; total time=   0.1s
[CV] END model1__max_depth=None, model1__max_features=auto, model1__min_samples_leaf=1, model1__min_samples_split=2, model1__n_estimators=50; total time=   0.1s
[CV] END model1__max_depth=None, model1__max_features=auto, model1__min_samples_leaf=1, model1__min_samples_split=2, model1__n_estimators=50; total time=   0.1s
[CV] END model1__max_depth=None, model1__max_features=auto, model1__min_samples_leaf=1, model1__min_samples_split=2, model1__n_esti

In [36]:
newmodel3_=newmodel3.best_estimator_

In [37]:
newmodel3_.score(xtest,ytest)

0.925

In [38]:
rftuned=cross_val_score(newmodel3_,data,target)

In [39]:
tunedmodels=['RandomForestClassifier','LogisticRegression','DecisionTreeClassifier']
tunedmodelscrossval=[rftuned,lrtuned,dttuned]

# <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#4F200D; font-size:140%; text-align:center;padding: 0px; border-bottom: 3px solid #4F200D">Final Results</p>

In [40]:
mean=[]
for i in modelcrossval:
    mean.append(i.mean())
tunedmean=[]
for i in tunedmodelscrossval:
    tunedmean.append(i.mean())

In [41]:
pd.DataFrame(dict(Models=models,Cross_Val_scores=modelcrossval,means=mean,Tuned_Models=tunedmodels+['-']*4,Tuned_Models_Cross_Val_Scores=tunedmodelscrossval+['-']*4,Tuned_Model_Means=tunedmean+['-']*4))

Unnamed: 0,Models,Cross_Val_scores,means,Tuned_Models,Tuned_Models_Cross_Val_Scores,Tuned_Model_Means
0,RandomForestClassifier,"[0.975, 1.0, 1.0, 0.9, 1.0]",0.975,RandomForestClassifier,"[1.0, 1.0, 1.0, 0.9, 1.0]",0.98
1,LogisticRegression,"[0.625, 0.875, 0.75, 0.875, 0.75]",0.775,LogisticRegression,"[0.475, 0.45, 0.45, 0.45, 0.45]",0.455
2,DecisionTreeClassifier,"[0.875, 0.875, 0.875, 1.0, 1.0]",0.925,DecisionTreeClassifier,"[0.625, 0.925, 0.85, 0.675, 0.9]",0.795
3,RidgeModel,"[0.5, 0.75, 0.625, 0.75, 0.75]",0.675,-,-,-
4,LassoModel,"[0.5, 0.75, 0.625, 0.75, 0.75]",0.675,-,-,-
5,SVC,"[0.625, 0.625, 0.5, 0.75, 0.75]",0.65,-,-,-
6,KNeighborsClassifier,"[0.375, 0.75, 0.625, 0.875, 1.0]",0.725,-,-,-


<div style="border-radius: 10px; border: rgb(41, 128, 185) solid; padding: 15px; background-color: rgb(52, 73, 94); font-size: 100%; text-align: left; color: #ecf0f1; box-shadow: 0 0 15px rgba(41, 128, 185, 0.7);">
    <ul style="margin: 0; padding-left: 20px; list-style-type: square;">
        <li><strong>Decision Tree Classifier is a Robust Model as well as a Good Model</strong></li>
    </ul>
</div>


<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Model Saving</p>
</div>


In [42]:

pk.dump(model5,open('/kaggle/working/model.pkl','wb'))

<div style="background-color: #000; padding: 10px; text-align: center; border-radius: 5px; box-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">
    <p style="font-family: 'JetBrains Mono', monospace; font-weight: bold; letter-spacing: 2px; color: #3498db; font-size: 140%; text-shadow: 0 0 10px rgba(255, 255, 255, 0.7);">Model Loading</p>
</div>


In [43]:
model=pk.load(open('/kaggle/working/model.pkl','rb'))
data=pd.read_csv('/kaggle/input/drug-classification/drug200.csv')

# <p style="font-family:JetBrains Mono; font-weight:bold; letter-spacing: 2px; color:#4F200D; font-size:140%; text-align:center;padding: 0px; border-bottom: 3px solid #4F200D">Model Utilization</p>

In [44]:
"""rows=[]
n=int(input('Enter the no of records : '))
print("Sample Inputs Age(int-range(15-74)),Sex(F or M),BP(HIGH or LOW or NORMAL),Cholesterol(HIGH OR NORMAL),Na_to_k(float-range(6-39))")
for i in range(n):
  print('FOR {} Record :'.format(i+1))
  row=[]
  for j in range(len(data.columns)-1):
    if j==0:
      k=int(input('Enter the value (%s):'%(data.columns[j])))
    else:
      k=input('Enter the value (%s):'%(data.columns[j]))
    row.append(k)
  rows.append(row)
newtable=pd.DataFrame(rows,columns=data.columns[:-1])
ans=model.predict(newtable)
def answer(ansarr):
  ans1=[]
  for i in ans:
    if i==0:
      ans1.append('DrugY')
    elif i==1:
      ans1.append('DrugA')
    elif i==2:
      ans1.append('DrugB')
    elif i==3:
      ans1.append('DrugC')
    elif i==4:
      ans1.append('DrugX')
  return ans1

for i in range(len(ans)):
  print(f"'Record'{(i+1): }: "+answer(ans)[i])"""

'rows=[]\nn=int(input(\'Enter the no of records : \'))\nprint("Sample Inputs Age(int-range(15-74)),Sex(F or M),BP(HIGH or LOW or NORMAL),Cholesterol(HIGH OR NORMAL),Na_to_k(float-range(6-39))")\nfor i in range(n):\n  print(\'FOR {} Record :\'.format(i+1))\n  row=[]\n  for j in range(len(data.columns)-1):\n    if j==0:\n      k=int(input(\'Enter the value (%s):\'%(data.columns[j])))\n    else:\n      k=input(\'Enter the value (%s):\'%(data.columns[j]))\n    row.append(k)\n  rows.append(row)\nnewtable=pd.DataFrame(rows,columns=data.columns[:-1])\nans=model.predict(newtable)\ndef answer(ansarr):\n  ans1=[]\n  for i in ans:\n    if i==0:\n      ans1.append(\'DrugY\')\n    elif i==1:\n      ans1.append(\'DrugA\')\n    elif i==2:\n      ans1.append(\'DrugB\')\n    elif i==3:\n      ans1.append(\'DrugC\')\n    elif i==4:\n      ans1.append(\'DrugX\')\n  return ans1\n\nfor i in range(len(ans)):\n  print(f"\'Record\'{(i+1): }: "+answer(ans)[i])'

<div style="border-radius: 10px; border: 2px solid #3498db; padding: 15px; background-color: #000; font-size: 120%; text-align: center; color: #3498db; box-shadow: 0 0 15px rgba(52, 152, 219, 0.7);">
    <p style="margin: 0; font-weight: bold; font-size: 150%; color: #3498db;">Thank You!</p>
    <p style="margin: 10px 0 20px; color: #3498db;">Wishing you a day filled with tranquility and positivity!</p>
    <p style="margin: 0; color: #3498db;">Explore the calm and soothing content we have in store for you.</p>
</div>
