### Importing Necessary Libraries 

In [1]:
import pandas as pd
from pandas import read_csv
import numpy as np
import matplotlib.pyplot as plt

### Loading Dataset

In [3]:
filename = 'Drug.csv'
data = read_csv(filename)
data.head()

Unnamed: 0,Drug,Disease,Gender,Age
0,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,Male,23
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,Male,23
2,ACGEL CL NANO Gel 15gm,Acne,Male,23
3,ACGEL NANO Gel 15gm,Acne,Male,23
4,Acleen 1% Lotion 25ml,Acne,Male,23


In [4]:
data.dtypes

Drug       object
Disease    object
Gender     object
Age         int64
dtype: object

In [5]:
data.shape

(4955, 4)

In [6]:
# Check and handle missing values
data.isnull().sum()

Drug       0
Disease    0
Gender     0
Age        0
dtype: int64

### Data Preprocessing

In [7]:
data.replace({'Gender':{'Female':0,'Male':1}},inplace=True)

In [8]:
x = data[['Disease']]

In [9]:
x.Disease.unique()

array(['Acne', 'Allergy', 'Diabetes', 'Fungal infection',
       'Urinary tract infection', 'Malaria', 'Migraine', 'Hepatitis B',
       'AIDS'], dtype=object)

In [10]:
data.replace({'Disease':{'Acne':0, 'Allergy':1, 'Diabetes':2, 'Fungal infection':3,
       'Urinary tract infection':4, 'Malaria':5, 'Migraine':6, 'Hepatitis B':7,
       'AIDS':8}},inplace=True)

In [11]:
data.head()

Unnamed: 0,Drug,Disease,Gender,Age
0,A CN Gel(Topical) 20gmA CN Soap 75gm,0,1,23
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,0,1,23
2,ACGEL CL NANO Gel 15gm,0,1,23
3,ACGEL NANO Gel 15gm,0,1,23
4,Acleen 1% Lotion 25ml,0,1,23


### Fitting The Model

In [13]:
# Feature Selection
df_x = data[['Disease','Gender','Age']]
df_y = data[['Drug']]

In [14]:
df_x.head()

Unnamed: 0,Disease,Gender,Age
0,0,1,23
1,0,1,23
2,0,1,23
3,0,1,23
4,0,1,23


In [15]:
df_y.head()

Unnamed: 0,Drug
0,A CN Gel(Topical) 20gmA CN Soap 75gm
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...
2,ACGEL CL NANO Gel 15gm
3,ACGEL NANO Gel 15gm
4,Acleen 1% Lotion 25ml


In [16]:
# Train Test Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=0)

In [17]:
X_train.shape

(3964, 3)

In [18]:
X_train.head()

Unnamed: 0,Disease,Gender,Age
2150,2,1,23
167,0,0,20
3188,2,0,23
844,1,0,23
4129,3,0,37


In [19]:
y_train.head()

Unnamed: 0,Drug
2150,Glycinorm Total 30mg Tablet 10'SGlycinorm Tota...
167,Dersol BH Ointment 30gm
3188,Wosulin New 30/70 100IU Cartridge 3X3ml
844,Lejet M JR Tablet 10'S
4129,Zocon 50mg Tablet 4'SZocon Transgel 15gmZocon ...


#### Fitting GaussianNB

In [33]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb = gnb.fit(df_x, np.ravel (df_y))

In [34]:
# Checking Model Accuracy
from sklearn.metrics import accuracy_score 
y_pred = gnb.predict(X_test)
print (accuracy_score (y_test, y_pred)) 
print(accuracy_score (y_test,y_pred, normalize=False))

0.039354187689202826
39


In [35]:
# Score
gnb.score(X_test,y_test)

0.039354187689202826

In [36]:
# Recommend Drug based on Disease, Age and Gender
prediction = gnb.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S


In [37]:
# Dump Model in Disk
import joblib as joblib
joblib.dump(gnb, 'model/medical_gnb.pkl')

['model/medical_gnb.pkl']

In [38]:
# Loading the Model
med_gnb = joblib.load('model/medical_gnb.pkl')

In [39]:
# Predict Using Loaded Model in the Disk
prediction = med_gnb.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S


#### Fitting Random Forest

In [20]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier()
rf = rf.fit(df_x,np.ravel(df_y))



In [21]:
# Model Accuracy
from sklearn.metrics import accuracy_score
y_pred = rf.predict(X_test)
print(accuracy_score(y_test,y_pred))
print(accuracy_score(y_test,y_pred, normalize=False))

0.04641775983854692
46


In [22]:
# RF Score
rf.score(X_test,y_test)

0.04641775983854692

In [24]:
y_test.head()

Unnamed: 0,Drug
2540,Metofix XL 500mg Tablet 10'SMetofix XL 1000mg ...
446,ALL 3 5mg Tablet 10'S
4771,Lamivir 30mg Capsule 30'SLamivir 300mg Solutio...
541,Cetcip 10mg Tablet 10'SCetcip Syrup 30ml
1270,Acarcip 25mg Tablet 10'SAcarcip 50mg Tablet 10'S


In [23]:
# Making Prediction of Test Dataset
prediction = rf.predict(X_test)
print(prediction[0:10])

["Glucozid M 500mg Tablet 10'S" "Alerdain M Tablet 10'S"
 "Axovir 800mg Tablet 10'SAxovir 400mg Tablet 10'SAxovir 250mg Injection 1'SAxovir 500mg Injection 1'SAxovir 200mg Tablet 10'S"
 "Byellergy 10mg Tablet 10'S" "Glucozid M 500mg Tablet 10'S"
 "Itra Plus 200mg Tablet 10'S"
 "Axovir 800mg Tablet 10'SAxovir 400mg Tablet 10'SAxovir 250mg Injection 1'SAxovir 500mg Injection 1'SAxovir 200mg Tablet 10'S"
 'Lumate Injection 2ml' 'Lumate Injection 2ml'
 "Axovir 800mg Tablet 10'SAxovir 400mg Tablet 10'SAxovir 250mg Injection 1'SAxovir 500mg Injection 1'SAxovir 200mg Tablet 10'S"]


In [25]:
# Making Prediction For Patient with Malaria, Age 24 and Gender Male
test = [5,1,24]
test = np.array(test) # List to Numpy Array
print(test.shape) #Shape of array is 1D
test = np.array(test).reshape(1,-1) #Convert 1D to 2D Array
print(test.shape)

(3,)
(1, 3)


In [26]:
# Recommend Drug based on Disease, Age and Gender
prediction = rf.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S


In [28]:
# Dump Model in Disk
import joblib as joblib
joblib.dump(rf, 'model/medical_rf.pkl')

['model/medical_rf.pkl']

In [31]:
# Loading the Model
med_rf = joblib.load('model/medical_rf.pkl')

In [32]:
# Predict Using Loaded Model in the Disk
prediction = med_rf.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S


#### Fitting Decision Tree

In [40]:
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt = dt.fit(df_x,np.ravel(df_y))

In [41]:
#Check the Accuracy
y_pred = dt.predict(X_test)
print(accuracy_score(y_test,y_pred))
print(accuracy_score(y_test,y_pred, normalize=False))

0.039354187689202826
39


In [42]:
#DT Score
dt.score(X_test,y_test)

0.039354187689202826

In [43]:
# Recommend Drug based on Disease, Age and Gender
prediction = dt.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S


In [44]:
# Dump Model in Disk
import joblib as joblib
joblib.dump(dt, 'model/medical_dt.pkl')

['model/medical_dt.pkl']

In [45]:
# Loading the Model
med_dt = joblib.load('model/medical_dt.pkl')

In [46]:
# Predict Using Loaded Model in the Disk
prediction = med_dt.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S


#### Logistic Regression

In [48]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr = lr.fit(df_x,np.ravel(df_y))



In [49]:
#Check the Accuracy
y_pred = lr.predict(X_test)
print(accuracy_score(y_test,y_pred))
print(accuracy_score(y_test,y_pred, normalize=False))

0.0010090817356205853
1


In [50]:
#LR Score
lr.score(X_test,y_test)

0.0010090817356205853

In [51]:
# Recommend Drug based on Disease, Age and Gender
prediction = lr.predict(test)
print(prediction[0])

Velpaclear 400/100mg Tablet 28'S
