In [1]:
#Importing necessary libraries
import pandas as pd
from pandas import read_csv
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#Loading Dataset
filename = 'Drug.csv'
data = read_csv(filename)
data.head()

Unnamed: 0,Drug,Disease,Gender,Age
0,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,Male,23
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,Male,23
2,ACGEL CL NANO Gel 15gm,Acne,Male,23
3,ACGEL NANO Gel 15gm,Acne,Male,23
4,Acleen 1% Lotion 25ml,Acne,Male,23


In [3]:
data.dtypes

Drug       object
Disease    object
Gender     object
Age         int64
dtype: object

In [4]:
data.shape

(4955, 4)

In [5]:
#Check and handle missing values
data.isnull().sum()

Drug       0
Disease    0
Gender     0
Age        0
dtype: int64

### Data Preprocessing

In [6]:
data.replace({'Gender':{'Female':0,'Male':1}},inplace = True)

In [7]:
x = data[['Disease']]

In [8]:
x.Disease.unique()

array(['Acne', 'Allergy', 'Diabetes', 'Fungal infection',
       'Urinary tract infection', 'Malaria', 'Migraine', 'Hepatitis B',
       'AIDS'], dtype=object)

In [9]:
data = data.replace({'Disease':{'Acne':0, 'Allergy':1, 'Diabetes':2, 'Fungal infection':3,
       'Urinary tract infection':4, 'Malaria':5, 'Migraine':6, 'Hepatitis B':7,
       'AIDS':8}})

In [10]:
data.dtypes

Drug       object
Disease     int64
Gender      int64
Age         int64
dtype: object

### Fitting the model

In [11]:
#Feature Selection
df_x = data[['Disease','Gender','Age']]
df_y = data[['Drug']]

In [13]:
df_y.head()

Unnamed: 0,Drug
0,A CN Gel(Topical) 20gmA CN Soap 75gm
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...
2,ACGEL CL NANO Gel 15gm
3,ACGEL NANO Gel 15gm
4,Acleen 1% Lotion 25ml


In [14]:
df_x.head()

Unnamed: 0,Disease,Gender,Age
0,0,1,23
1,0,1,23
2,0,1,23
3,0,1,23
4,0,1,23


### Train and Test Split

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [16]:
X_train, X_test, y_train, y_test = train_test_split(df_x,df_y,test_size = 0.4, random_state = 50)

In [17]:
X_train.head()

Unnamed: 0,Disease,Gender,Age
1564,2,0,20
3443,3,0,23
231,0,0,20
4924,8,1,37
1951,2,0,20


In [18]:
y_train.head()

Unnamed: 0,Drug
1564,Emnorm SR 1000mg Tablet 10'S
3443,Eberclin Cream 30gmEberclin Cream 15gm
231,Isofeel 20 Capsule 10's
4924,Virocomb N Tablet 60'S
1951,Glimy P 15/2mg Tablet 10'S


### Fitting Random Forest

In [19]:
rf = RandomForestClassifier()
rf = rf.fit(df_x, np.ravel(df_y))

In [20]:
rf.score(X_test,y_test)

0.04742684157416751

In [21]:
prediction = rf.predict(X_test)
print(prediction[0:10])

["TEN DC M 1000mg Tablet 10'STEN DC M 500mg Tablet 10'S"
 "Secretag 2mg Tablet 10'SSecretag 1mg Tablet 10'S"
 "ITSBEST 100mg Capsule 10'sItsbest 100mg Capsule 4'SItsbest 200mg Capsule 4'SItsbest 200Mg Capsule 10'sItsbest 1% Powder 100gm"
 "Pionorm GM Tablet 10'S" 'Depiglare Cream 15gm'
 'Zykt 2% Cream 15gmZykt Soap 75gmZykt 2% Cream 30gmZykt Cream 60gm'
 'Luliclinz Cream 15gmLuliclinz Cream 30gm'
 'Cofryl Syrup 60mlCofryl Syrup 150ml' "Pionorm GM Tablet 10'S"
 "Fexodine 120mg Tablet 10'S"]


In [22]:
y_test.head()

Unnamed: 0,Drug
2037,Glizid MR 60mg Tablet 10'S
2855,Ten20 M 500 Tablet 10'S
3306,Ampholip 10 mg Injection 2mlAmpholip 100 mg In...
2020,GLIVE M Tablet 10's
159,Decroma 6% Cream 30gm


### Making Prediction for Patient with Malaria

In [23]:
test = [5,1,24]
test = np.array(test)
test = np.array(test).reshape(1,-1)
print(test.shape)

(1, 3)


### Drug Recommendation

In [24]:
prediction = rf.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S




In [25]:
import joblib as joblib
joblib.dump(rf,'medical_rf.pkl')

['medical_rf.pkl']

In [27]:
clf = joblib.load('medical_rf.pkl')

In [28]:
prediction = clf.predict(test)
print(prediction)

["Combither Forte 80/480mg Tablet 6'S"]




### Gaussian Naive Bayes

In [29]:
from sklearn.naive_bayes import GaussianNB

In [30]:
gnb = GaussianNB()
gnb = gnb.fit(df_x,np.ravel(df_y))

In [31]:
y_pred = gnb.predict(X_test)

In [32]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test,y_pred))
print(accuracy_score(y_test,y_pred,normalize = False))

0.04994954591321897
99


In [33]:
gnb.score(X_test,y_test)

0.04994954591321897

In [34]:
result = gnb.predict(test)
print(result[0])



Combither Forte 80/480mg Tablet 6'S


In [36]:
joblib.dump(gnb,'medical_nb.pkl')

['medical_nb.pkl']