In [1]:
# importing necessary libraries

import pandas as pd
from pandas import read_csv
import numpy as np
import matplotlib.pyplot as plt

### Loading dataset

In [2]:
# loading dataset

filename = 'Drug.csv'
data = read_csv(filename)
data.head(10)

Unnamed: 0,Drug,Disease,Gender,Age
0,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,Male,23
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,Male,23
2,ACGEL CL NANO Gel 15gm,Acne,Male,23
3,ACGEL NANO Gel 15gm,Acne,Male,23
4,Acleen 1% Lotion 25ml,Acne,Male,23
5,Aclene 0.10% Gel 15gm,Acne,Male,23
6,Acnay Gel 10gm,Acne,Male,23
7,Acne Aid Bar 50gmAcne Aid Bar 100gm,Acne,Male,23
8,Acne UV Gel 60gm,Acne,Male,23
9,Acne UV SPF 30 Gel 30gm,Acne,Male,23


In [3]:
data.dtypes

Drug       object
Disease    object
Gender     object
Age         int64
dtype: object

In [4]:
data.shape

(4955, 4)

In [5]:
# check and handle missing values
data.isnull().sum()

Drug       0
Disease    0
Gender     0
Age        0
dtype: int64

### Data preprocessing

In [6]:
data.replace({'Gender':{'Female':0,'Male':1}}, inplace=True)

In [7]:
x = data[['Disease']]

In [8]:
x.Disease.unique()

array(['Acne', 'Allergy', 'Diabetes', 'Fungal infection',
       'Urinary tract infection', 'Malaria', 'Migraine', 'Hepatitis B',
       'AIDS'], dtype=object)

In [9]:
data.replace({'Disease':{'Acne':0, 'Allergy':1, 'Diabetes':2, 'Fungal infection':3,
       'Urinary tract infection':4, 'Malaria':5, 'Migraine':6, 'Hepatitis B':7,
       'AIDS':8}}, inplace=True)

In [10]:
data.head()

Unnamed: 0,Drug,Disease,Gender,Age
0,A CN Gel(Topical) 20gmA CN Soap 75gm,0,1,23
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,0,1,23
2,ACGEL CL NANO Gel 15gm,0,1,23
3,ACGEL NANO Gel 15gm,0,1,23
4,Acleen 1% Lotion 25ml,0,1,23


In [12]:
data.dtypes

Drug       object
Disease     int64
Gender      int64
Age         int64
dtype: object

### fitting the model

In [15]:
# feature selection

df_x = data[['Disease','Gender','Age']]
df_y = data[['Drug']]

In [17]:
df_x.head()

Unnamed: 0,Disease,Gender,Age
0,0,1,23
1,0,1,23
2,0,1,23
3,0,1,23
4,0,1,23


In [18]:
df_y.head()

Unnamed: 0,Drug
0,A CN Gel(Topical) 20gmA CN Soap 75gm
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...
2,ACGEL CL NANO Gel 15gm
3,ACGEL NANO Gel 15gm
4,Acleen 1% Lotion 25ml


In [20]:
# test train split

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=0)

In [21]:
X_train.head()

Unnamed: 0,Disease,Gender,Age
2150,2,1,23
167,0,0,20
3188,2,0,23
844,1,0,23
4129,3,0,37


In [22]:
y_train.head()

Unnamed: 0,Drug
2150,Glycinorm Total 30mg Tablet 10'SGlycinorm Tota...
167,Dersol BH Ointment 30gm
3188,Wosulin New 30/70 100IU Cartridge 3X3ml
844,Lejet M JR Tablet 10'S
4129,Zocon 50mg Tablet 4'SZocon Transgel 15gmZocon ...


In [23]:
# fitting random forest

from sklearn.ensemble import RandomForestClassifier

In [24]:
rf = RandomForestClassifier()
rf = rf.fit(df_x, np.ravel(df_y))

In [25]:
# Model accuracy
from sklearn.metrics import accuracy_score

y_pred = rf.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(accuracy_score(y_test, y_pred, normalize=False))

0.04238143289606458
42


In [26]:
# score

rf.score(X_test, y_test)

0.04238143289606458

In [28]:
y_test.head()

Unnamed: 0,Drug
2540,Metofix XL 500mg Tablet 10'SMetofix XL 1000mg ...
446,ALL 3 5mg Tablet 10'S
4771,Lamivir 30mg Capsule 30'SLamivir 300mg Solutio...
541,Cetcip 10mg Tablet 10'SCetcip Syrup 30ml
1270,Acarcip 25mg Tablet 10'SAcarcip 50mg Tablet 10'S


In [27]:
# making prediction of test dataset

prediction = rf.predict(X_test)
print(prediction[:10])

["Metpio 15mg Tablet 10'S" "Alerfix 5mg Tablet 10'S"
 "Lazid N Tablet 60'SLazid N Tablet 30'S" "LFAST AM Capsule 10's"
 "Metpio 15mg Tablet 10'S" 'Kenz Anti Lice Solution 55ml'
 "Lazid N Tablet 60'SLazid N Tablet 30'S" 'Malnate R 150mg Injection 2ml'
 'Malnate R 150mg Injection 2ml' "Lazid N Tablet 60'SLazid N Tablet 30'S"]


In [31]:
# making predcition for patient with malaria, age 24, male
# malaris=5, age=24, gender=1
test = [5,1,24]
test = np.array(test) #list to numpy array
print(test.shape)
test = np.array(test).reshape(1,-1) # convert 1D array to 2D array
print(test.shape)

(3,)
(1, 3)


In [39]:
# recommend drug based on disease, age, gender

prediction = rf.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S


In [34]:
# Dumping the model to the disk

import joblib as joblib
joblib.dump(rf,'model/medical_rf.pkl')

['model/medical_rf.pkl']

In [35]:
# Loading the model

clf = joblib.load('model/medical_rf.pkl')

In [37]:
import warnings
warnings.filterwarnings('ignore')

In [38]:
# make predictions using loaded model from disk

prediction = clf.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S


In [41]:
# Gaussian Naive Bayes

In [40]:
# fitting naive bayes
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb = gnb.fit(df_x, np.ravel(df_y))

In [42]:
# checking the model accuracy
y_pred = gnb.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(accuracy_score(y_test, y_pred, normalize=False))

0.039354187689202826
39


In [43]:
# score

gnb.score(X_test, y_test)

0.039354187689202826

In [44]:
# making recommendation of drug
result = gnb.predict(test)
print(result[0])

Combither Forte 80/480mg Tablet 6'S


In [45]:
# dump the model

joblib.dump(gnb,'model/medical_nb.pkl')

['model/medical_nb.pkl']