In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score,classification_report
from sklearn.ensemble import RandomForestClassifier
import pickle
 

## [Disclaimer!](Disclaimer.md) The datasets used in this file are entirely fictional and are not based on any real data. 
### You can see the dataset creation process in the [making_df](making_df.ipynb) file.

In [35]:
# Per the dataset notes, the Dosage and Frequency columns hold the literal string
# 'N/A' for Bronchiolitis patients. keep_default_na=False stops pandas from turning
# those strings into NaN, so the model sees 'N/A' as an ordinary label value.
#
# The path uses forward slashes so it resolves on Windows, Linux and macOS alike
# (a backslash is only treated as a path separator on Windows).

df=pd.read_csv('Respiratory_Sound_Database/medicine_prescription.csv',keep_default_na=False)
df.head()

Unnamed: 0,Patient ID,Age,Gender,Smoking Status,Disease,Medication Prescribed,Dosage,Frequency
0,1861,89,Female,Non-smoker,URTI,Ibuprofen,400 mg,Every 6 to 8 hours as needed
1,354,96,Female,Non-smoker,LRTI,Doxycycline,100 mg,Twice daily for 7-14 days
2,1334,15,Female,Active-smoker,URTI,Ibuprofen,400 mg,Every 6 to 8 hours as needed
3,906,15,Male,Ex-smoker,URTI,Ibuprofen,400 mg,Every 6 to 8 hours as needed
4,1290,50,Male,Ex-smoker,COPD,Tiotropium,1 puff (18 mcg),Once daily


In [37]:
# One-hot encoder for the categorical columns. It is kept in `encoder` so the
# same fitted mapping can be applied to new inputs later on.
encoder = OneHotEncoder()

# Split the frame into the numeric column and the categorical columns.
numeric_data = df[['Age']]
categorical_data = df[['Gender', 'Smoking Status', 'Disease']]

# Fit the encoder and densify its result so it can be joined with Age.
categorical_data = encoder.fit_transform(categorical_data).toarray()

# Feature matrix: Age in the first column, followed by the one-hot columns.
features = np.concatenate((numeric_data, categorical_data), axis=1)

# Targets: one row per patient holding (medication, dosage, frequency).
labels = df[['Medication Prescribed', 'Dosage', 'Frequency']].to_numpy()

In [63]:
# Display the assembled feature matrix (Age column followed by the one-hot columns).
features

array([[89.,  1.,  0., ...,  0.,  0.,  1.],
       [96.,  1.,  0., ...,  1.,  0.,  0.],
       [15.,  1.,  0., ...,  0.,  0.,  1.],
       ...,
       [50.,  1.,  0., ...,  0.,  0.,  1.],
       [37.,  1.,  0., ...,  0.,  0.,  1.],
       [34.,  0.,  1., ...,  0.,  0.,  1.]])

In [38]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=28,
)

In [39]:
# Display the held-out target rows (medication, dosage, frequency).
y_test

array([['Doxycycline', '100 mg', 'Twice daily for 7-14 days'],
       ['Doxycycline', '100 mg', 'Twice daily for 7-14 days'],
       ['Fluticasone', '88 mcg', 'Twice daily'],
       ...,
       ['Amoxicillin', '90 mg/kg/day', 'Divided in 2 doses'],
       ['Doxycycline', '100 mg', 'Twice daily for 7-14 days'],
       ['Amoxicillin', '500 mg', 'Every 8 hours']], dtype=object)

In [40]:
# Initialize the random forest classifier; random_state is fixed so training is reproducible.
model=RandomForestClassifier(random_state=28)

In [41]:
# Train on the training split. y_train has three target columns
# (medication, dosage, frequency), so all three are learned together.
model.fit(X_train,y_train)

In [42]:
# Score the held-out split: predict every target at once, then measure accuracy
# separately for each target column.
y_pred = model.predict(X_test)

accuracies = []
for true_column, predicted_column in zip(y_test.T, y_pred.T):
    accuracies.append(accuracy_score(true_column, predicted_column))

for i, accuracy in enumerate(accuracies):
    print(f"Accuracy for output {i}: {accuracy}")

Accuracy for output 0: 0.995
Accuracy for output 1: 0.9775
Accuracy for output 2: 0.995


In [44]:
# Build one precision/recall/F1 report per target column, then print them in order.
classification_reports = []
for column in range(y_test.shape[1]):
    classification_reports.append(
        classification_report(y_test[:, column], y_pred[:, column])
    )

for report in classification_reports:
    print(report)

                      precision    recall  f1-score   support

           Albuterol       1.00      1.00      1.00        14
         Amoxicillin       1.00      1.00      1.00        26
        Azithromycin       1.00      1.00      1.00        69
          Budesonide       0.92      0.92      0.92        12
         Doxycycline       1.00      1.00      1.00        53
         Fluticasone       0.98      0.98      0.98        55
   Hypertonic saline       1.00      1.00      1.00        14
           Ibuprofen       1.00      1.00      1.00        55
Supportive care only       1.00      1.00      1.00        53
          Tiotropium       1.00      1.00      1.00        49

            accuracy                           0.99       400
           macro avg       0.99      0.99      0.99       400
        weighted avg       0.99      0.99      0.99       400

                 precision    recall  f1-score   support

         0.5 mg       0.92      0.92      0.92        12
0.63 to 1.25 m

In [51]:
# Save the trained model.
with open('medication_prescription.pkl', 'wb') as file:
    pickle.dump(model, file)

# The model alone cannot score raw patient records: inputs must first pass through
# the same fitted OneHotEncoder that produced the training features. Save it next
# to the model so both can be restored in a fresh session.
with open('medication_encoder.pkl', 'wb') as file:
    pickle.dump(encoder, file)

In [65]:
# Read the saved model back from disk into `s_model`.
# NOTE: pickle.load can execute arbitrary code, so only load files you created or trust.
with open('medication_prescription.pkl', 'rb') as model_file:
    s_model = pickle.load(model_file)

In [67]:
def predict_medication(user_data, model=None, feature_encoder=None):
    """Predict medication, dosage and frequency for a single patient.

    Parameters
    ----------
    user_data : dict
        Must contain the keys 'Age', 'Gender', 'Smoking Status' and 'Disease'.
        'Age' may be a number or a numeric string (for example the raw result
        of ``input()``); it is converted to ``float`` before prediction.
    model : object, optional
        Fitted estimator exposing ``predict``. Defaults to the notebook-level
        ``s_model``.
    feature_encoder : object, optional
        Fitted encoder exposing ``transform`` for the three categorical
        columns. Defaults to the notebook-level ``encoder``.

    Returns
    -------
    The result of ``model.predict`` for the one-row feature matrix. With the
    notebook's model this is a (1, 3) array of
    [medication, dosage, frequency].

    Raises
    ------
    KeyError
        If a required field is missing from ``user_data``.
    ValueError
        If 'Age' cannot be converted to a number.
    """
    required_fields = ('Age', 'Gender', 'Smoking Status', 'Disease')
    missing = [field for field in required_fields if field not in user_data]
    if missing:
        raise KeyError(f"user_data is missing required field(s): {missing}")

    # Convert Age explicitly so the feature matrix is numeric rather than an
    # object array that the estimator would have to coerce implicitly.
    try:
        age = float(user_data['Age'])
    except (TypeError, ValueError) as exc:
        raise ValueError(f"Age must be numeric, got {user_data['Age']!r}") from exc

    # Fall back to the notebook globals when no explicit objects are supplied.
    if model is None:
        model = s_model
    if feature_encoder is None:
        feature_encoder = encoder

    # Keep the categorical input as a DataFrame with the training column names.
    user_df = pd.DataFrame([user_data])
    categorical_data = user_df[['Gender', 'Smoking Status', 'Disease']]
    categorical_data = feature_encoder.transform(categorical_data).toarray()

    # Same column layout as training: Age first, then the one-hot columns.
    numeric_data = np.array([[age]], dtype=float)
    features = np.hstack([numeric_data, categorical_data])
    return model.predict(features)

In [68]:
# Collect one patient's details interactively. Surrounding whitespace is stripped
# so an accidental trailing space does not turn a valid category into one the
# encoder has never seen, and Age is converted to a number up front so a typo
# fails here rather than deep inside the model.
user_data = {
    'Age': float(input('Enter the age of the patient: ').strip()),
    'Gender': input('Enter Gender of the patient: ').strip(),
    'Smoking Status': input('Enter Smoking Status of the patient: ').strip(),
    'Disease': input('Enter the Disease of the patient: ').strip(),
}

pred = predict_medication(user_data)
pred


array([['Tiotropium', '1 puff (18 mcg)', 'Once daily']], dtype=object)

In [70]:
# First row, first column of the prediction: the medication name.
pred[0][0]

'Tiotropium'