Medicine Recommender Model

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report
from fpdf import FPDF

# Load the disease/symptom dataset
disease_data = pd.read_csv('../Data/Final_Augmented_dataset_Diseases_and_Symptoms.csv')
print(disease_data.head())

# Map every column name to its positional index.
# enumerate() covers however many columns the file has; the previous
# hard-coded range(378) would silently truncate the mapping for a wider file.
disease_data_columns = {
    column: position for position, column in enumerate(disease_data.columns)
}
print(disease_data_columns)

# Count the missing values in each column
print(disease_data.isna().sum())

# Collect the distinct disease names
diseases_list = disease_data['diseases'].unique().tolist()
print(diseases_list)

# Replace the disease names with integer class labels
encoder = LabelEncoder()
encoded_diseases = encoder.fit_transform(disease_data['diseases'])
disease_data['diseases'] = encoded_diseases
print(disease_data.head())

# Target labels are the encoded 'diseases' column; every other column is a feature
Y = disease_data['diseases']
X = disease_data.drop(columns=['diseases'])

# Report the dimensions of the features and the target
print(X.shape)
print(Y.shape)

# Disease names in the order the encoder numbered them
classes = encoder.classes_

# Lookup table from disease name to its integer label
values_of_encoder = {label: code for code, label in enumerate(classes)}
print(values_of_encoder)

# Hold out 20% of the rows for evaluation.
# NOTE: no random_state is passed, so the split (and the metrics printed
# below) differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

# Fit a logistic-regression classifier on the training portion
Model = LogisticRegression(max_iter=200)
Model.fit(X_train, y_train)

# Predict labels for the held-out rows
y_pred = Model.predict(X_test)

# Accuracy on the held-out rows; arguments follow scikit-learn's
# documented (y_true, y_pred) order
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

# Text summary of the per-class metrics. It is stored under its own name so
# the imported classification_report function is not overwritten by a string
# (which made a second run fail with "'str' object is not callable").
report = classification_report(y_test, y_pred, zero_division=0)
print(report)

# Predicting one value
predict=Model.predict([[0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])
print(predict)

# Turn the encoded prediction back into its disease name
decoded_labels = encoder.inverse_transform(predict)
predicted_disease_name = decoded_labels[0]
print(predicted_disease_name)

# Load the medicine catalogue and discard the Composition and Side_effects columns
medicine_data = pd.read_csv('../Data/Medicine_Details.csv')
medicine_data = medicine_data.drop(columns=['Composition', 'Side_effects'])

# Collapse the three review percentages into a single weighted score:
# excellent counts 5, average counts 3, poor counts 1, then divide by 100
excellent_pct = medicine_data['Excellent Review %']
average_pct = medicine_data['Average Review %']
poor_pct = medicine_data['Poor Review %']
medicine_data['Rating'] = (excellent_pct * 5 + average_pct * 3 + poor_pct * 1) / 100

# The individual percentage columns are no longer needed once 'Rating' exists
medicine_data = medicine_data.drop(
    columns=['Excellent Review %', 'Average Review %', 'Poor Review %']
)

# Boolean mask marking the medicines rated above 3.0
reviews_greater_than_3 = medicine_data['Rating'].gt(3.00)

# Order the medicines from highest to lowest rating and renumber the rows from zero
medicine_data = medicine_data.sort_values(by='Rating', ascending=False).reset_index(drop=True)

# Use the last word of each drug name as its route of administration
medicine_data['Route_of_Administration'] = medicine_data['Drug_Name'].str.split().str.get(-1)

# Show the first rows of the prepared medicine data
print(medicine_data.head())

# Filler words to strip from the 'Uses' text
words_to_remove = ['to', 'Treatment', 'prevention', 'of', 'Prevention', 'treatment', 'and', 'disease', 'due', 'the', 'in', 'body']

# Alternation that matches each listed word only as a whole word (case-sensitive)
pattern = '|'.join(rf'\b{word}\b' for word in words_to_remove)

# Delete the filler words from the 'Uses' column
medicine_data['Uses'] = medicine_data['Uses'].replace(pattern, '', regex=True)

# Collapse the whitespace runs left behind and trim both ends
medicine_data['Uses'] = medicine_data['Uses'].str.replace(r'\s+', ' ', regex=True).str.strip()

# Save the cleaned medicine data (the row index is written as the first, unnamed column)
path = '../Data/Cleaned_Medicine_data.csv'
medicine_data.to_csv(path)

# Reload the cleaned file as the final medicine data.
# index_col=0 reads the saved row index back as the index; without it pandas
# adds a junk 'Unnamed: 0' column. The same `path` is reused so the write and
# the read can never point at different files.
final_medicine_data = pd.read_csv(path, index_col=0)

# Work with the predicted disease as a lower-case string
predicted_disease_now = str(predicted_disease_name).lower()

print(type(predicted_disease_now))

# Build an alternation of the disease's words; each word is escaped so it is
# matched literally rather than interpreted as a regex
disease_words = predicted_disease_now.split()
patterns = '|'.join(re.escape(word) for word in disease_words)

# Keep the medicines whose 'Uses' text contains any of those words
# (case-insensitive; rows with a missing 'Uses' value never match)
filtered_medicine_data = final_medicine_data[
    final_medicine_data['Uses'].str.contains(patterns, case=False, na=False)
]

# Show every match
print(filtered_medicine_data)

# Keep at most the first five matches.
# The emptiness check must look at the *filtered* rows: the previous check
# tested final_medicine_data, so "No Medicines found" was never reported when
# nothing matched. `medicines` always stays a DataFrame (possibly empty)
# because generate_pdf() walks it with .itertuples(), which a str lacks.
medicines = filtered_medicine_data.head()
if medicines.empty:
    print("No Medicines found")
else:
    print(medicines)

# Patient info
patient_name = input("Enter the patient name: ")

# Keep asking until the age is a non-negative whole number; a bare
# int(input(...)) aborted the whole script with ValueError on any typo.
while True:
    age_text = input("Enter patient's age: ")
    try:
        patient_age = int(age_text)
    except ValueError:
        print("Please enter the age as a whole number.")
        continue
    if patient_age >= 0:
        break
    print("Age cannot be negative.")

# Disease name shown in the PDF title
disease = predicted_disease_name

# Image of AI generated signature
image_path = '../Image/Signature_TheDoctorAI.png'

def generate_pdf(pdf_output_path):
    """Write the prescription PDF to ``pdf_output_path``.

    The content comes from the module-level ``patient_name``, ``patient_age``,
    ``disease``, ``medicines`` and ``image_path``. ``medicines`` is normally a
    DataFrame with ``Drug_Name``, ``Uses``, ``Rating`` and
    ``Route_of_Administration`` columns; a plain string or an empty DataFrame
    is rendered as a single message line instead of medicine rows.
    """

    def latin1_safe(text):
        """Return ``text`` with characters outside Latin-1 replaced by '?'.

        FPDF's built-in fonts (Arial is used throughout) are limited to
        Latin-1. The earlier ``encode('UTF-8').decode('latin-1')`` round trip
        avoided an encoding error but turned every non-ASCII character into
        mojibake; replacing only the unsupported characters keeps the rest
        of the text readable.
        """
        return str(text).encode('latin-1', 'replace').decode('latin-1')

    class PDF(FPDF):
        def header(self):
            # Page banner, centred at the top of every page
            self.set_font('Arial', 'B', 16)
            self.cell(0, 10, 'TheDoctorAI', 0, 1, 'C')
            self.ln(10)

        def patient_info(self, name, age, current_date):
            # Name on the left and date on the right of one line, age below
            self.set_font('Arial', 'B', 12)
            self.cell(0, 10, f"Name: {name}", 0, 0, 'L')
            self.cell(0, 10, f"Date: {current_date}", 0, 1, 'R')
            self.cell(0, 10, f"Age: {age}", 0, 1, 'L')
            self.ln(10)

        def chapter_title(self, title):
            # Centred bold section heading
            self.set_font('Arial', 'B', 12)
            self.cell(0, 10, title, 0, 1, 'C')
            self.ln(10)

        def chapter_body(self, body):
            # Flowing regular-weight text followed by a line break
            self.set_font('Arial', '', 12)
            self.write(5, body)
            self.ln()

        def add_signature(self, image_path):
            # Right-aligned "Signature" label with the signature image near it
            self.ln(10)
            self.set_font('Arial', 'B', 14)
            self.cell(0, 10, 'Signature', 0, 1, 'R')
            self.image(image_path, x=self.w - 40, y=self.get_y() - 15, w=30)

    pdf = PDF()
    pdf.add_page()

    current_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # User-typed and model-derived text is sanitised too, so a non-Latin-1
    # character in the name or disease cannot abort PDF generation
    pdf.patient_info(latin1_safe(patient_name), patient_age, current_datetime)

    pdf.chapter_title(latin1_safe(f"Medicines for {disease}"))

    if isinstance(medicines, str):
        # The filtering step can store a plain message instead of a DataFrame
        pdf.chapter_body(latin1_safe(medicines))
        pdf.ln()
    elif medicines.empty:
        pdf.chapter_body("No Medicines found")
        pdf.ln()
    else:
        # Display medicine data as normal text, one paragraph per medicine
        for row in medicines.itertuples():
            medicine_info = f"{row.Drug_Name}: {row.Uses}, Rating: {row.Rating}, Route of Administration: {row.Route_of_Administration}"
            pdf.chapter_body(latin1_safe(medicine_info))
            pdf.ln()

    pdf.add_signature(image_path)
    pdf.output(pdf_output_path)

    print(f"PDF generated successfully at {pdf_output_path}")

# Destination file for the generated prescription
pdf_output_path = "Prescription.pdf"

# Build the prescription and write it to that file
generate_pdf(pdf_output_path)

         diseases  anxiety and nervousness  depression  shortness of breath  \
0  panic disorder                        1           0                    1   
1  panic disorder                        0           0                    1   
2  panic disorder                        1           1                    1   
3  panic disorder                        1           0                    0   
4  panic disorder                        1           1                    0   

   depressive or psychotic symptoms  sharp chest pain  dizziness  insomnia  \
0                                 1                 0          0         0   
1                                 1                 0          1         1   
2                                 1                 0          1         1   
3                                 1                 0          1         1   
4                                 0                 0          0         1   

   abnormal involuntary movements  chest tightness  ... 



[409]
kidney failure
                      Drug_Name  \
0           Momefit Nasal Spray   
1  Lmx Forte 500mg/125mg Tablet   
2          Nexopil -DSR Capsule   
3           Abrophyll-DM Tablet   
4             Amlopin 10 Tablet   

                                                Uses  \
0  Treatment and prevention of Sneezing and runny...   
1                  Treatment of Bacterial infections   
2  Treatment of Gastroesophageal reflux disease (...   
3                                             Asthma   
4  Treatment of Hypertension (high blood pressure...   

                                           Image URL  \
0  https://onemg.gumlet.io/l_watermark_346,w_480,...   
1  https://onemg.gumlet.io/l_watermark_346,w_480,...   
2  https://onemg.gumlet.io/l_watermark_346,w_480,...   
3  https://onemg.gumlet.io/l_watermark_346,w_480,...   
4  https://onemg.gumlet.io/l_watermark_346,w_480,...   

                 Manufacturer  Rating Route_of_Administration  
0   Entod Pharmaceuticals Ltd 