In [2]:
# importing the necessary libraries

import numpy as np
import pandas as pd

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

In [5]:
# import the dataset and see the first 5 rows

data=pd.read_csv("medical_data.csv")
data.head()

Unnamed: 0,Patient_Problem,Disease,Prescription
0,"Constant fatigue and muscle weakness, struggli...",Chronic Fatigue Syndrome,"Cognitive behavioral therapy, graded exercise ..."
1,"Frequent severe migraines, sensitivity to ligh...",Migraine with Aura,"Prescription triptans, avoid triggers like bri..."
2,"Sudden weight gain and feeling cold, especiall...",Hypothyroidism,Levothyroxine to regulate thyroid hormone levels.
3,"High fever, sore throat, and swollen lymph nod...",Mononucleosis,"Rest and hydration, ibuprofen for pain."
4,"Excessive thirst and frequent urination, dry m...",Diabetes Mellitus,Insulin therapy and lifestyle changes.


In [6]:
# last 5 rows of the dataset
data.tail()

Unnamed: 0,Patient_Problem,Disease,Prescription
402,Noticeable thinning of the hair on the top of ...,Androgenetic Alopecia,"Minoxidil, finasteride for males."
403,"Greenish discharge from the eyes, accompanied ...",Conjunctivitis,Antibiotic or antihistamine eye drops.
404,"Experiencing confusion, difficulty speaking, a...",Stroke,"Immediate medical attention, clot-busting drugs."
405,Constant feeling of fullness in the ears and h...,Eustachian Tube Dysfunction,"Nasal steroids, autoinflation exercises."
406,"Blood in stool, along with a change in bowel m...",Colorectal Cancer,"Colonoscopy, potentially surgery, chemotherapy."


In [7]:
# shape of the dataset

data.shape

(407, 3)

In [8]:
# information of each column of the dataset

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407 entries, 0 to 406
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Patient_Problem  407 non-null    object
 1   Disease          407 non-null    object
 2   Prescription     407 non-null    object
dtypes: object(3)
memory usage: 9.7+ KB


In [9]:
# check for NULL values

data.isnull().sum()

Patient_Problem    0
Disease            0
Prescription       0
dtype: int64

In [10]:
# check for duplicate value

data.duplicated().sum()

np.int64(0)

In [11]:
# Initialising the Tokenizer

tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(data['Patient_Problem'])

In [12]:
# Converting text to sequences

sequences = tokenizer.texts_to_sequences(data['Patient_Problem'])

In [13]:
# Padding the sequences

max_length = max(len(x) for x in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

In [14]:
# Encoding the labels

label_encoder_disease = LabelEncoder()
label_encoder_prescription = LabelEncoder()

In [15]:
disease_labels = label_encoder_disease.fit_transform(data['Disease'])
prescription_labels = label_encoder_prescription.fit_transform(data['Prescription'])

In [16]:
# Converting labels to categorical

disease_labels_categorical = to_categorical(disease_labels)
prescription_labels_categorical = to_categorical(prescription_labels)

In [17]:
# Combining the labels into a single multi-label target variable

Y = np.hstack((disease_labels_categorical, prescription_labels_categorical))

In [18]:
# Defining model architecture

input_layer = Input(shape=(max_length,))
embedding = Embedding(input_dim=5000, output_dim=64)(input_layer)
lstm_layer = LSTM(64)(embedding)
disease_output = Dense(len(label_encoder_disease.classes_), activation='softmax', name='disease_output')(lstm_layer)
prescription_output = Dense(len(label_encoder_prescription.classes_), activation='softmax', name='prescription_output')(lstm_layer)


In [19]:
# Compiling the model

model = Model(inputs=input_layer, outputs=[disease_output, prescription_output])

model.compile(
    loss={'disease_output': 'categorical_crossentropy', 
    'prescription_output': 'categorical_crossentropy'},
    optimizer='adam',
    metrics={'disease_output': ['accuracy'], 'prescription_output': ['accuracy']}
)

model.summary()

In [20]:
# Training the model
model.fit(padded_sequences, {'disease_output': disease_labels_categorical, 'prescription_output': prescription_labels_categorical},
          epochs=100, batch_size=32)

Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - disease_output_accuracy: 0.0031 - disease_output_loss: 5.1821 - loss: 11.1465 - prescription_output_accuracy: 0.0000e+00 - prescription_output_loss: 5.9643  
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - disease_output_accuracy: 0.0230 - disease_output_loss: 5.1665 - loss: 11.1276 - prescription_output_accuracy: 0.0032 - prescription_output_loss: 5.9610   
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - disease_output_accuracy: 0.0325 - disease_output_loss: 5.1231 - loss: 11.0861 - prescription_output_accuracy: 3.5100e-04 - prescription_output_loss: 5.9632
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - disease_output_accuracy: 0.0139 - disease_output_loss: 4.9876 - loss: 10.9570 - prescription_output_accuracy: 0.0020 - prescription_output_loss: 5.9703       
Epoch 5/100
[1m13/13

<keras.src.callbacks.history.History at 0x22295330d60>

In [21]:
# Making Predictions

def make_prediction(patient_problem):
    # Preprocessing the input
    sequence = tokenizer.texts_to_sequences([patient_problem])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    
    # Making prediction
    prediction = model.predict(padded_sequence)
    
    # Decoding the prediction
    disease_index = np.argmax(prediction[0], axis=1)[0]
    prescription_index = np.argmax(prediction[1], axis=1)[0]
    
    disease_predicted = label_encoder_disease.inverse_transform([disease_index])[0]
    prescription_predicted = label_encoder_prescription.inverse_transform([prescription_index])[0]
    
    print(f"Predicted Disease: {disease_predicted}")
    print(f"Suggested Prescription: {prescription_predicted}")


patient_input = "I've experienced a loss of appetite and don't enjoy food anymore."
make_prediction(patient_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 252ms/step
Predicted Disease: Alopecia Areata
Suggested Prescription: Antidepressants; eating nutrient-rich foods.


In [22]:
model.save('model.h5')

