## Importing Libraries and Dataset

In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense


In [2]:
from google.colab import files
uploaded=files.upload()

Saving medical_data.csv to medical_data.csv


In [12]:
data = pd.read_csv('/content/medical_data.csv')
print(data)


                                       Patient_Problem  \
0    Constant fatigue and muscle weakness, struggli...   
1    Frequent severe migraines, sensitivity to ligh...   
2    Sudden weight gain and feeling cold, especiall...   
3    High fever, sore throat, and swollen lymph nod...   
4    Excessive thirst and frequent urination, dry m...   
..                                                 ...   
402  Noticeable thinning of the hair on the top of ...   
403  Greenish discharge from the eyes, accompanied ...   
404  Experiencing confusion, difficulty speaking, a...   
405  Constant feeling of fullness in the ears and h...   
406  Blood in stool, along with a change in bowel m...   

                         Disease  \
0       Chronic Fatigue Syndrome   
1             Migraine with Aura   
2                 Hypothyroidism   
3                  Mononucleosis   
4              Diabetes Mellitus   
..                           ...   
402        Androgenetic Alopecia   
403            

In [13]:
data.head()

Unnamed: 0,Patient_Problem,Disease,Prescription
0,"Constant fatigue and muscle weakness, struggli...",Chronic Fatigue Syndrome,"Cognitive behavioral therapy, graded exercise ..."
1,"Frequent severe migraines, sensitivity to ligh...",Migraine with Aura,"Prescription triptans, avoid triggers like bri..."
2,"Sudden weight gain and feeling cold, especiall...",Hypothyroidism,Levothyroxine to regulate thyroid hormone levels.
3,"High fever, sore throat, and swollen lymph nod...",Mononucleosis,"Rest and hydration, ibuprofen for pain."
4,"Excessive thirst and frequent urination, dry m...",Diabetes Mellitus,Insulin therapy and lifestyle changes.


In [14]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407 entries, 0 to 406
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Patient_Problem  407 non-null    object
 1   Disease          407 non-null    object
 2   Prescription     407 non-null    object
dtypes: object(3)
memory usage: 9.7+ KB


## Data Preprocessing and Preparation

In [4]:
#Tokenizing and Sequencing Text Data
tokenizer = Tokenizer(num_words=5000, oov_token="")
tokenizer.fit_on_texts(data['Patient_Problem'])

sequences = tokenizer.texts_to_sequences(data['Patient_Problem'])


In [5]:
max_length = max(len(x) for x in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

In [6]:
# Encoding the labels
label_encoder_disease = LabelEncoder()
label_encoder_prescription = LabelEncoder()

disease_labels = label_encoder_disease.fit_transform(data['Disease'])
prescription_labels = label_encoder_prescription.fit_transform(data['Prescription'])

# Converting labels to categorical
disease_labels_categorical = to_categorical(disease_labels)
prescription_labels_categorical = to_categorical(prescription_labels)

In [7]:
Y = np.hstack((disease_labels_categorical, prescription_labels_categorical))

## Model Building

In [8]:
input_layer = Input(shape=(max_length,))

embedding = Embedding(input_dim=5000, output_dim=64)(input_layer)
lstm_layer = LSTM(64)(embedding)

disease_output = Dense(len(label_encoder_disease.classes_), activation='softmax',
name='disease_output')(lstm_layer)

prescription_output = Dense(len(label_encoder_prescription.classes_),
activation='softmax', name='prescription_output')(lstm_layer)

## Compiling the model

In [9]:
model = Model(inputs=input_layer, outputs=[disease_output, prescription_output])

model.compile(
    loss={'disease_output': 'categorical_crossentropy',
    'prescription_output': 'categorical_crossentropy'},
    optimizer='adam',
    metrics={'disease_output': ['accuracy'], 'prescription_output': ['accuracy']}
)

model.summary()


In [10]:
model.fit(padded_sequences, {'disease_output': disease_labels_categorical, 'prescription_output':
      prescription_labels_categorical}, epochs=100, batch_size=32)

Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - disease_output_accuracy: 0.0128 - disease_output_loss: 5.1811 - loss: 11.1445 - prescription_output_accuracy: 0.0000e+00 - prescription_output_loss: 5.9635
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - disease_output_accuracy: 0.0443 - disease_output_loss: 5.1646 - loss: 11.1255 - prescription_output_accuracy: 0.0073 - prescription_output_loss: 5.9610
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - disease_output_accuracy: 0.0360 - disease_output_loss: 5.1285 - loss: 11.0904 - prescription_output_accuracy: 0.0022 - prescription_output_loss: 5.9619
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - disease_output_accuracy: 0.0339 - disease_output_loss: 5.0104 - loss: 10.9860 - prescription_output_accuracy: 0.0111 - prescription_output_loss: 5.9759
Epoch 5/100
[1m13/13[0m [32m━━━━━

<keras.src.callbacks.history.History at 0x7db3e1aeae90>

## Making Prediction

In [11]:
def make_prediction(patient_problem):
    # Preprocessing the input
    sequence = tokenizer.texts_to_sequences([patient_problem])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')

    # Making prediction
    prediction = model.predict(padded_sequence)

    # Decoding the prediction
    disease_index = np.argmax(prediction[0], axis=1)[0]
    prescription_index = np.argmax(prediction[1], axis=1)[0]

    disease_predicted = label_encoder_disease.inverse_transform([disease_index])[0]
    prescription_predicted = label_encoder_prescription.inverse_transform([prescription_index])[0]

    print(f"Predicted Disease: {disease_predicted}")
    print(f"Suggested Prescription: {prescription_predicted}")


patient_input = "I've experienced a loss of appetite and don't enjoy food anymore."
make_prediction(patient_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
Predicted Disease: Depression
Suggested Prescription: Antidepressants; eating nutrient-rich foods.
