In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

In [2]:
data=pd.read_csv("medical_data.csv")
data.head()

Unnamed: 0,Patient_Problem,Disease,Prescription
0,"Constant fatigue and muscle weakness, struggli...",Chronic Fatigue Syndrome,"Cognitive behavioral therapy, graded exercise ..."
1,"Frequent severe migraines, sensitivity to ligh...",Migraine with Aura,"Prescription triptans, avoid triggers like bri..."
2,"Sudden weight gain and feeling cold, especiall...",Hypothyroidism,Levothyroxine to regulate thyroid hormone levels.
3,"High fever, sore throat, and swollen lymph nod...",Mononucleosis,"Rest and hydration, ibuprofen for pain."
4,"Excessive thirst and frequent urination, dry m...",Diabetes Mellitus,Insulin therapy and lifestyle changes.


In [3]:
tokenizer=Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(data['Patient_Problem'])

sequences=tokenizer.texts_to_sequences(data['Patient_Problem'])

In [4]:
max_length= max(len(x)for x in sequences)
padded_sequences= pad_sequences(sequences, maxlen=max_length, padding='post')

In [5]:
label_encoder_disease= LabelEncoder()
label_encoder_prescription= LabelEncoder()

disease_labels= label_encoder_disease.fit_transform(data['Disease'])
prescription_labels= label_encoder_prescription.fit_transform(data['Prescription'])

disease_labels_categorical= to_categorical(disease_labels)
prescription_labels_categorical= to_categorical(prescription_labels)

In [6]:
Y=np.hstack((disease_labels_categorical, prescription_labels_categorical))

In [7]:
input_layer= Input(shape=(max_length,))

embedding= Embedding(input_dim=5000, output_dim=64)(input_layer)
lstm_layer= LSTM(64)(embedding)

disease_output= Dense(len(label_encoder_disease.classes_), activation='softmax', name='disease_output')(lstm_layer)
prescription_output= Dense(len(label_encoder_prescription.classes_), activation='softmax', name='prescription_output')(lstm_layer)

In [8]:
model= Model(inputs=input_layer, outputs=[disease_output, prescription_output])

model.compile(
    loss={'disease_output': 'categorical_crossentropy', 'prescription_output': 'categorical_crossentropy'},
    optimizer='adam',
    metrics={'disease_output': ['accuracy'], 'prescription_output': ['accuracy']}
)

model.summary()

In [9]:
model.fit(padded_sequences, {'disease_output': disease_labels_categorical, 'prescription_output': prescription_labels_categorical}, epochs=100, batch_size=32)

Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 42ms/step - disease_output_accuracy: 7.0200e-04 - disease_output_loss: 5.1790 - loss: 11.1430 - prescription_output_accuracy: 0.0000e+00 - prescription_output_loss: 5.9640
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - disease_output_accuracy: 0.0374 - disease_output_loss: 5.1589 - loss: 11.1196 - prescription_output_accuracy: 0.0018 - prescription_output_loss: 5.9608   
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - disease_output_accuracy: 0.0399 - disease_output_loss: 5.1038 - loss: 11.0647 - prescription_output_accuracy: 9.6315e-04 - prescription_output_loss: 5.9611
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - disease_output_accuracy: 0.0364 - disease_output_loss: 4.9862 - loss: 10.9571 - prescription_output_accuracy: 0.0081 - prescription_output_loss: 5.9710
Epoch 5/100
[1m13/13[0

<keras.src.callbacks.history.History at 0x1cd089ccda0>

In [10]:
def make_prediction(patient_problem):
    sequence=tokenizer.texts_to_sequences([patient_problem])
    padded_sequence= pad_sequences(sequence, maxlen=max_length, padding='post')

    prediction=model.predict(padded_sequence)

    disease_index= np.argmax(prediction[0], axis=1)[0]
    prescription_index= np.argmax(prediction[1], axis=1)[0]

    disease_predicted= label_encoder_disease.inverse_transform([disease_index])[0]
    prescription_predicted= label_encoder_prescription.inverse_transform([prescription_index])[0]

    print(f"Predicted Disease: {disease_predicted}")
    print(f"Suggested Prescription: {prescription_predicted}")

patient_input= "I've experienced a loss of appetite and don't enjoy food anymore."
make_prediction(patient_input)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 548ms/step
Predicted Disease: Pleurisy
Suggested Prescription: NSAIDs; rest; consult if persistent.


In [22]:
model.save('pred.h5')

