In [12]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input,  Dense, Embedding, LSTM
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model

In [22]:
# Load the table of patient problems with their disease and prescription labels.
data = pd.read_csv('medical_data.csv')

# Show column dtypes / non-null counts, then preview the first rows.
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 407 entries, 0 to 406
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Patient_Problem  407 non-null    object
 1   Disease          407 non-null    object
 2   Prescription     407 non-null    object
dtypes: object(3)
memory usage: 9.7+ KB


Unnamed: 0,Patient_Problem,Disease,Prescription
0,"Constant fatigue and muscle weakness, struggli...",Chronic Fatigue Syndrome,"Cognitive behavioral therapy, graded exercise ..."
1,"Frequent severe migraines, sensitivity to ligh...",Migraine with Aura,"Prescription triptans, avoid triggers like bri..."
2,"Sudden weight gain and feeling cold, especiall...",Hypothyroidism,Levothyroxine to regulate thyroid hormone levels.
3,"High fever, sore throat, and swollen lymph nod...",Mononucleosis,"Rest and hydration, ibuprofen for pain."
4,"Excessive thirst and frequent urination, dry m...",Diabetes Mellitus,Insulin therapy and lifestyle changes.


**Tokenization**

Tokenization converts the text data into sequences of integers.

In [3]:
# Free-text symptom descriptions that serve as the model input.
problem_texts = data['Patient_Problem']

# Fit the vocabulary on those texts; the vocabulary is capped through num_words and
# words the tokenizer does not know are mapped to the '<oov>' token.
tokenizer = Tokenizer(num_words=5000, oov_token='<oov>')
tokenizer.fit_on_texts(problem_texts)

# One list of integer word ids per patient description.
sequences = tokenizer.texts_to_sequences(problem_texts)

sequences

[[37, 71, 2, 111, 164, 368, 16, 369, 370],
 [24, 11, 268, 112, 16, 72, 2, 269],
 [26, 34, 133, 2, 9, 62, 18, 4, 3, 35, 2, 42],
 [93, 63, 53, 38, 2, 73, 165, 166, 9, 84, 371],
 [46, 74, 2, 24, 59, 36, 75, 372],
 [43, 270, 40, 7, 20, 167, 27, 373, 193, 29, 374, 194],
 [54, 34, 21, 2, 28, 168, 9, 375],
 [11, 50, 7, 271, 16, 3, 51, 94, 32, 195],
 [95, 44, 2, 96, 4, 3, 64, 18, 4, 3, 196, 2, 197],
 [134, 85, 272, 135, 3, 75, 2, 376],
 [377, 14, 52, 198, 136, 199],
 [169, 97, 50, 76, 2, 378],
 [55, 86, 18, 22, 28, 30, 273, 379, 17, 3, 23],
 [137, 5, 3, 23, 2, 77, 274, 200, 2, 380, 71],
 [26, 11, 138, 30, 201, 275, 276, 139, 31],
 [25, 60, 30, 78, 381, 382, 28, 168],
 [24, 170, 383, 171, 2, 384, 202, 140, 385],
 [11, 86, 2, 56, 33, 386, 277, 203],
 [387, 388, 389, 390, 113, 114, 391, 204],
 [14, 98, 87, 115, 2, 172],
 [13, 12, 9, 205, 392, 2, 393, 141, 278, 16, 279],
 [15, 24, 57, 112, 16, 72, 2, 269],
 [47, 8, 25, 60, 2, 142, 280, 18, 22, 28],
 [26, 58, 5, 173, 116, 9, 281],
 [394, 34, 21, 2,

**Padding**

To give all input sequences the same length, we pad them to the length of the longest sequence.

In [14]:
# Length of the longest tokenized description; every sequence is padded up to it so
# the model receives a rectangular integer array.
max_length = max(len(seq) for seq in sequences)

# Padding (and truncation, if ever needed) is applied at the end of each sequence.
# `maxlen` must reference `max_length` defined just above; the earlier spelling
# `max_lenght` was an undefined name and raised NameError on a fresh kernel.
padded_sequence = pad_sequences(
    sequences,
    maxlen=max_length,
    padding='post',
    truncating='post',
)



**Encoding the labels**

We will encode the disease and prescription columns as integers.

In [15]:
# One encoder per target column, kept around so that predicted class indices can be
# mapped back to their original text with inverse_transform.
label_encoder_disease = LabelEncoder().fit(data['Disease'])
label_encoder_prescription = LabelEncoder().fit(data['Prescription'])

# Integer class id for every row of each target column.
disease_labels = label_encoder_disease.transform(data['Disease'])
prescription_labels = label_encoder_prescription.transform(data['Prescription'])

# One-hot encoded targets, used to train the two softmax output heads.
disease_labels_categorical = to_categorical(disease_labels)
prescription_labels_categorical = to_categorical(prescription_labels)




Combining all the labels into one target variable


In [16]:
# Put the two one-hot blocks side by side: disease columns first, prescription columns after.
Y = np.concatenate(
    (disease_labels_categorical, prescription_labels_categorical),
    axis=1,
)

print(Y)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


**Building the Model architecture**

In [17]:
# Shared trunk: padded token ids -> 64-dimensional embeddings -> one 64-unit LSTM output.
# NOTE(review): input_dim=5000 mirrors the tokenizer's num_words=5000; keep the two in sync.
input_layer = Input(shape=(max_length,))
embedding = Embedding(input_dim=5000, output_dim=64)(input_layer)
lstm_layer = LSTM(64)(embedding)

# Each head gets one softmax unit per class known to its label encoder.
n_disease_classes = len(label_encoder_disease.classes_)
n_prescription_classes = len(label_encoder_prescription.classes_)

# Two independent classification heads fed by the same LSTM output.
disease_output = Dense(
    n_disease_classes,
    activation='softmax',
    name='disease_output',
)(lstm_layer)
prescription_output = Dense(
    n_prescription_classes,
    activation='softmax',
    name='prescription_output',
)(lstm_layer)

**Compiling the model**

In [24]:
# Assemble the two-headed network: one text input, disease and prescription outputs.
model = Model(inputs=input_layer, outputs=[disease_output, prescription_output])

# Both heads use the same loss and metric; the dict keys are the `name=` values
# given to the two output Dense layers.
head_names = ('disease_output', 'prescription_output')
model.compile(
    optimizer='adam',
    loss={head: 'categorical_crossentropy' for head in head_names},
    metrics={head: ['accuracy'] for head in head_names},
)

model.summary()

In [25]:
# Train both heads jointly; each key names the output layer that the one-hot target supervises.
training_targets = {
    'disease_output': disease_labels_categorical,
    'prescription_output': prescription_labels_categorical,
}
model.fit(padded_sequence, training_targets, epochs=100, batch_size=32)

Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - disease_output_accuracy: 0.0037 - disease_output_loss: 5.1818 - loss: 11.1456 - prescription_output_accuracy: 0.0000e+00 - prescription_output_loss: 5.9639
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - disease_output_accuracy: 0.0163 - disease_output_loss: 5.1681 - loss: 11.1300 - prescription_output_accuracy: 0.0000e+00 - prescription_output_loss: 5.9619
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - disease_output_accuracy: 0.0364 - disease_output_loss: 5.1421 - loss: 11.1016 - prescription_output_accuracy: 0.0022 - prescription_output_loss: 5.9595
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - disease_output_accuracy: 0.0288 - disease_output_loss: 5.0244 - loss: 10.9971 - prescription_output_accuracy: 9.6315e-04 - prescription_output_loss: 5.9726
Epoch 5/100
[1m13/13[0m [

<keras.src.callbacks.history.History at 0x7e5e522b6490>

**Making prediction**

In [26]:
def predict_disease_prescription(patient_problem):
    """Print the predicted disease and suggested prescription for one symptom text.

    The text is converted to word ids with the notebook's fitted ``tokenizer``,
    padded to ``max_length`` with the same settings used for the training data,
    and passed through ``model``. For each output head the highest-scoring class
    is mapped back to its original label with the matching label encoder.

    Args:
        patient_problem: A single free-text description of the patient's symptoms.

    Returns:
        None. The two predictions are printed.
    """
    sequence = tokenizer.texts_to_sequences([patient_problem])
    # Use a distinct local name so it is not confused with the notebook-level
    # `padded_sequence` that holds the training inputs.
    model_input = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
    predictions = model.predict(model_input)

    # predictions[0] is the disease head, predictions[1] the prescription head.
    # Take the arg-max along the class axis for BOTH heads and then pick row 0
    # (the single input text). The disease head previously used a flattened
    # arg-max, which is only correct when the batch holds exactly one row.
    disease_index = np.argmax(predictions[0], axis=1)[0]
    prescription_index = np.argmax(predictions[1], axis=1)[0]

    # inverse_transform takes a list of ids and returns an array of labels, so
    # the printed values appear as one-element arrays.
    disease_label = label_encoder_disease.inverse_transform([disease_index])
    prescription_label = label_encoder_prescription.inverse_transform([prescription_index])

    print(f"Predicted Disease: {disease_label}")
    print(f"Suggested Prescription: {prescription_label}")


# Example query: loss of appetite.
patient_input = (
    "I've experienced a loss of appetite and don't enjoy food anymore."
)
predict_disease_prescription(patient_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 394ms/step
Predicted Disease: ['Depression']
Suggested Prescription: ['Antidepressants; eating nutrient-rich foods.']


In [27]:
# Example query: fatigue with a rapidly rising temperature.
patient_input = (
    'I feel very tired, and the temperature seems to be increasing rapidly.'
)
predict_disease_prescription(patient_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Predicted Disease: ['Migraine with Aura']
Suggested Prescription: ['Prescription triptans, avoid triggers like bright lights.']


In [28]:
# Example query: fatigue with alternating cold and hot sensations.
patient_input = (
    'I feel very tired, and outside I feel very cold, but inside feel very hot.'
)
predict_disease_prescription(patient_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Predicted Disease: ['Major Depressive Disorder']
Suggested Prescription: ['Antidepressants; psychotherapy.']
