<a href="https://colab.research.google.com/github/Dynamic369/Medical_Diagnoser/blob/main/Medical-diagnoser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

### 1. Loading the dataset

In [3]:
# Read the medical dataset straight from the hosted CSV file.
DATA_URL = 'https://raw.githubusercontent.com/adil200/Medical-Diagnoser/main/medical_data.csv'
data = pd.read_csv(DATA_URL)

# Preview the first rows to check what was loaded.
data.head()

Unnamed: 0,Patient_Problem,Disease,Prescription
0,"Constant fatigue and muscle weakness, struggli...",Chronic Fatigue Syndrome,"Cognitive behavioral therapy, graded exercise ..."
1,"Frequent severe migraines, sensitivity to ligh...",Migraine with Aura,"Prescription triptans, avoid triggers like bri..."
2,"Sudden weight gain and feeling cold, especiall...",Hypothyroidism,Levothyroxine to regulate thyroid hormone levels.
3,"High fever, sore throat, and swollen lymph nod...",Mononucleosis,"Rest and hydration, ibuprofen for pain."
4,"Excessive thirst and frequent urination, dry m...",Diabetes Mellitus,Insulin therapy and lifestyle changes.


### 2. Data Preprocessing and Preparation

In [6]:
# Build the word index from the patient problem descriptions.
# num_words=5000 caps the vocabulary used for encoding; words outside the
# vocabulary are encoded as the "<OOV>" token.
problem_texts = data['Patient_Problem']
tokenizer = Tokenizer(oov_token="<OOV>", num_words=5000)
tokenizer.fit_on_texts(problem_texts)

# Encode every description as a list of integer word indices.
sequences = tokenizer.texts_to_sequences(problem_texts)

In [8]:
# The longest encoded description sets the fixed model input length.
max_length = max(map(len, sequences))

# Shorter sequences are padded at the end ('post') up to max_length.
padded_sequences = pad_sequences(sequences, padding='post', maxlen=max_length)

In [11]:
# Fit one LabelEncoder per target column and turn each label into an integer id.
label_encoder_disease = LabelEncoder()
disease_labels = label_encoder_disease.fit_transform(data['Disease'])

label_encoder_prescription = LabelEncoder()
prescription_labels = label_encoder_prescription.fit_transform(data['Prescription'])

# Convert the integer ids into categorical arrays for the two softmax heads.
disease_labels_categorical, prescription_labels_categorical = (
    to_categorical(label_ids)
    for label_ids in (disease_labels, prescription_labels)
)

In [12]:
# Combine both categorical label arrays column-wise into a single
# multi-label target variable.
Y = np.concatenate(
    [disease_labels_categorical, prescription_labels_categorical],
    axis=1,
)

### 3. Model Building

In [13]:
# Shared encoder: padded token ids -> 64-d embeddings -> one 64-unit LSTM vector.
input_layer = Input(shape=(max_length,))

# Size the embedding table from the tokenizer rather than repeating the
# literal 5000, so the vocabulary cap and the embedding input_dim cannot
# drift apart if num_words is changed later.
# NOTE(review): the inputs are zero-padded with padding='post'; consider
# mask_zero=True so the LSTM skips padding steps -- confirm before enabling.
embedding = Embedding(input_dim=tokenizer.num_words, output_dim=64)(input_layer)
lstm_layer = LSTM(64)(embedding)

# Two softmax heads on the same LSTM output, one unit per class known to the
# corresponding label encoder.
disease_output = Dense(
    len(label_encoder_disease.classes_),
    activation='softmax',
    name='disease_output',
)(lstm_layer)

prescription_output = Dense(
    len(label_encoder_prescription.classes_),
    activation='softmax',
    name='prescription_output',
)(lstm_layer)

In [14]:
# Assemble the two-output model from the shared input and both heads.
model = Model(inputs=input_layer, outputs=[disease_output, prescription_output])

# Both heads use the same loss and metric, keyed by their output layer names.
output_names = ('disease_output', 'prescription_output')
model.compile(
    optimizer='adam',
    loss={name: 'categorical_crossentropy' for name in output_names},
    metrics={name: ['accuracy'] for name in output_names},
)

model.summary()

In [16]:
# Train both heads jointly; each target array is matched to its output layer
# by name.
training_targets = {
    'disease_output': disease_labels_categorical,
    'prescription_output': prescription_labels_categorical,
}
model.fit(padded_sequences, training_targets, batch_size=32, epochs=100)

Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - disease_output_accuracy: 0.0193 - disease_output_loss: 5.1776 - loss: 11.1412 - prescription_output_accuracy: 0.0000e+00 - prescription_output_loss: 5.9637
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - disease_output_accuracy: 0.0390 - disease_output_loss: 5.1503 - loss: 11.1132 - prescription_output_accuracy: 0.0160 - prescription_output_loss: 5.9628
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - disease_output_accuracy: 0.0363 - disease_output_loss: 5.0659 - loss: 11.0263 - prescription_output_accuracy: 0.0160 - prescription_output_loss: 5.9608
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - disease_output_accuracy: 0.0308 - disease_output_loss: 4.9452 - loss: 10.9090 - prescription_output_accuracy: 0.0161 - prescription_output_loss: 5.9641
Epoch 5/100
[1m13/13[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x7be1e74129f0>

### 4. Making the Prediction

In [17]:
def make_prediction(patient_problem):
    """Predict and print the disease and prescription for a symptom description.

    The text is encoded with the notebook's fitted ``tokenizer``, padded to
    ``max_length`` and passed to the trained two-output ``model``. The
    highest-scoring class of each output is mapped back to its label with the
    matching label encoder and printed.

    Args:
        patient_problem: Free-text description of the patient's symptoms.

    Raises:
        ValueError: If ``patient_problem`` is not a string, is blank, or
            yields no tokens after tokenization.
    """
    # Reject blank or non-text input up front: it would otherwise become an
    # all-padding sequence and still produce an arbitrary prediction.
    if not isinstance(patient_problem, str) or not patient_problem.strip():
        raise ValueError("patient_problem must be a non-empty string")

    # Preprocess the input exactly as the training data was preprocessed.
    sequence = tokenizer.texts_to_sequences([patient_problem])
    if not sequence[0]:
        # e.g. the text consisted only of characters the tokenizer filters out.
        raise ValueError("patient_problem contains no recognizable words")
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')

    # predictions[0] holds the disease scores, predictions[1] the prescription
    # scores, following the order of the model's outputs.
    predictions = model.predict(padded_sequence)

    # Take the best class for the single input row of each output.
    disease_index = np.argmax(predictions[0], axis=1)[0]
    prescription_index = np.argmax(predictions[1], axis=1)[0]

    # Decode the class indices back to their original labels.
    disease_predicted = label_encoder_disease.inverse_transform([disease_index])[0]
    prescription_predicted = label_encoder_prescription.inverse_transform([prescription_index])[0]

    print(f"Predicted Disease:{disease_predicted}")
    print(f"Predicted Prescription:{prescription_predicted}")

In [18]:
# Example 1: a short description of appetite loss.
patient_input = (
    "I've experienced a loss of appetite and don't enjoy food anymore."
)
make_prediction(patient_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 199ms/step
Predicted Disease:Depression
Predicted Prescription:Antidepressants; eating nutrient-rich foods.


In [19]:
# Example 2: a longer description listing several symptoms.
patient_input = (
    "feeling warm or having a high body temperature, chills, shivering, headaches, muscle aches, fatigue, and loss of appetite"
)
make_prediction(patient_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Predicted Disease:Thyroid Nodule
Predicted Prescription:Regular monitoring; possible surgery if it grows or changes.
