In [23]:
"""
import pandas as pd

# URL to the CSV file
url = 'https://raw.githubusercontent.com/adil200/Medical-Diagnoser/main/medical_data.csv'

# Read the CSV file from the URL into a DataFrame
df = pd.read_csv(url)

# Display the DataFrame
print(df)
"""

"\nimport pandas as pd\n\n# URL to the CSV file\nurl = 'https://raw.githubusercontent.com/adil200/Medical-Diagnoser/main/medical_data.csv'\n\n# Read the CSV file from the URL into a DataFrame\ndf = pd.read_csv(url)\n\n# Display the DataFrame\nprint(df)\n"

In [6]:
import os
import requests
import numpy as np
import pandas as pd
import numpy as np
from io import StringIO
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense


In [12]:
# Location of the CSV file used to train the model.
url = 'https://raw.githubusercontent.com/adil200/Medical-Diagnoser/main/medical_data.csv'

# Fetch the raw CSV text. The timeout (seconds) stops the cell from hanging
# forever if the server never answers.
response = requests.get(url, timeout=30)

# Fail loudly when the download did not succeed. Only printing a message here
# would leave `data` undefined (or stale from a previous run), and the cells
# below would then break with a confusing error far from the real cause.
if response.status_code != 200:
    raise RuntimeError(f"Failed to fetch data. HTTP Status code: {response.status_code}")

# Wrap the response body in a file-like object so pandas can parse it.
csv_data = StringIO(response.text)

# Parsed dataset; every later cell reads from `data`.
data = pd.read_csv(csv_data)


In [11]:
# Preview the first two rows to check which columns were loaded.
data.head(2)

Unnamed: 0,Patient_Problem,Disease,Prescription
0,"Constant fatigue and muscle weakness, struggli...",Chronic Fatigue Syndrome,"Cognitive behavioral therapy, graded exercise ..."
1,"Frequent severe migraines, sensitivity to ligh...",Migraine with Aura,"Prescription triptans, avoid triggers like bri..."


In [13]:
# Free-text symptom descriptions: the model's only input feature.
patient_texts = data['Patient_Problem']

# Keep the 5000 most frequent words; anything else maps to the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=5000)

# Build the word -> integer index from the whole column.
tokenizer.fit_on_texts(patient_texts)

# Replace every description with its list of integer word ids.
sequences = tokenizer.texts_to_sequences(patient_texts)


In [14]:
# Length of the longest tokenized description; every row is padded up to it.
max_length = max(map(len, sequences))

# Append zeros (the default pad value) after the real tokens of shorter rows.
padded_sequences = pad_sequences(sequences, padding='post', maxlen=max_length)

In [15]:
# Disease target: text label -> integer class id -> one-hot row.
label_encoder_disease = LabelEncoder()
disease_labels = label_encoder_disease.fit_transform(data['Disease'])
disease_labels_categorical = to_categorical(disease_labels)

# Prescription target: same two-step encoding with its own encoder, so each
# output head can be decoded back to text independently.
label_encoder_prescription = LabelEncoder()
prescription_labels = label_encoder_prescription.fit_transform(data['Prescription'])
prescription_labels_categorical = to_categorical(prescription_labels)


In [16]:
# Join the two one-hot target matrices column-wise into a single array
# (disease columns first, then prescription columns).
# NOTE(review): none of the later cells shown here read `Y`; the model is
# trained on the two separate target arrays instead.
Y = np.concatenate(
    (disease_labels_categorical, prescription_labels_categorical),
    axis=1,
)


In [17]:
# Input: one row of `max_length` integer token ids per patient description.
input_layer = Input(shape=(max_length,))

# Size the embedding table from the tokenizer's own vocabulary cap instead of
# repeating the literal 5000, so changing `num_words` on the tokenizer cannot
# leave the embedding too small (or needlessly large) for the ids it receives.
embedding = Embedding(input_dim=tokenizer.num_words, output_dim=64)(input_layer)

# A single LSTM summarises the token sequence into one 64-dimensional vector
# that both output heads share.
lstm_layer = LSTM(64)(embedding)

# Head 1: probability distribution over the known diseases.
disease_output = Dense(
    len(label_encoder_disease.classes_),
    activation='softmax',
    name='disease_output',
)(lstm_layer)

# Head 2: probability distribution over the known prescriptions.
prescription_output = Dense(
    len(label_encoder_prescription.classes_),
    activation='softmax',
    name='prescription_output',
)(lstm_layer)


![image.png](attachment:image.png)

In [19]:
# One shared input feeding two classification heads (disease, prescription).
model = Model(inputs=input_layer, outputs=[disease_output, prescription_output])

# Both heads are configured identically, so build the per-output settings from
# the layer names rather than spelling each dict out by hand.
output_names = ('disease_output', 'prescription_output')

model.compile(
    optimizer='adam',
    loss={name: 'categorical_crossentropy' for name in output_names},
    metrics={name: ['accuracy'] for name in output_names},
)

model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 17)]                 0         []                            
                                                                                                  
 embedding (Embedding)       (None, 17, 64)               320000    ['input_1[0][0]']             
                                                                                                  
 lstm (LSTM)                 (None, 64)                   33024     ['embedding[0][0]']           
                                                                                                  
 disease_output (Dense)      (None, 178)                  11570     ['lstm[0][0]']                
                                                                                              

In [20]:
# Each output head is trained against its own one-hot target, keyed by the
# name given to the corresponding Dense layer.
training_targets = {
    'disease_output': disease_labels_categorical,
    'prescription_output': prescription_labels_categorical,
}

# epochs: number of full passes over the training data.
# batch_size: number of samples processed per weight update.
model.fit(padded_sequences, training_targets, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x781ead75dff0>

In [21]:
def make_prediction(patient_problem):
    """Predict a disease and a prescription for a free-text symptom description.

    The text is converted to token ids with the fitted ``tokenizer``, padded to
    ``max_length`` and passed through ``model``. For each of the two outputs the
    highest-scoring class is mapped back to its text label with the matching
    label encoder.

    Args:
        patient_problem: Symptom description as a non-blank string.

    Returns:
        A ``(disease, prescription)`` tuple holding the two predicted labels.
        The same values are also printed, as before.

    Raises:
        ValueError: If ``patient_problem`` is not a string or contains only
            whitespace.
    """
    # A blank description would become an all-padding sequence and still yield
    # a prediction, so reject it instead of returning an arbitrary answer.
    if not isinstance(patient_problem, str) or not patient_problem.strip():
        raise ValueError("patient_problem must be a non-empty string")

    # Preprocess exactly like the training data: token ids, then post-padding.
    sequence = tokenizer.texts_to_sequences([patient_problem])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')

    # The model has two heads, so `prediction` holds two score arrays:
    # index 0 for the disease head, index 1 for the prescription head.
    # verbose=0 keeps the progress bar out of the cell output.
    prediction = model.predict(padded_sequence, verbose=0)

    # Highest-scoring class of each head for the single input row.
    disease_index = np.argmax(prediction[0], axis=1)[0]
    prescription_index = np.argmax(prediction[1], axis=1)[0]

    # Map the class ids back to the original text labels.
    disease_predicted = label_encoder_disease.inverse_transform([disease_index])[0]
    prescription_predicted = label_encoder_prescription.inverse_transform([prescription_index])[0]

    print(f"Predicted Disease: {disease_predicted}")
    print(f"Suggested Prescription: {prescription_predicted}")

    # Returning the labels lets callers reuse or assert on the result instead
    # of having to parse printed text.
    return disease_predicted, prescription_predicted


# Example query: one free-text symptom description run through the trained model.
# The trailing semicolon keeps Jupyter from echoing the call's return value;
# the prediction itself is printed by make_prediction.
patient_input = "I've experienced a loss of appetite and don't enjoy food anymore."
make_prediction(patient_input);


Predicted Disease: Tinnitus
Suggested Prescription: Antidepressants; eating nutrient-rich foods.
