## Import Libraries and Set Up the Environment


In [365]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

# Load the dataset

In [366]:
# Read the raw records; later cells use the 'Patient_Problem', 'Disease' and
# 'Prescription' columns of this frame.
dataset = pd.read_csv(filepath_or_buffer='medical_data.csv')

# Initialize NLTK tools

In [367]:
# Extra, machine-specific search location for NLTK corpora. On machines where
# this directory does not exist NLTK simply skips it and uses its default
# search paths.
NLTK_DATA_DIR = '/Users/bahodirnematjonov/Desktop/AI-Machine-Learning-Deep-Learning-Computer-Vision/nltk_data'
if NLTK_DATA_DIR not in nltk.data.path:
    # Guarded so that re-running this cell does not keep growing the path list.
    nltk.data.path.append(NLTK_DATA_DIR)

# English stop words to drop during preprocessing (despite the name, this is
# the stop-word set, not the full vocabulary).
try:
    all_words = set(stopwords.words('english'))
except LookupError:
    # The corpus is not on any search path: fetch it once, then retry.
    nltk.download('stopwords')
    all_words = set(stopwords.words('english'))

# Stemmer shared by the preprocessing function.
ps = PorterStemmer()

# Preprocessing function

In [368]:
def preprocessong(text):
    """Normalise a free-text description into a space-joined string of stems.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lowercased and split on whitespace, words found in ``all_words``
    are dropped, and the remaining words are stemmed with ``ps``.

    Args:
        text: Raw text. Any non-string value (for example ``None`` or the
            float NaN that pandas uses for missing cells) is treated as
            empty text instead of raising ``TypeError``.

    Returns:
        str: The stems separated by single spaces, or ``''`` when nothing
        remains after cleaning.
    """
    # re.sub only accepts str/bytes; missing values would otherwise crash here.
    if not isinstance(text, str):
        return ''
    letters_only = re.sub('[^a-zA-Z]', ' ', text).lower()
    # Stop words are filtered before stemming, so the check sees whole words.
    return ' '.join(
        ps.stem(word) for word in letters_only.split() if word not in all_words
    )

# Preprocess the Patient_Problem column

In [369]:
# Cleaned, stemmed version of every patient description, in row order.
corpus = [preprocessong(problem) for problem in dataset['Patient_Problem']]

# Tokenization and padding

In [391]:
# Learn the word index from the cleaned corpus; num_words caps the vocabulary
# used when texts are converted to integer sequences.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(corpus)
sequences = tokenizer.texts_to_sequences(corpus)

# Pad every sequence to the length of the longest one so they form one array.
max_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_length)


# Encode labels

In [371]:
# Fit one encoder per target column; the fitted encoders are reused later to
# turn predicted class indices back into label text.
disease_label = LabelEncoder().fit(dataset['Disease'])
prescription_label = LabelEncoder().fit(dataset['Prescription'])

# Integer class ids for every row of the dataset.
disease_encoder = disease_label.transform(dataset['Disease'])
prescription_encoder = prescription_label.transform(dataset['Prescription'])

# One-hot encode the integer disease and prescription labels

In [372]:
# One-hot targets, one column per class known to the matching encoder.
disease_encoded = to_categorical(
    disease_encoder, num_classes=len(disease_label.classes_)
)
prescription_encoded = to_categorical(
    prescription_encoder, num_classes=len(prescription_label.classes_)
)

# Build the model

In [373]:
# Shared trunk: padded token ids -> 64-dimensional embeddings -> one 64-unit
# LSTM whose final output feeds the prediction heads.
input_layer = Input(shape=(max_length,))
# The vocabulary size is read from the tokenizer instead of repeating the
# literal, so the embedding table cannot drift out of sync with the
# `num_words` cap applied during tokenization.
embedding_layer = Embedding(input_dim=tokenizer.num_words, output_dim=64)(input_layer)
lstm_layer = LSTM(64)(embedding_layer)
# NOTE(review): this 10-way head is not among the outputs passed to Model(...)
# further down, so it is never trained or used. It is kept only so the name
# stays defined; consider deleting it.
output_layer = Dense(10, activation='softmax')(lstm_layer)

# Output layers for disease and prescription predictions

In [374]:
# One softmax head per target, both reading the same LSTM output. Each head is
# as wide as the number of classes its label encoder learned.
disease_output = Dense(
    units=len(disease_label.classes_),
    activation='softmax',
    name='disease_output',
)(lstm_layer)
prescription_output = Dense(
    units=len(prescription_label.classes_),
    activation='softmax',
    name='prescription_output',
)(lstm_layer)



# Compile the model

In [380]:
# Two-headed model: a single input, one output per prediction target.
model = Model(inputs=input_layer, outputs=[disease_output, prescription_output])

# Both heads use the same loss and metric, so the per-output settings are
# generated from the output-layer names.
model.compile(
    optimizer='adam',
    loss={
        head: 'categorical_crossentropy'
        for head in ('disease_output', 'prescription_output')
    },
    metrics={
        head: 'accuracy'
        for head in ('disease_output', 'prescription_output')
    },
)
model.summary()

# Train the model

In [381]:
# Train both heads jointly; each target array is keyed by the name of the
# output layer it supervises. No validation data is passed here.
model.fit(
    x=padded_sequences,
    y=dict(
        disease_output=disease_encoded,
        prescription_output=prescription_encoded,
    ),
    batch_size=32,
    epochs=100,
)


Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - disease_output_accuracy: 0.0255 - disease_output_loss: 5.1810 - loss: 11.1437 - prescription_output_accuracy: 0.0000e+00 - prescription_output_loss: 5.9628
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - disease_output_accuracy: 0.1322 - disease_output_loss: 5.1646 - loss: 11.1229 - prescription_output_accuracy: 0.0111 - prescription_output_loss: 5.9583   
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - disease_output_accuracy: 0.1038 - disease_output_loss: 5.1409 - loss: 11.0941 - prescription_output_accuracy: 0.0252 - prescription_output_loss: 5.9533
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - disease_output_accuracy: 0.0491 - disease_output_loss: 5.0613 - loss: 11.0118 - prescription_output_accuracy: 0.0091 - prescription_output_loss: 5.9504
Epoch 5/100
[1m13/13[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x3190f2e10>

# Prediction input preparation function (preprocess, tokenize and pad the patient's text)

In [392]:
def patient_prediction(patient_input):
    """Turn one raw symptom description into a padded model input.

    The text is cleaned with ``preprocessong``, converted to token ids by the
    fitted ``tokenizer`` and padded to ``max_length``. Despite the name, this
    function does not call the model; the caller passes the result to
    ``model.predict``.

    Args:
        patient_input: Free-text description of the patient's problem.

    Returns:
        The ``pad_sequences`` result for a batch containing this single text.
    """
    cleaned = preprocessong(patient_input)
    # texts_to_sequences expects a list of texts, hence the one-element list.
    token_ids = tokenizer.texts_to_sequences([cleaned])
    return pad_sequences(token_ids, maxlen=max_length)

# Example usage

In [412]:
# Demo query: a free-text symptom description.
patient_request = 'Constant fatigue and muscle weakness, struggling to stay awake.'

# Prepare the text, then run it through both heads of the model.
function_res = patient_prediction(patient_request)
prediction = model.predict(function_res)

# prediction[0] is the disease head, prediction[1] the prescription head;
# take the index of the highest score from each.
disease_index, prescription_index = (
    np.argmax(prediction[0]),
    np.argmax(prediction[1]),
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


# Retrieve the predicted label

In [417]:
# Map each predicted class index back to its original label text.
# inverse_transform returns a one-element array here, so take that element
# directly; str() keeps the result a plain string even if the labels are not.
disease_inverse = str(disease_label.inverse_transform([disease_index])[0])
prescription_inverse = str(prescription_label.inverse_transform([prescription_index])[0])

In [418]:
# Report the predicted disease and its recommended treatment, one per line.
print(
    f"Your Disease Type Is: {disease_inverse}",
    f"Recommendation: {prescription_inverse}",
    sep="\n",
)

Your Disease Type Is: Chronic Fatigue Syndrome
Recommendation: Cognitive behavioral therapy, graded exercise therapy.


# Print the results