In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Input, Concatenate
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import pickle

# Load Dataset
df = pd.read_csv("symptom_based_medical_recommendation_10000 (3).csv")

df.columns = df.columns.str.strip()
if 'Medicine Recommendation' not in df.columns:
    raise KeyError("Column 'Medicine Recommendation' not found. Available columns: ", df.columns)

df.rename(columns={'Medicine Recommendation': 'Recommended_Medicines'}, inplace=True)
df['Symptoms'] = df['Symptoms'].astype(str)

# Encode Disease Labels
label_encoder = LabelEncoder()
df['Disease_Encoded'] = label_encoder.fit_transform(df['Disease'])

# Tokenization & Padding for Symptoms
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Symptoms'])
X_sequences = tokenizer.texts_to_sequences(df['Symptoms'])
X_padded = pad_sequences(X_sequences, maxlen=10, padding='post')

# Extract Age as a Feature
X_age = df['Age'].values.reshape(-1, 1)

df['Recommended_Medicines'] = df['Recommended_Medicines'].fillna('').apply(lambda x: [med.strip() for med in x.split(',')])

# Encode Medicines using MultiLabelBinarizer
medicine_encoder = MultiLabelBinarizer()
y_medicine = medicine_encoder.fit_transform(df['Recommended_Medicines'])

# Splitting Data
X_train_symptoms, X_test_symptoms, X_train_age, X_test_age, y_train_disease, y_test_disease, y_train_medicine, y_test_medicine = train_test_split(
    X_padded, X_age, df['Disease_Encoded'], y_medicine, test_size=0.2, random_state=42
)

# Build Model
input_symptoms = Input(shape=(10,))
input_age = Input(shape=(1,))
embedding = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)(input_symptoms)
lstm = LSTM(64, return_sequences=True)(embedding)
lstm = LSTM(32)(lstm)
merged = Concatenate()([lstm, input_age])
disease_output = Dense(len(df['Disease'].unique()), activation='softmax', name='disease_output')(merged)
medicine_output = Dense(y_medicine.shape[1], activation='sigmoid', name='medicine_output')(merged)

model = Model(inputs=[input_symptoms, input_age], outputs=[disease_output, medicine_output])
model.compile(
    loss={'disease_output': 'sparse_categorical_crossentropy', 'medicine_output': 'binary_crossentropy'},
    optimizer='adam',
    metrics={'disease_output': ['accuracy'], 'medicine_output': ['accuracy']}
)

# Train Model
model.fit(
    [X_train_symptoms, X_train_age],
    {'disease_output': y_train_disease, 'medicine_output': y_train_medicine},
    validation_data=([X_test_symptoms, X_test_age], {'disease_output': y_test_disease, 'medicine_output': y_test_medicine}),
    epochs=10,
    batch_size=32
)

# Save Model & Preprocessing Objects
model.save("model/lstm_model.h5")
pickle.dump(tokenizer, open("model/tokenizer.pkl", "wb"))
pickle.dump(label_encoder, open("model/label_encoder.pkl", "wb"))
pickle.dump(medicine_encoder, open("model/medicine_encoder.pkl", "wb"))

print("✅ Model training complete and saved successfully!")

# Load Model & Preprocessing Objects
model = tf.keras.models.load_model("model/lstm_model.h5")
tokenizer = pickle.load(open("model/tokenizer.pkl", "rb"))
label_encoder = pickle.load(open("model/label_encoder.pkl", "rb"))
medicine_encoder = pickle.load(open("model/medicine_encoder.pkl", "rb"))

def predict_disease_and_medicine(symptoms_input, age_input):
    symptoms_seq = tokenizer.texts_to_sequences([symptoms_input])
    symptoms_padded = pad_sequences(symptoms_seq, maxlen=10, padding='post')
    age_array = np.array([[age_input]])

    predictions = model.predict([symptoms_padded, age_array])

    # Predict Disease
    disease_pred = np.argmax(predictions[0])
    predicted_disease = label_encoder.inverse_transform([disease_pred])[0]

    # Predict Medicines (Lower threshold to 0.3)
    medicine_pred = (predictions[1] > 0.3).astype(int)
    recommended_medicines = medicine_encoder.inverse_transform(medicine_pred)[0]

    print("\n🔹 Debug: Raw medicine probabilities:", predictions[1])  # Debugging line

    return predicted_disease, recommended_medicines

# Take User Input
user_symptoms = input("\n🔹 Enter your symptoms (comma-separated): ")
try:
    user_age = int(input("🔹 Enter your age: "))
    disease, medicines = predict_disease_and_medicine(user_symptoms, user_age)
    
    print("\n🔹 Predicted Disease:", disease)
    if medicines:
        print("🔹 Recommended Medicines:", ", ".join(medicines))
    else:
        print("⚠️ No specific medicine recommended. Try adjusting the threshold.")
except ValueError:
    print("⚠️ Invalid input. Please enter a valid age as a number.")


Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - disease_output_accuracy: 0.0315 - disease_output_loss: 10.0333 - loss: 11.8150 - medicine_output_accuracy: 0.0240 - medicine_output_loss: 1.7817 - val_disease_output_accuracy: 0.0710 - val_disease_output_loss: 4.0566 - val_loss: 4.2603 - val_medicine_output_accuracy: 0.0860 - val_medicine_output_loss: 0.1954
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - disease_output_accuracy: 0.1533 - disease_output_loss: 3.3887 - loss: 3.5906 - medicine_output_accuracy: 0.0710 - medicine_output_loss: 0.2019 - val_disease_output_accuracy: 0.5525 - val_disease_output_loss: 1.8713 - val_loss: 2.0532 - val_medicine_output_accuracy: 0.1240 - val_medicine_output_loss: 0.1809
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - disease_output_accuracy: 0.6841 - disease_output_loss: 1.5096 - loss: 1.6792 - medicine_output_accuracy: 0.1557 - med



✅ Model training complete and saved successfully!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241ms/step

🔹 Debug: Raw medicine probabilities: [[2.4264245e-04 2.0140750e-02 1.5950808e-04 8.3491700e-03 9.8849973e-03
  1.1430204e-02 5.0342507e-03 4.1733623e-05 3.2654556e-05 2.0895335e-03
  5.4776163e-05 1.3908909e-01 3.8599446e-03 4.3164277e-05 1.8841524e-03
  2.8137414e-04 1.3861601e-03 8.1026237e-03 2.6678755e-03 3.3973720e-05
  1.0465722e-03 2.3244044e-02 1.0418072e-03 1.9744992e-02 9.0358332e-03
  4.5637256e-03 5.3580995e-03 1.7927685e-03 3.0164763e-03 7.4491608e-01
  1.4875222e-02 1.5401638e-03 2.3386699e-04 5.7591661e-03 1.4574865e-03
  1.0345627e-03 3.1346474e-02 7.9434180e-01 4.5600980e-05 5.5521834e-03
  1.8530391e-02 2.9756537e-02 1.1044890e-01 9.4768370e-04 8.9959688e-03
  5.3382735e-03 3.6074608e-03 5.4622060e-03 7.7687795e-03 2.2978008e-02
  4.9954210e-03 1.0424540e-02 4.2058458e-04 5.7997339e-04 1.4472255e-02]]

🔹 Predicted Disease: GERD
🔹 Recommended Medi