In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Preprocessing and training script: loads the CSV, encodes every categorical
# column with the explicit mappings declared below, scales the numeric
# columns, then fits a small dense classifier that predicts 'Severity'.
from sklearn.model_selection import train_test_split

# Load dataset
file_path = 'expanded_synthetic_health_data.csv'
df = pd.read_csv(file_path)

# Initialize encoders and scaler.
# `label_encoder` is fitted on 'Severity' only, so that
# `label_encoder.inverse_transform` turns a predicted class index back into
# its severity label.
scaler = StandardScaler()
label_encoder = LabelEncoder()

# Define mappings (shared by the training frame and the interactive prompt)
gender_mapping = {'Male': 1, 'Female': 0}
symptom_mapping = {
    "Abdominal pain": 1.0, "Chest pain": 2.0, "Constipation": 3.0, "Cough": 4.0, "Diarrhea": 5.0,
    "Difficulty swallowing": 6.0, "Dizziness": 7.0, "Eye discomfort and redness": 8.0,
    "Foot pain or ankle pain": 9.0, "Foot swelling or leg swelling": 10.0, "Headaches": 11.0,
    "Heart palpitations": 12.0, "Hip pain": 13.0, "Knee pain": 14.0, "Low back pain": 15.0,
    "Nasal congestion": 16.0, "Nausea or vomiting": 17.0, "Neck pain": 18.0, "Numbness or tingling in hands": 19.0,
    "Shortness of breath": 20.0, "Shoulder pain": 21.0, "Sore throat": 22.0, "Urinary problems": 23.0,
    "Wheezing": 24.0, "Ear ache": 25.0, "Fever": 26.0, "Joint pain or muscle pain": 27.0, "Skin rashes": 28.0
}
symptom_duration_mapping = {'Less than 2 days': 0, '2-5 days': 1, 'More than 5 days': 2}
onset_mapping = {'Sudden': 1, 'Gradual': 0}
chronic_mapping = {
    "Diabetes": 1.0, "Hypertension": 2.0, "Asthma": 3.0, "Arthritis": 4.0, "Obesity": 5.0,
    "Cholesterol": 6.0, "Depression": 7.0, "Cirrhosis": 8.0, "No chronic conditions": 9.0
}
alcohol_mapping = {'No': 0, 'Occasionally': 1, 'Regularly': 2}
physical_mapping = {'No': 0, 'Light': 1, 'Moderate': 2, 'Intense': 3}
sleep_mapping = {'Excellent': 3.0, 'Good': 2.0, 'Fair': 1.0, 'Poor': 0.0}
yes_no_mapping = {'Yes': 1, 'No': 0}

# Suggested medication classes per symptom, printed after a prediction.
symptoms_medications = {
    "Abdominal pain": "Antacids, Antispasmodics, Proton Pump Inhibitors, Analgesics",
    "Chest pain": "Nitroglycerin, Aspirin, Proton Pump Inhibitors, Muscle relaxants",
    "Constipation": "Laxatives, Stool softeners, Fiber supplements",
    "Cough": "Cough suppressants, Expectorants, Antihistamines, Bronchodilators",
    "Diarrhea": "Antidiarrheal agents, Oral rehydration solutions, Probiotics",
    "Difficulty swallowing": "Proton Pump Inhibitors, Antacids",
    "Dizziness": "Antivertigo agents, Benzodiazepines, Hydration and electrolytes",
    "Eye discomfort and redness": "Artificial tears, Antihistamine eye drops, Antibiotic eye drops",
    "Foot pain or ankle pain": "NSAIDs, Topical analgesics",
    "Foot swelling or leg swelling": "Diuretics, Compression stockings",
    "Headaches": "NSAIDs, Acetaminophen, Triptans, Caffeine-containing medications",
    "Heart palpitations": "Beta-blockers, Calcium channel blockers, Antiarrhythmic drugs",
    "Hip pain": "NSAIDs, Corticosteroid injections",
    "Knee pain": "NSAIDs, Topical analgesics, Corticosteroid injections",
    "Low back pain": "NSAIDs, Muscle relaxants, Topical pain relievers",
    "Nasal congestion": "Decongestants, Nasal sprays",
    "Nausea or vomiting": "Antiemetics, Antacids, Ginger supplements",
    "Neck pain": "NSAIDs, Muscle relaxants",
    "Numbness or tingling in hands": "NSAIDs, Gabapentin, Vitamin B12 supplements",
    "Shortness of breath": "Bronchodilators, Inhaled corticosteroids, Diuretics",
    "Shoulder pain": "NSAIDs, Topical analgesics",
    "Sore throat": "Throat lozenges, NSAIDs, Saltwater gargle",
    "Urinary problems": "Antibiotics, Alpha-blockers",
    "Wheezing": "Bronchodilators, Inhaled corticosteroids, Leukotriene inhibitors",
    "Ear ache": "Analgesics, Antibiotic ear drops",
    "Fever": "Antipyretics, Hydration",
    "Joint pain or muscle pain": "NSAIDs, Topical analgesics, Glucosamine supplements, Corticosteroid injections",
    "Skin rashes": "Antihistamines, Topical corticosteroids, Antibiotic creams"
}

# Apply mappings
df['Gender'] = df['Gender'].map(gender_mapping)
df['General Symptoms'] = df['General Symptoms'].map(symptom_mapping)
df['Pain Scale'] = df['Pain Scale'] / 10.0
df['Onset'] = df['Onset'].map(onset_mapping)

# 'Symptom Duration' and 'Chronic Conditions' are encoded with the same
# explicit mappings that the interactive prompt uses. Label-encoding them
# would assign codes by sorted label order, which disagrees with
# `symptom_duration_mapping` / `chronic_mapping` and would feed the model
# different codes at prediction time than it saw during training.
duration_values = df['Symptom Duration']
# NOTE(review): a missing chronic condition is treated as
# "No chronic conditions" -- presumably what a blank cell means; confirm
# against the CSV.
chronic_values = df['Chronic Conditions'].fillna('No chronic conditions')
for column_name, values, mapping in (
    ('Symptom Duration', duration_values, symptom_duration_mapping),
    ('Chronic Conditions', chronic_values, chronic_mapping),
):
    # Fail loudly instead of letting `.map` turn unknown labels into NaN.
    unexpected = set(values.dropna().unique()) - set(mapping)
    if unexpected:
        raise ValueError(
            f"Unmapped values in column '{column_name}': "
            f"{sorted(map(str, unexpected))}"
        )
df['Symptom Duration'] = duration_values.map(symptom_duration_mapping)
df['Chronic Conditions'] = chronic_values.map(chronic_mapping)

for binary_column in ('Allergies', 'Medications', 'Travel History',
                      'Contact with Sick Individuals', 'Smoking'):
    df[binary_column] = df[binary_column].map(yes_no_mapping)
df['Alcohol Consumption'] = df['Alcohol Consumption'].map(alcohol_mapping)
df['Physical Activity'] = df['Physical Activity'].map(physical_mapping)
df['Stress Levels'] = df['Stress Levels'] / 10.0
df['Sleep Quality'] = df['Sleep Quality'].map(sleep_mapping)

# Normalize numeric columns
# NOTE(review): the scaler is fitted on the full frame before the split below,
# so validation/test rows contribute to the scaling statistics.
numeric_columns = ['Age', 'Symptom Duration', 'Chronic Conditions', 'Alcohol Consumption', 'Physical Activity', 'Sleep Quality']
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

# Encode target variable
df['Severity'] = label_encoder.fit_transform(df['Severity'])
df.to_csv('new data.csv')

# Split dataset into features (X) and target (y)
X = df.drop(columns=['Severity'])
y = df['Severity'].values

# Split the dataset into train (70%), validation (15%), and test (15%) sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
# NOTE(review): X_test / y_test are held out but not evaluated in the code
# visible here.

# Build the neural network model: one input per feature column, one softmax
# output per distinct severity class.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(np.unique(y)), activation='softmax')
])

# Compile the model (the sparse loss matches the integer-encoded target)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

# Function to get user input and make predictions
def _resolve_choice(mapping, raw):
    """Return the key of *mapping* matching *raw* (case/whitespace-insensitive), or None."""
    wanted = raw.strip().casefold()
    for key in mapping:
        if key.casefold() == wanted:
            return key
    return None


def _ask_choice(column, mapping):
    """Print the options of *mapping* and prompt until one of its keys is entered; return that key."""
    print(f"Options for {column}: {', '.join(mapping)}")
    while True:
        key = _resolve_choice(mapping, input(f"Value for {column}: "))
        if key is not None:
            return key
        print(f"Unrecognized value for {column}; please enter one of the listed options.")


def _ask_number(column, low=None, high=None):
    """Prompt until a finite number (optionally within [low, high]) is entered; return it as float."""
    while True:
        raw = input(f"Value for {column}: ")
        try:
            number = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number; please try again.")
            continue
        # float() accepts 'nan' / 'inf', which would poison the prediction.
        if not np.isfinite(number):
            print(f"'{raw}' is not a finite number; please try again.")
            continue
        if (low is not None and number < low) or (high is not None and number > high):
            print(f"Value for {column} must be between {low} and {high}.")
            continue
        return number


def get_user_input():
    """Interactively collect one patient record, predict its severity and print the result.

    For every column of the module-level feature frame ``X`` the user is
    prompted for a value. Categorical answers are matched case-insensitively
    against the corresponding module-level mapping and re-prompted until they
    are valid, instead of silently falling back to a default code. 'Pain
    Scale' and 'Stress Levels' are divided by 10, and any other column is
    read as a plain number.

    The encoded row is scaled with the fitted module-level ``scaler``, passed
    to ``model`` and the arg-max class is decoded with ``label_encoder``. When
    the predicted label is 'Mild' or 'Moderate', the medication list for the
    entered general symptom is printed as well.

    NOTE(review): the codes produced here must agree with however each column
    was encoded in the training frame -- verify for every mapped column.

    Returns:
        None. All results are written to stdout.
    """
    yes_no = {'Yes': 1, 'No': 0}
    choice_columns = {
        'Gender': gender_mapping,
        'General Symptoms': symptom_mapping,
        'Onset': onset_mapping,
        'Symptom Duration': symptom_duration_mapping,
        'Allergies': yes_no,
        'Medications': yes_no,
        'Travel History': yes_no,
        'Contact with Sick Individuals': yes_no,
        'Smoking': yes_no,
        # The previous lookup lower-cased the answer, which never matched the
        # capitalised keys, so every chronic condition was encoded as 0.
        'Chronic Conditions': chronic_mapping,
        'Alcohol Consumption': alcohol_mapping,
        'Physical Activity': physical_mapping,
        'Sleep Quality': sleep_mapping,
    }
    # Columns entered on a 10-point scale and stored as a fraction of 10.
    tenth_scaled_columns = {'Pain Scale': (0, 10), 'Stress Levels': (1, 10)}

    user_data = {}
    general_symptom = None  # canonical symptom name, kept for the medication lookup

    for column in X.columns:
        if column in choice_columns:
            mapping = choice_columns[column]
            chosen = _ask_choice(column, mapping)
            if column == 'General Symptoms':
                general_symptom = chosen
            user_data[column] = mapping[chosen]
        elif column in tenth_scaled_columns:
            low, high = tenth_scaled_columns[column]
            print(f"Enter value for {column} ({low} to {high})")
            user_data[column] = _ask_number(column, low, high) / 10.0
        else:
            # Any remaining feature (e.g. 'Age') is taken as a raw number.
            user_data[column] = _ask_number(column)

    # Create a single-row DataFrame with the same column order as the training features
    user_df = pd.DataFrame([user_data], columns=X.columns)

    # Apply the already-fitted scaler to the numeric columns
    user_df[numeric_columns] = scaler.transform(user_df[numeric_columns])

    # Make prediction and decode the most probable class back to its label
    severity_prediction = model.predict(user_df)
    severity_class = label_encoder.inverse_transform([np.argmax(severity_prediction)])

    # Output the predicted severity
    print(f"\nPredicted Severity: {severity_class[0]}")

    # If the severity is mild or moderate, display appropriate medications
    if severity_class[0] in ['Mild', 'Moderate'] and general_symptom:
        medications = symptoms_medications.get(general_symptom, "No specific medications available.")
        print(f"\nRecommended medications for {general_symptom}: {medications}")


# Run the interactive session once: prompts on stdin for every feature column,
# then prints the predicted severity (and medications, when applicable).
get_user_input()

Epoch 1/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.3876 - loss: 1.1439 - val_accuracy: 0.4333 - val_loss: 1.0271
Epoch 2/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5151 - loss: 0.9541 - val_accuracy: 0.7787 - val_loss: 0.6080
Epoch 3/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7471 - loss: 0.5908 - val_accuracy: 0.8907 - val_loss: 0.4081
Epoch 4/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8356 - loss: 0.4403 - val_accuracy: 0.8427 - val_loss: 0.4214
Epoch 5/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8683 - loss: 0.3783 - val_accuracy: 0.9040 - val_loss: 0.3339
Epoch 6/50
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8936 - loss: 0.3177 - val_accuracy: 0.8973 - val_loss: 0.3258
Epoch 7/50
[1m110/110[0m 