In [2]:
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# Read the symptom/disease table from disk into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path — the notebook will
# only run where this exact file exists.
dataset_path = 'C:/Users/M Amruth Sai/Downloads/Hydration tracker/archive (2)/dataset.csv'
df = pd.read_csv(dataset_path)

# Get all unique symptoms.
# Every column after the first is scanned and missing cells (NaN) are skipped.
# The result is sorted because iterating a set of strings yields a different
# order in each Python process (string hashing is randomized), which would make
# the feature-column order of the symptom matrix change from run to run.
# key=str keeps the sort well-defined even if a column holds non-string values.
all_symptoms = sorted(
    {symptom for col in df.columns[1:] for symptom in df[col].dropna().unique()},
    key=str,
)


In [3]:

# Create a binary matrix for symptoms:
# X[r, c] == 1 when row r of df lists the symptom all_symptoms[c] in any of the
# columns after the first; every other entry stays 0.
X = np.zeros((len(df), len(all_symptoms)))

# Map each symptom to its column once, so each cell is a constant-time lookup
# instead of a linear scan of all_symptoms.
symptom_to_col = {symptom: col_idx for col_idx, symptom in enumerate(all_symptoms)}

# Iterate by position rather than by index label, so the row written in X is
# correct even if df does not carry the default 0..n-1 index.
for row_pos, row_symptoms in enumerate(df.iloc[:, 1:].to_numpy()):
    for symptom in row_symptoms:
        if pd.notna(symptom):
            X[row_pos, symptom_to_col[symptom]] = 1

# Encode the diseases: learn an integer id for every distinct value in the
# 'Disease' column, then expand those ids into one-hot rows for the softmax output.
label_encoder = LabelEncoder()
label_encoder.fit(df['Disease'])
num_classes = len(label_encoder.classes_)
y = to_categorical(label_encoder.transform(df['Disease']), num_classes=num_classes)

# Split the data: hold out 20% of the rows, with a fixed seed so the same rows
# are held out on every run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Build the neural network model: three ReLU hidden layers (256 -> 128 -> 64)
# with dropout after the first two, and a softmax layer with one unit per class.
# The input width is declared with an explicit Input layer, which is the form
# recent Keras versions recommend for Sequential models instead of passing
# input_shape= to the first Dense layer.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# Compile the model: categorical cross-entropy matches the one-hot targets,
# and accuracy is tracked as the reported metric.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:

# Train the model for 50 epochs in mini-batches of 32, printing per-epoch
# progress and the metrics on (X_test, y_test) after every epoch.
# NOTE(review): the same (X_test, y_test) split is used both as validation data
# here and for the final evaluation below — confirm whether a separate
# validation split is wanted.
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

# Evaluate the model on the held-out split and report accuracy as a percentage
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")


Epoch 1/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.3075 - loss: 2.9941 - val_accuracy: 1.0000 - val_loss: 0.0950
Epoch 2/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9746 - loss: 0.1854 - val_accuracy: 1.0000 - val_loss: 0.0047
Epoch 3/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9956 - loss: 0.0437 - val_accuracy: 1.0000 - val_loss: 0.0015
Epoch 4/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9964 - loss: 0.0264 - val_accuracy: 1.0000 - val_loss: 4.7598e-04
Epoch 5/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9979 - loss: 0.0154 - val_accuracy: 1.0000 - val_loss: 4.3891e-04
Epoch 6/50
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9995 - loss: 0.0096 - val_accuracy: 1.0000 - val_loss: 1.4120e-04
Epoch 7/50
[1m1

In [5]:

# Save the trained network to an .h5 file in the current working directory
model.save('disease_prediction_model.h5')

# Pickle the fitted label encoder and the symptom list next to it; the symptom
# list fixes which column of the input matrix corresponds to which symptom.
for pickle_name, artifact in (
    ('label_encoder.pkl', label_encoder),
    ('all_symptoms.pkl', all_symptoms),
):
    with open(pickle_name, 'wb') as out_file:
        pickle.dump(artifact, out_file)

