In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Load dataset
data = pd.read_csv('diseases.csv')

# Combine symptoms into a list
data['all_symptoms'] = data.apply(lambda x: [x['Primary Symptoms'], x['Secondary Symptoms'], x['Tertiary Symptoms']], axis=1)

# Encode symptoms
mlb = MultiLabelBinarizer()
X = pd.DataFrame(mlb.fit_transform(data['all_symptoms']), columns=mlb.classes_)
y = data['Disease']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers
classifiers = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVC': SVC(kernel='rbf', probability=True, random_state=42),
    'Multinomial NB': MultinomialNB(),
    'Gaussian NB': GaussianNB()
}

# Dictionary to store evaluation results
detailed_results = {}
random_forest_model = None  # Store RandomForest explicitly

# Train and evaluate each model
for name, clf in classifiers.items():
    print(f"\nEvaluating {name}:")
    print("-" * 50)

    # Train the model
    clf.fit(X_train, y_train)

    # Make predictions
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)

    # Compute metrics
    train_accuracy = accuracy_score(y_train, y_pred_train)
    test_accuracy = accuracy_score(y_test, y_pred_test)
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred_test, average='weighted')

    # Store results
    detailed_results[name] = {
        'Training Accuracy': train_accuracy,
        'Testing Accuracy': test_accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    }

    # Print results
    print(f"Training Accuracy: {train_accuracy:.2%}")
    print(f"Testing Accuracy: {test_accuracy:.2%}")
    print(f"Precision: {precision:.2%}")
    print(f"Recall: {recall:.2%}")
    print(f"F1 Score: {f1:.2%}")

    # Explicitly save Random Forest model
    if name == "Random Forest":
        random_forest_model = clf

# Convert results to DataFrame
results_df = pd.DataFrame(detailed_results).round(4) * 100
print("\nComparative Results (%):")
print("-" * 50)
print(results_df)

# Save the **RandomForest model** explicitly
if random_forest_model:
    with open('random_forest.pkl', 'wb') as f:
        pickle.dump({'model': random_forest_model, 'mlb': mlb}, f)
    print("\nRandomForest model saved as 'random_forest.pkl'.")

# Load the RandomForest model and MultiLabelBinarizer
with open('random_forest.pkl', 'rb') as f:
    model_data = pickle.load(f)
rf_model = model_data['model']
mlb = model_data['mlb']

# Load the dataset with suggestions
suggestions_data = pd.read_csv('symptoms.csv')  # Ensure this file contains the suggestions

def predict_disease():
    print("\nEnter symptoms (comma-separated): ")
    user_symptoms = input().split(',')

    # Clean and process user input
    user_symptoms = [sym.strip().capitalize() for sym in user_symptoms]

    # Convert symptoms to model input format
    input_vector = np.zeros(len(mlb.classes_))
    for symptom in user_symptoms:
        if symptom in mlb.classes_:
            input_vector[mlb.classes_.tolist().index(symptom)] = 1

    # Predict using the RandomForest model
    predicted_disease = rf_model.predict([input_vector])[0]

    # Fetch suggestions for the predicted disease
    suggestions = suggestions_data[suggestions_data['Disease'] == predicted_disease].iloc[0]

    # Display results
    print(f"\nPredicted Disease: {predicted_disease}")
    print(f"\nDescription: {suggestions['Description']}")
    print(f"\nPrescription: {suggestions['Prescription']}")
    print(f"\nPrecautions: {suggestions['Precautions']}")
    print(f"\nDiet Plan: {suggestions['Diet Plans']}")
    print(f"\nWorkouts: {suggestions['Workouts']}")

# -------------------------------
# Run User Input Prediction
# -------------------------------
predict_disease()


Evaluating Random Forest:
--------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training Accuracy: 97.71%
Testing Accuracy: 0.00%
Precision: 0.00%
Recall: 0.00%
F1 Score: 0.00%

Evaluating SVC:
--------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Training Accuracy: 97.71%
Testing Accuracy: 0.00%
Precision: 0.00%
Recall: 0.00%
F1 Score: 0.00%

Evaluating Multinomial NB:
--------------------------------------------------
Training Accuracy: 97.71%
Testing Accuracy: 0.00%
Precision: 0.00%
Recall: 0.00%
F1 Score: 0.00%

Evaluating Gaussian NB:
--------------------------------------------------
Training Accuracy: 97.71%
Testing Accuracy: 0.00%
Precision: 0.00%
Recall: 0.00%
F1 Score: 0.00%

Comparative Results (%):
--------------------------------------------------
                   Random Forest    SVC  Multinomial NB  Gaussian NB
Training Accuracy          97.71  97.71           97.71        97.71
Testing Accuracy            0.00   0.00            0.00         0.00
Precision                   0.00   0.00            0.00         0.00
Recall                      0.00   0.00            0.00         0.00
F1 Score                    0.00   0.00            0.00         0.00

RandomForest model saved as 'random_forest.pkl'.

Enter sympto

