In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder

In [None]:

# Load the training table. dropna(axis=1) removes every column that contains
# at least one missing value (it drops columns, not rows).
dataset = pd.read_csv('Training_new.csv').dropna(axis=1)

# Features are all columns except the target column 'prognosis'.
X = dataset.drop('prognosis', axis=1)
y = dataset['prognosis']

# The encoder is fitted on the target column and the dataframe column is
# replaced by the integer codes. `y` was extracted above, so it still holds the
# original (un-encoded) labels, and those are what the models below learn and
# predict. `encoder.classes_` is what maps codes back to labels.
encoder = LabelEncoder()
dataset["prognosis"] = encoder.fit_transform(dataset["prognosis"])

# 60/40 split with a fixed seed.
# NOTE(review): X_test / y_test are never used in the visible cells; every
# metric below is computed on X_train.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# The models are fitted on half of the training split only. X_train (used for
# the "training" predictions below) therefore contains the fitted rows plus an
# equally sized portion the models have not seen.
X_train_small, _, y_train_small, _ = train_test_split(X_train, y_train, test_size=0.5, random_state=42)

#random forest
# random_state is fixed so that the saved model and all reported metrics are
# reproducible across "Restart & Run All"; an unseeded forest differs per run.
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_small, y_train_small)
rf_predictions_train = rf_classifier.predict(X_train)
rf_cm_train = confusion_matrix(y_train, rf_predictions_train)

joblib.dump(rf_classifier, 'rf_model.pkl')

#SVM
svm_classifier = SVC()
svm_classifier.fit(X_train_small, y_train_small)
svm_predictions_train = svm_classifier.predict(X_train)
svm_cm_train = confusion_matrix(y_train, svm_predictions_train)

joblib.dump(svm_classifier, 'svm_model.pkl')

#GaussianNB
nb_classifier = GaussianNB()
nb_classifier.fit(X_train_small, y_train_small)
nb_predictions_train = nb_classifier.predict(X_train)
nb_cm_train = confusion_matrix(y_train, nb_predictions_train)

joblib.dump(nb_classifier, 'nb_model.pkl')

In [None]:
# Accuracy of each model's predictions on X_train, in the order RF, SVM, NB.
rf_accuracy_train, svm_accuracy_train, nb_accuracy_train = (
    accuracy_score(y_train, model_predictions)
    for model_predictions in (
        rf_predictions_train,
        svm_predictions_train,
        nb_predictions_train,
    )
)

In [None]:

# Report the accuracies computed on X_train, one line per model.
for heading, accuracy in (
    ("\nRandom Forest Accuracy (Training):", rf_accuracy_train),
    ("SVM Accuracy (Training):", svm_accuracy_train),
    ("Gaussian Naive Bayes Accuracy (Training):", nb_accuracy_train),
):
    print(heading, accuracy)

In [None]:

def multiclass_to_2x2_confusion_matrix(cm):
    """Collapse a K x K confusion matrix into aggregate TP / FP / FN / TN counts.

    Every class is treated in turn as the "positive" class (one-vs-rest) and
    the four counts are summed over all K classes:

    * TP: the diagonal (samples predicted as their true class).
    * FP: for each class, column sum minus diagonal entry; summed over classes
      this is every off-diagonal entry, i.e. ``total - TP``.
    * FN: for each class, row sum minus diagonal entry; also ``total - TP``.
    * TN: for each class, ``total - row - column + diagonal``; summed over
      classes this is ``(K - 2) * total + TP``.

    The four counts therefore add up to ``K * total`` (each sample is counted
    once per class), not to ``total``.

    Parameters
    ----------
    cm : numpy.ndarray
        Square 2-D confusion matrix of shape (K, K).

    Returns
    -------
    tuple
        ``(TP, FP, FN, TN)``.

    Raises
    ------
    ValueError
        If ``cm`` is not a square 2-D array.
    """
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError(f"confusion matrix must be square, got shape {cm.shape}")

    n_classes = cm.shape[0]
    total = cm.sum()

    TP = cm.diagonal().sum()

    # Every misclassified sample is a false positive for the predicted class
    # and a false negative for the true class, so both sums equal total - TP.
    FP = cm.sum(axis=0).sum() - TP
    FN = cm.sum(axis=1).sum() - TP

    # Subtracting FP and FN from a single `total` is only valid per class and
    # goes negative once the counts are summed; use the one-vs-rest sum.
    TN = (n_classes - 2) * total + TP

    return TP, FP, FN, TN


# Collapse each model's multiclass confusion matrix (computed on X_train) into
# aggregate TP / FP / FN / TN counts.
rf_TP, rf_FP, rf_FN, rf_TN = multiclass_to_2x2_confusion_matrix(rf_cm_train)
svm_TP, svm_FP, svm_FN, svm_TN = multiclass_to_2x2_confusion_matrix(svm_cm_train)
nb_TP, nb_FP, nb_FN, nb_TN = multiclass_to_2x2_confusion_matrix(nb_cm_train)


def visualize_2x2_confusion_matrix(TP, FP, FN, TN):
    """Print the four counts as a 2x2 table (rows: predicted, columns: actual).

    Defined once and shared by all models; it used to be redefined per model
    with identical bodies, each definition shadowing the previous one.
    """
    print("                   |Actual Positive | Actual Negative")
    print(f"Predicted Positive | TP = {TP:<3}   | FP = {FP:<3}")
    print(f"Predicted Negative | FN = {FN:<3}   | TN = {TN:<3}")


def _print_confusion_report(heading, TP, FP, FN, TN):
    """Print the 2x2 table, then the heading, a one-line summary and a blank gap."""
    visualize_2x2_confusion_matrix(TP, FP, FN, TN)
    print(heading)
    print(f"True Positive: {TP:<3}, False Positive: {FP:<3}, False Negative: {FN:<3}, True Negative: {TN:<3}")
    print("\n")


#random forest cm
_print_confusion_report("Random Forest Confusion Matrix (Training):", rf_TP, rf_FP, rf_FN, rf_TN)

#svm cm
_print_confusion_report("\nSVM Confusion Matrix (Training):", svm_TP, svm_FP, svm_FN, svm_TN)

#GaussianNB cm
_print_confusion_report("\nGaussian Naive Bayes Confusion Matrix (Training):", nb_TP, nb_FP, nb_FN, nb_TN)




In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
import joblib

# Restore the persisted estimators (same order they were saved in: RF, SVM, NB).
rf_classifier, svm_classifier, nb_classifier = (
    joblib.load(model_file)
    for model_file in ('rf_model.pkl', 'svm_model.pkl', 'nb_model.pkl')
)

# Predict on X_train with each restored estimator.
rf_predictions_train, svm_predictions_train, nb_predictions_train = (
    estimator.predict(X_train)
    for estimator in (rf_classifier, svm_classifier, nb_classifier)
)

# Multiclass confusion matrices for those predictions.
rf_cm_train, svm_cm_train, nb_cm_train = (
    confusion_matrix(y_train, predicted)
    for predicted in (rf_predictions_train, svm_predictions_train, nb_predictions_train)
)


def _weighted_training_metrics(predicted):
    """Return (precision, recall, F1, accuracy) of `predicted` against y_train.

    Precision, recall and F1 use average='weighted'; precision additionally
    uses zero_division=1.
    """
    return (
        precision_score(y_train, predicted, average='weighted', zero_division=1),
        recall_score(y_train, predicted, average='weighted'),
        f1_score(y_train, predicted, average='weighted'),
        accuracy_score(y_train, predicted),
    )


def _print_model_metrics(title, precision, recall, f_measure, accuracy):
    """Print a title followed by the four metrics rounded to two decimals."""
    print(title)
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F-measure: {f_measure:.2f}")
    print(f"Accuracy: {accuracy:.2f}")


# Metrics per model, in the order Random Forest, SVM, Gaussian Naive Bayes.
rf_precision, rf_recall, rf_f_measure, rf_accuracy = _weighted_training_metrics(rf_predictions_train)
svm_precision, svm_recall, svm_f_measure, svm_accuracy = _weighted_training_metrics(svm_predictions_train)
nb_precision, nb_recall, nb_f_measure, nb_accuracy = _weighted_training_metrics(nb_predictions_train)

# Print the metrics for each model, separated by a blank gap.
_print_model_metrics("Random Forest Model Metrics:", rf_precision, rf_recall, rf_f_measure, rf_accuracy)
print("\n")

_print_model_metrics("SVM Model Metrics:", svm_precision, svm_recall, svm_f_measure, svm_accuracy)
print("\n")

_print_model_metrics("Gaussian Naive Bayes Model Metrics:", nb_precision, nb_recall, nb_f_measure, nb_accuracy)


In [None]:
import joblib
import numpy as np
from scipy.stats import mode
from sklearn.preprocessing import LabelEncoder
import warnings


# Restore the three persisted classifiers under the names predict_disease uses.
rf_model, svm_model, nb_model = (
    joblib.load(model_file)
    for model_file in ('rf_model.pkl', 'svm_model.pkl', 'nb_model.pkl')
)

# Encoder fitted on the distinct target labels; predict_disease uses it to map
# model outputs to integer codes and back.
label_encoder = LabelEncoder().fit(y.unique())

symptoms = X.columns.values
prediction_classes = encoder.classes_

# Map a display name ("high_fever" -> "High Fever") to the feature's column
# position: split the column name on "_" and capitalize each piece.
symptom_index = {
    " ".join(piece.capitalize() for piece in column_name.split("_")): position
    for position, column_name in enumerate(symptoms)
}

data_dict = {
    "symptom_index": symptom_index,
    "predictions_classes": prediction_classes
}


def predict_disease(symptoms):
    """Predict a disease from symptom names with the three models and a majority vote.

    Parameters
    ----------
    symptoms : iterable of str
        Symptom names. Matching against ``data_dict["symptom_index"]`` ignores
        case, treats underscores as spaces and collapses repeated whitespace,
        so ``"high_fever"``, ``"High Fever"`` and ``"high fever"`` all select
        the same feature. Names that match no feature are skipped and reported
        through a single ``warnings.warn`` call.

    Returns
    -------
    dict
        Keys ``"Random Forest"``, ``"SVM"``, ``"Gaussian Naive Bayes"`` and
        ``"Final Prediction"``, each mapped to a label produced by
        ``label_encoder.inverse_transform``. The final prediction is the most
        frequent of the three votes; when all three differ, the vote with the
        smallest encoded label wins.
    """
    def _canonical(name):
        # One normal form for both the stored keys and the caller's input, so
        # "continuous_sneezing" and "Continuous Sneezing" compare equal.
        return " ".join(str(name).replace("_", " ").lower().split())

    symptom_index = data_dict["symptom_index"]
    canonical_index = {_canonical(key): position for key, position in symptom_index.items()}

    # One-hot style feature vector: 1 for every recognized symptom, 0 elsewhere.
    input_data = [0] * len(symptom_index)
    unrecognized = []
    for symptom in symptoms:
        index = canonical_index.get(_canonical(symptom))
        if index is None:
            unrecognized.append(symptom)
        else:
            input_data[index] = 1

    if unrecognized:
        # Surface dropped inputs instead of silently predicting from fewer symptoms.
        warnings.warn(f"Ignoring unrecognized symptom(s): {unrecognized}", stacklevel=2)

    input_data = np.array(input_data).reshape(1, -1)

    # All warnings raised while predicting are suppressed, as before.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        rf_prediction = rf_model.predict(input_data)
        nb_prediction = nb_model.predict(input_data)
        svm_prediction = svm_model.predict(input_data)

    # Convert each model's label to its integer code so the votes can be counted.
    rf_prediction_label = label_encoder.transform(rf_prediction)[0]
    nb_prediction_label = label_encoder.transform(nb_prediction)[0]
    svm_prediction_label = label_encoder.transform(svm_prediction)[0]

    # Majority vote. bincount(...).argmax() returns the most frequent code and,
    # on a tie, the smallest one. Indexing scipy.stats.mode(...)[0][0] is not
    # used because it fails on SciPy versions where mode returns scalars.
    predictions = [rf_prediction_label, nb_prediction_label, svm_prediction_label]
    final_prediction = int(np.bincount(predictions).argmax())

    final_prediction = label_encoder.inverse_transform([final_prediction])[0]

    output = {
        "Random Forest": label_encoder.inverse_transform([rf_prediction_label])[0],
        "SVM": label_encoder.inverse_transform([svm_prediction_label])[0],
        "Gaussian Naive Bayes": label_encoder.inverse_transform([nb_prediction_label])[0],
        "Final Prediction": final_prediction
    }

    return output


In [None]:

# Example query: symptom names written the way the dataset columns spell them.
symptoms = [
    "continuous_sneezing",
    "fatigue",
    "cough",
    "high_fever",
    "headache",
]

predictions = predict_disease(symptoms)

# One line per entry of the returned dict, in its insertion order.
for model in predictions:
    disease = predictions[model]
    print(f"{model} Prediction:", disease)
