Load the model

In [4]:
import pickle
import pandas as pd
import numpy as np

filename = "../saved models/model.pickle"

# Load the trained model.
# NOTE(security): pickle.load can execute arbitrary code during
# deserialisation -- only load model files that come from a trusted source.
# The context manager guarantees the file handle is closed once the model has
# been read (the bare open(...) call previously left it to the garbage
# collector).
with open(filename, "rb") as model_file:
    model = pickle.load(model_file)

In [10]:
def predict_disease(symptoms_list, model=model):
    """Predict a disease from a list of symptom names.

    Each recognised symptom is mapped to its weight from
    ``../datasets/Symptom-severity.csv``. The weights are zero-padded to a
    fixed-length feature vector and passed to ``model``.

    Parameters
    ----------
    symptoms_list : list of str
        Symptom names. Names that do not appear in the ``Symptom`` column of
        the severity table are silently ignored.
    model : estimator, optional
        Object exposing ``predict``, ``predict_proba`` and ``classes_``.
        Defaults to the module-level ``model`` as bound when this function
        was defined.

    Returns
    -------
    tuple
        ``(predicted_disease, result_str)`` where ``predicted_disease`` is the
        first element of ``model.predict(...)`` and ``result_str`` is a
        human-readable report listing that prediction plus the three classes
        with the highest ``predict_proba`` estimates.

    Raises
    ------
    ValueError
        If more recognised symptoms are supplied than the feature vector can
        hold (``max_num_symptoms``).
    """
    # Read the symptom severity CSV file and build a symptom -> weight lookup.
    # If a symptom appears more than once in the table, the last row wins.
    severity_table = pd.read_csv('../datasets/Symptom-severity.csv')
    symptom_weights = dict(zip(severity_table.Symptom, severity_table.weight))

    # Convert the user input list of symptoms to severity scores; the dict
    # doubles as the set of known symptoms, so unknown names are dropped here.
    symptom_scores = [
        symptom_weights[symptom]
        for symptom in symptoms_list
        if symptom in symptom_weights
    ]

    # Width of the feature vector fed to the model.
    # NOTE(review): presumably the number of symptom columns in the encoded
    # training dataset -- confirm against the training notebook.
    max_num_symptoms = 17
    if len(symptom_scores) > max_num_symptoms:
        # Without this guard the padding below would be a no-op and the model
        # would receive an over-long vector, failing with an opaque
        # feature-count error.
        raise ValueError(
            f"At most {max_num_symptoms} recognised symptoms are supported, "
            f"got {len(symptom_scores)}."
        )

    # Pad with 0's up to the fixed width, then shape as a single-row 2-D array.
    padding = [0] * (max_num_symptoms - len(symptom_scores))
    features = np.array(symptom_scores + padding).reshape(1, -1)

    # Make a prediction and obtain the probability estimates for each class.
    # NOTE(review): if the model is an sklearn SVC (as the original comments
    # state), predict_proba is calibrated separately and its arg-max may
    # disagree with predict(); the report can therefore show a "Predicted
    # Disease" that is not the top-probability entry.
    predicted_disease = model.predict(features)[0]
    prob_estimates = model.predict_proba(features)[0]

    # Pair each class label with its probability, highest probability first.
    disease_probs = sorted(
        zip(model.classes_, prob_estimates),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Create a formatted string that includes the predicted disease and the
    # three most probable diseases.
    result_str = f"Predicted Disease: {predicted_disease}\n\n"
    result_str += "Top 3 Disease Probabilities:\n"
    for disease, prob in disease_probs[:3]:
        result_str += f"{disease}: {prob * 100:.1f}%\n"

    return predicted_disease, result_str

In [15]:
# Example symptoms to classify.
symptoms_list = [
    'fatigue',
    'muscle_wasting',
    'weight_gain',
    'irritability',
    'itching',
]

# predict_disease returns (label, formatted report); note that
# `disease_probs` therefore holds the report string, not a list of numbers.
predicted_disease, disease_probs = predict_disease(symptoms_list)

print(disease_probs)

Predicted Disease: Psoriasis

Top 3 Disease Probabilities:
Paralysis (brain hemorrhage): 29.2%
Psoriasis: 27.2%
Allergy: 11.9%

