<a href="https://colab.research.google.com/github/Dila1998/Diabetes-Prediction-BSc-final-year-project-2024-/blob/main/DB_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# import Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import pickle
from IPython.display import display
from matplotlib import pyplot
from pandas.plotting import scatter_matrix
from sklearn import linear_model, metrics, model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
)
from sklearn.model_selection import KFold, cross_val_score, train_test_split

In [None]:
import joblib

# Example of saving a fitted model to a file (left disabled). Note that the
# file name used here differs from the one loaded below.
#joblib.dump(mylog_model, 'diabetes-classifier-log_model.pkl')

# Load the previously saved model; the prediction form calls its .predict().
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load 'DB_pre_model.pkl' from a trusted source.
mylog_model = joblib.load('DB_pre_model.pkl')

In [None]:
# USER INTERFACE: form for input for patient prediction
import ipywidgets as widgets

# 'features_dict' maps each model feature name to the question/description
# shown next to its input widget. Insertion order matters: the form is built
# and displayed in this order.
features_dict = {
    "Sex": "Sex:",
    "Age": "Age category (1 = Age 18 - 24, 2 = Age 25 to 29, 3 = Age 30 to 34, 4 = Age 35 to 39, 5 = Age 40 to 44, 6 = Age 45 to 49, 7 = Age 50 to 54, 8 = Age 55 to 59, 9 = Age 60 to 64, 10 = Age 65 to 69, 11 = Age 70 to 74, 12 = Age 75 to 79, 13 = Age 80 or older):",
    # Category 2 starts at 18.5 so that no BMI value falls between categories 1 and 2.
    "BMI": "Body Mass Index (BMI is less than 18.5= 1, BMI is 18.5 to 24.9= 2, BMI is 25.0 to 29.9= 3, 30.0 or higher= 4):",
    "HighBP": "High Blood Pressure",
    "HighChol": "High Cholesterol",
    "CholCheck": "cholesterol check in 5 years",
    "Smoker": "Have you smoked at least 100 cigarettes in your life?",
    # Heavy drinking means MORE than 14 (men) / 7 (women) drinks per week.
    "HvyAlcoholConsump": "Heavy drinkers (drinks >14 for men, >7 for women per week)",
    "Stroke": "(Ever told) you had a Stroke?",
    "HeartDiseaseorAttack": "Heart Disease or Attack (CHD or MI)",
    "GenHlth": "General Health scale :",
    # NOTE(review): the ranges below overlap at 20 days; confirm which bucket
    # 20 belongs to against the binning used when the model was trained.
    "MentHlth": "How many past days was your Mental Health not good?(0 to 9 days= 0, 10 to 20 days=1, 20 to 31 days=2 ):",
    "PhysHlth": "How many past days was your Physical Health not good?(0 to 9 days= 0, 10 to 20 days=1, 20 to 31 days=2 ):",
    "DiffWalk": "Do you have Difficulty Walking or climbing stairs?",
    "PhysActivity": "Physical Activity in past 30 days, not incl job",
    "AnyHealthcare": "Have any kind of health care coverage, including health insurance, prepaid plans such as HMO, etc.",
    "NoDocbcCost": "Was there a time in the past 12 months when you needed to see a doctor but could not because of cost? ",
    "Fruits": "Eat 1 Fruit or more per day",
    "Veggies": "Eat Veggies 1 or more per day",
    # The income input and the model's one-hot columns cover levels 1 through 11.
    "Income": "Income level scale 1-11:(1 = less than 10,000$ 5 = less than 35,000$ 11 = 200,000$ or more.)",
    "Education": "Education level:scale 1-6: (1 = Never attended school or only kindergarten 2 = Grades 1 through 8, 3 = Grades 9 - 11 (Some high school), 4 = Grade 12 or GED (High school graduate), 5 = College 1 year to 3 years (Some college or technical school), 6 = College 4 years or more (College graduate))",
}
# Maps "<feature>_label" to the Label widget and "<feature>" to the input
# widget. Each label is inserted immediately before its input, so iterating
# over the dict yields the form in display order.
widgets_dict = {}

# Yes/No questions, answered with radio buttons that yield 0 (No) or 1 (Yes).
BINARY_FEATURES = {
    "HighBP",
    "HighChol",
    "CholCheck",
    "Smoker",
    "Stroke",
    "HeartDiseaseorAttack",
    "PhysActivity",
    "AnyHealthcare",
    "NoDocbcCost",
    "Fruits",
    "Veggies",
    "HvyAlcoholConsump",
    "DiffWalk",
}

# Integer category codes entered with a slider: feature -> (default, min, max).
# Every default must lie inside [min, max]; an out-of-range default is
# silently clamped by the slider, hiding the mistake.
SLIDER_RANGES = {
    "BMI": (1, 1, 4),
    "GenHlth": (3, 1, 5),
    "MentHlth": (0, 0, 2),
    "PhysHlth": (0, 0, 2),
    "Age": (8, 1, 13),
    "Income": (8, 1, 11),
    "Education": (3, 1, 6),
}

# Create a label and an input widget for each feature
for item, description in features_dict.items():
    if item in BINARY_FEATURES:
        # Options are (label, value) pairs, the same form the Sex dropdown uses.
        control = widgets.RadioButtons(
            options=[("No", 0), ("Yes", 1)],
            value=0,
        )
    elif item == "Sex":
        control = widgets.Dropdown(
            options=[("Female", 0), ("Male", 1)],
            value=0,
        )
    elif item in SLIDER_RANGES:
        default, lowest, highest = SLIDER_RANGES[item]
        # The values are discrete category codes, so use an integer slider.
        control = widgets.IntSlider(
            value=default,
            min=lowest,
            max=highest,
            step=1,
        )
    else:
        # Fail here rather than with a KeyError when the button is clicked.
        raise ValueError(f"No input widget is defined for feature {item!r}")

    widgets_dict[item + "_label"] = widgets.Label(
        description, layout={"width": "max-content"}
    )
    widgets_dict[item] = control

# Button to make prediction
predict_btn = widgets.Button(description="Predict Patient Risk")

# Output widget to display prediction result
output = widgets.Output()


def on_predict_btn_clicked(b):
    """Read the form, one-hot encode the answers and show the model's prediction.

    Each feature's current widget value is expanded into one 0/1 column per
    category level, named ``<feature>_<level>``. The resulting single-row
    DataFrame is passed to ``mylog_model.predict`` and a message for the
    predicted class is printed into the ``output`` widget.

    Args:
        b: Argument supplied by the button's click callback; not used.
    """
    binary_levels = (0, 1)

    # Category levels of every feature, in the column order handed to the
    # model. Column names and values are both derived from this one table so
    # they cannot get out of step: for a Yes/No feature the "_0" column is 1
    # only when the answer is 0 and the "_1" column is 1 only when it is 1.
    feature_levels = {
        "HighBP": binary_levels,
        "HighChol": binary_levels,
        "CholCheck": binary_levels,
        "BMI": range(1, 5),
        "Smoker": binary_levels,
        "Stroke": binary_levels,
        "HeartDiseaseorAttack": binary_levels,
        "PhysActivity": binary_levels,
        "Fruits": binary_levels,
        "Veggies": binary_levels,
        "HvyAlcoholConsump": binary_levels,
        "AnyHealthcare": binary_levels,
        "NoDocbcCost": binary_levels,
        "GenHlth": range(1, 6),
        "MentHlth": range(0, 3),
        "PhysHlth": range(0, 3),
        "DiffWalk": binary_levels,
        "Sex": binary_levels,
        "Age": range(1, 14),
        "Education": range(1, 7),
        "Income": range(1, 12),
    }

    # One-hot encode: exactly one column per feature is set to 1.
    encoded = {}
    for feature, levels in feature_levels.items():
        selected = widgets_dict[feature].value
        for level in levels:
            encoded[f"{feature}_{level}"] = int(selected == level)

    # Single-row frame; columns are pinned explicitly to the encoding order.
    new_df = pd.DataFrame([encoded], columns=list(encoded))

    #### Make prediction ####
    prediction = mylog_model.predict(new_df)

    # NOTE(review): confirm this class-to-message mapping against the label
    # encoding used when the model was trained.
    messages = {
        0: "Prediction: Not at risk of diabetes",
        1: "Prediction: Diabetes positive",
        2: "Prediction: At risk of diabetes",
    }

    # Display prediction
    with output:
        output.clear_output()
        predicted_class = prediction[0]
        if predicted_class in messages:
            print(messages[predicted_class])
        else:
            # Make an unknown class visible instead of printing nothing.
            print(f"Prediction: unexpected model output {predicted_class!r}")

# Run the prediction handler whenever the button is pressed.
predict_btn.on_click(on_predict_btn_clicked)

# Render the form: every label/input widget in insertion order, then the
# predict button and the area where the result is printed.
display(*widgets_dict.values(), predict_btn, output)

Label(value='Sex:', layout=Layout(width='max-content'))

Dropdown(options=(('Female', 0), ('Male', 1)), value=0)

Label(value='Age category (1 = Age 18 - 24, 2 = Age 25 to 29, 3 = Age 30 to 34, 4 = Age 35 to 39, 5 = Age 40 t…

FloatSlider(value=8.0, max=13.0, min=1.0, step=1.0)

Label(value='Body Mass Index (BMI is less than 18.5= 1, BMI is 18.6 to 24.9= 2, BMI is 25.0 to 29.9= 3, 30.0 o…

FloatSlider(value=1.0, max=4.0, min=1.0, step=1.0)

Label(value='High Blood Pressure', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='High Cholesterol', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='cholesterol check in 5 years', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Have you smoked at least 100 cigarettes in your life?', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Heavy drinkers (drinks <14 for men, <7 for women per week', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='(Ever told) you had a Stroke?', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Heart Disease or Attack (CHD or MI)', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='General Health scale :', layout=Layout(width='max-content'))

FloatSlider(value=3.0, max=5.0, min=1.0, step=1.0)

Label(value='How many past days was your Mental Health not good?(0 to 9 days= 0, 10 to 20 days=1, 20 to 31 day…

FloatSlider(value=0.0, max=2.0, step=1.0)

Label(value='How many past days was your Physical Health not good?(0 to 9 days= 0, 10 to 20 days=1, 20 to 31 d…

FloatSlider(value=0.0, max=2.0, step=1.0)

Label(value='Do you have Difficulty Walking or climbing stairs?', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Physical Activity in past 30 days, not incl job', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Have any kind of health care coverage, including health insurance, prepaid plans such as HMO, etc…

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Was there a time in the past 12 months when you needed to see a doctor but could not because of c…

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Eat 1 Fruit or more per day', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Eat Veggies 1 or more per day', layout=Layout(width='max-content'))

RadioButtons(options={'No': 0, 'Yes': 1}, value=0)

Label(value='Income level scale 1-8:(1 = less than 10,000$ 5 = less than 35,000$ 11 = 200,000$ or more.)', lay…

FloatSlider(value=8.0, max=11.0, min=1.0, step=1.0)

Label(value='Education level:scale 1-6: (1 = Never attended school or only kindergarten 2 = Grades 1 through 8…

FloatSlider(value=3.0, max=6.0, min=1.0, step=1.0)

Button(description='Predict Patient Risk', style=ButtonStyle())

Output()