# **Diabetics Prediction Model**

In [None]:
# Install necessary modules
!pip install pycaret

In [None]:
!pip install streamlit

In [3]:
# Import necessary modules
from pycaret.classification import *
import streamlit as st
import pandas as pd
import numpy as np

In [4]:
# Load data
liver_disease_df = pd.read_csv('liver_disease.csv')
liver_disease_df.head(10)

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Liver_Disease
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.0,1
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.4,1
5,46,Male,1.8,0.7,208,19,14,7.6,4.4,1.3,1
6,26,Female,0.9,0.2,154,16,12,7.0,3.5,1.0,1
7,29,Female,0.9,0.3,202,14,11,6.7,3.6,1.1,1
8,17,Male,0.9,0.3,202,22,19,7.4,4.1,1.2,2
9,55,Male,0.7,0.2,290,53,58,6.8,3.4,1.0,1


In [5]:
# Transform water quality feature into binary (safe or not_safe)
liver_disease_df.Liver_Disease = np.where(liver_disease_df.Liver_Disease>= 2,'liver disease', 'no liver disease')
liver_disease_df.head()

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Liver_Disease
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,no liver disease
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,no liver disease
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,no liver disease
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.0,no liver disease
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.4,no liver disease


In [6]:
len(liver_disease_df[liver_disease_df['Liver_Disease']==2])

0

In [7]:
len(liver_disease_df[liver_disease_df['Liver_Disease']==1])

0

In [8]:
# find the minimum of each column
minValues = print(liver_disease_df.min())

Age                                       4
Gender                               Female
Total_Bilirubin                         0.4
Direct_Bilirubin                        0.1
Alkaline_Phosphotase                     63
Alamine_Aminotransferase                 10
Aspartate_Aminotransferase               10
Total_Protiens                          2.7
Albumin                                 0.9
Albumin_and_Globulin_Ratio              0.3
Liver_Disease                 liver disease
dtype: object


In [9]:
maxValues = print(liver_disease_df.max())

Age                                         90
Gender                                    Male
Total_Bilirubin                           75.0
Direct_Bilirubin                          19.7
Alkaline_Phosphotase                      2110
Alamine_Aminotransferase                  2000
Aspartate_Aminotransferase                4929
Total_Protiens                             9.6
Albumin                                    5.5
Albumin_and_Globulin_Ratio                 2.8
Liver_Disease                 no liver disease
dtype: object


In [10]:
liver_disease_df.describe()

Unnamed: 0,Age,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio
count,583.0,583.0,583.0,583.0,583.0,583.0,583.0,583.0,579.0
mean,44.746141,3.298799,1.486106,290.576329,80.713551,109.910806,6.48319,3.141852,0.947064
std,16.189833,6.209522,2.808498,242.937989,182.620356,288.918529,1.085451,0.795519,0.319592
min,4.0,0.4,0.1,63.0,10.0,10.0,2.7,0.9,0.3
25%,33.0,0.8,0.2,175.5,23.0,25.0,5.8,2.6,0.7
50%,45.0,1.0,0.3,208.0,35.0,42.0,6.6,3.1,0.93
75%,58.0,2.6,1.3,298.0,60.5,87.0,7.2,3.8,1.1
max,90.0,75.0,19.7,2110.0,2000.0,4929.0,9.6,5.5,2.8


In [11]:
liver_disease_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Age                         583 non-null    int64  
 1   Gender                      583 non-null    object 
 2   Total_Bilirubin             583 non-null    float64
 3   Direct_Bilirubin            583 non-null    float64
 4   Alkaline_Phosphotase        583 non-null    int64  
 5   Alamine_Aminotransferase    583 non-null    int64  
 6   Aspartate_Aminotransferase  583 non-null    int64  
 7   Total_Protiens              583 non-null    float64
 8   Albumin                     583 non-null    float64
 9   Albumin_and_Globulin_Ratio  579 non-null    float64
 10  Liver_Disease               583 non-null    object 
dtypes: float64(5), int64(4), object(2)
memory usage: 50.2+ KB


In [12]:
#Applying preprocessing techniques
liver_disease_model = setup(liver_disease_df, target = 'Liver_Disease', session_id = 123, use_gpu = True, fix_imbalance = True,
           normalize = True, polynomial_features = True, trigonometry_features = True, feature_interaction=True)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,Liver_Disease
2,Target Type,Binary
3,Label Encoded,"liver disease: 0, no liver disease: 1"
4,Original Data,"(583, 11)"
5,Missing Values,True
6,Numeric Features,9
7,Categorical Features,1
8,Ordinal Features,False
9,High Cardinality Features,False


INFO:logs:create_model_container: 0
2022-09-29 23:37:40.113 INFO    logs: create_model_container: 0
INFO:logs:master_model_container: 0
2022-09-29 23:37:40.118 INFO    logs: master_model_container: 0
INFO:logs:display_container: 1
2022-09-29 23:37:40.121 INFO    logs: display_container: 1
INFO:logs:Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=[],
                                      display_types=True, features_todrop=[],
                                      id_columns=[],
                                      ml_usecase='classification',
                                      numerical_features=[],
                                      target='Liver_Disease',
                                      time_features=[])),
                ('imputer',
                 Simple_Imputer(categorical_strategy='not_available',
                                fill_value_categorical=None,
                                fill_value_numeric

In [13]:
liver_disease_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.7033,0.7356,0.7897,0.7922,0.7904,0.2809,0.2823,0.975
gbc,Gradient Boosting Classifier,0.6935,0.7276,0.7793,0.7885,0.7822,0.2607,0.2643,0.486
ada,Ada Boost Classifier,0.6912,0.7362,0.7414,0.8082,0.7715,0.2935,0.2982,0.198
lr,Logistic Regression,0.6889,0.7568,0.6793,0.8531,0.7547,0.3422,0.3601,0.07
et,Extra Trees Classifier,0.6861,0.7237,0.7966,0.771,0.7819,0.214,0.217,1.083
svm,SVM - Linear Kernel,0.6789,0.0,0.7448,0.7947,0.7651,0.2478,0.2557,0.028
lightgbm,Light Gradient Boosting Machine,0.6713,0.7195,0.7655,0.7706,0.7665,0.2068,0.2095,0.154
ridge,Ridge Classifier,0.6593,0.0,0.6276,0.8532,0.721,0.3088,0.3332,0.061
lda,Linear Discriminant Analysis,0.6544,0.7325,0.6207,0.8512,0.7154,0.3019,0.3263,0.043
dt,Decision Tree Classifier,0.63,0.5634,0.7207,0.7506,0.7344,0.1214,0.1232,0.036


INFO:logs:create_model_container: 14
2022-09-29 23:38:22.486 INFO    logs: create_model_container: 14
INFO:logs:master_model_container: 14
2022-09-29 23:38:22.492 INFO    logs: master_model_container: 14
INFO:logs:display_container: 2
2022-09-29 23:38:22.495 INFO    logs: display_container: 2
INFO:logs:RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=123, verbose=0,
                       warm_start=False)
2022-09-29 23:38:22.499 INFO    logs: RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', 

In [14]:
liver_disease_model = create_model('et')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6585,0.6394,0.7241,0.7778,0.75,0.2137,0.2151
1,0.6341,0.7141,0.7931,0.7188,0.7541,0.0465,0.0474
2,0.7561,0.7371,0.8621,0.8065,0.8333,0.3807,0.3836
3,0.6829,0.75,0.7586,0.7857,0.7719,0.2525,0.2529
4,0.6585,0.7486,0.7931,0.7419,0.7667,0.1329,0.134
5,0.7805,0.7399,0.8966,0.8125,0.8525,0.4279,0.4359
6,0.7317,0.7989,0.8621,0.7812,0.8197,0.3008,0.3064
7,0.6585,0.7443,0.6897,0.8,0.7407,0.2487,0.2546
8,0.625,0.6865,0.7241,0.75,0.7368,0.0854,0.0855
9,0.675,0.6787,0.8621,0.7353,0.7937,0.0511,0.0549


INFO:logs:create_model_container: 15
2022-09-29 23:38:32.906 INFO    logs: create_model_container: 15
INFO:logs:master_model_container: 15
2022-09-29 23:38:32.911 INFO    logs: master_model_container: 15
INFO:logs:display_container: 3
2022-09-29 23:38:32.918 INFO    logs: display_container: 3
INFO:logs:ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,
                     oob_score=False, random_state=123, verbose=0,
                     warm_start=False)
2022-09-29 23:38:32.923 INFO    logs: ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, ma

In [15]:
save_model(liver_disease_model, 'liver_disease_deployment_model')

INFO:logs:Initializing save_model()
2022-09-29 23:38:33.085 INFO    logs: Initializing save_model()
INFO:logs:save_model(model=ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='auto',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,
                     oob_score=False, random_state=123, verbose=0,
                     warm_start=False), model_name=liver_disease_deployment_model, prep_pipe_=Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=[],
                                      display_types=True, features_todrop=[],
                                      id_columns=[],
                                      ml_useca

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=[],
                                       ml_usecase='classification',
                                       numerical_features=[],
                                       target='Liver_Disease',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 numeri...
                  ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0,
                                       class_weight=None, criterion='gini',
                                       max_depth=None, max_features='auto',
            

In [16]:
#Creating the web app with streamlit
from pycaret.classification import load_model, predict_model
import streamlit as st
import pandas as pd
import numpy as np


def predict_quality(model, df):
    
    predictions_data = predict_model(estimator = model, data = df)
    
    return predictions_data['Label'][0]
    
model = load_model('liver_disease_deployment_model')

st.title("Liver Disease Predictive Framework")
st.write('This framework is created to predict liver disease in patients by Okeke')

Age = st.sidebar.slider('Age', 
                        min_value=1.00, 
                        max_value=150.00, 
                        value=30.00, 
                        step = 1.00)

Gender = st.selectbox('Gender', ['Male', 'Female', 'Other'])

Total_Bilirubin = st.number_input('Total_Bilirubin', 
                          min_value=0.00, 
                          max_value=100.00, 
                          value=45.00)

Direct_Bilirubin = st.number_input('Direct_Bilirubin', 
                                min_value=0.00, 
                                max_value=50.00, 
                                value=15.00)

Alkaline_Phosphotase = st.number_input('Alkaline_Phosphotase', 
                                min_value=50.00, 
                                max_value=3000.00, 
                                value=100.00)

Alamine_Aminotransferase = st.number_input('Alamine_Aminotransferase', 
                          min_value=1.00, 
                          max_value=2500.00, 
                          value=100.00)

Aspartate_Aminotransferase = st.number_input('Aspartate_Aminotransferase', 
                          min_value=1.00, 
                          max_value=5000.00, 
                          value=500.00)

Total_Protiens = st.sidebar.slider('Total_Protiens', 
                                min_value=0.10, 
                                max_value=10.00, 
                                value=6.00, 
                                step = 0.10)

Albumin = st.sidebar.slider('Albumin', 
                            min_value=0.50, 
                            max_value=10.00, 
                            value=2.00, 
                            step = 0.10)

Albumin_and_Globulin_Ratio = st.sidebar.slider('Albumin_and_Globulin_Ratio', 
                            min_value=0.10, 
                            max_value=5.00, 
                            value=1.50, 
                            step = 0.10)

features = {'Age': Age, 'Gender': Gender, 'Total_Bilirubin': Total_Bilirubin, 'Direct_Bilirubin': Direct_Bilirubin,
            'Alkaline_Phosphotase': Alkaline_Phosphotase, 'Alamine_Aminotransferase': Alamine_Aminotransferase,
            'Aspartate_Aminotransferase': Aspartate_Aminotransferase, 'Total_Protiens': Total_Protiens,
            'Albumin': Albumin, 'Albumin_and_Globulin_Ratio': Albumin_and_Globulin_Ratio
            }
 

features_df  = pd.DataFrame([features])

st.table(features_df)  

if st.button('Predict'):
    
    prediction = predict_quality(model, features_df)
    
    st.write('Based on values provided, the patient has'+ str(prediction))

INFO:logs:Initializing load_model()
2022-09-29 23:38:33.375 INFO    logs: Initializing load_model()
INFO:logs:load_model(model_name=liver_disease_deployment_model, platform=None, authentication=None, verbose=True)
2022-09-29 23:38:33.380 INFO    logs: load_model(model_name=liver_disease_deployment_model, platform=None, authentication=None, verbose=True)


Transformation Pipeline and Model Successfully Loaded


  command:

    streamlit run /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py [ARGUMENTS]
2022-09-29 23:38:33.693 
  command:

    streamlit run /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py [ARGUMENTS]
