In [None]:
import pandas as pd                                    # Doing this to import "pandas" which is one of the python packages
                                                       # and shorten it as "pd" word to read + manage + analyze my data.
    
from sklearn.linear_model import LogisticRegression    # "SKLEARN" is a Machine learning library in python. It helps to 
                                                       # create a model we are interesting in doing predictive data analysis.    
                                                       
                                                       # LogisticRegression is a class imported from sklearn's
                                                       # "linear_model" module; it is used to build a model that
                                                       # predicts the target variable from the predictor variables.
                                
from sklearn.metrics import accuracy_score             # Extracting "accuracy_score" function to check how accurate the model 
                                                       # I am build is from ".metrics" classification within the sklearn 
                                                       # library.
    
import pickle                                          # "Pickle" is a library to help keeping an object in Python as  
                                                       # a file form by changing it into Byte Stream and convert back to 
                                                       # the same object when wanting to use it again (Object Serialisation).
        
                                                       # This helps making the data less complicated to use.
        
# Read the dataset from the local file 'heart_disease.csv' into a DataFrame
# (df = DataFrame).
df = pd.read_csv('heart_disease.csv')

# ".iloc[rows, columns]" selects by integer position; a bare ":" keeps every
# row.
#
# y is the target variable: every row of the first column (column 0), which
# is the heart_disease column that the model will learn to predict.
y = df.iloc[:, 0]

# X holds the predictor variables: every row of every column from column 1
# to the last one.  An open-ended slice "1:" already runs to the final
# column, so the column count does not need to be spelled out.
X = df.iloc[:, 1:]
                
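# Why upper-case 'X' and lower-case 'y'? By mathematical convention an upper-case letter names a matrix (X holds
# several predictor columns) and a lower-case letter names a vector (y is a single column of target values).
# The letters x and y themselves follow the algebra notation popularised by Descartes.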

# Build a logistic-regression classifier relating the predictor variables (X)
# to the target variable (y, whether the person has a Heart Disease or not).
#
# "max_iter" is an upper limit on the number of iterations the solver may take
# while searching for the coefficients; it is not a number of training
# repetitions.  A larger value does not by itself make the model more
# accurate, it only gives the solver more room to converge.
#
# ".fit(X, y)" estimates the model parameters from the data and returns the
# fitted estimator, so the construction and the fit can be chained.
model = LogisticRegression(max_iter=800).fit(X, y)

# ".predict()" returns the predicted label for each row of the features given.
predictions = model.predict(X)

# Print the share of rows whose predicted label equals the true label (0-1).
# NOTE: the predictions are made on the same X the model was fitted on, so
# this is the accuracy on the training data, not on unseen data.
print(accuracy_score(y_true=y, y_pred=predictions))

# Serialise the fitted model into a file named 'classifier' so that it can be
# loaded again later without retraining.  Mode 'wb' means write (w) in binary
# mode (b), which pickle requires because it produces a byte stream.
#
# The "with" block closes the file automatically, even when pickle.dump()
# raises, so no explicit ".close()" call is needed.
with open('classifier', mode='wb') as pickle_out:
    pickle.dump(model, pickle_out)

In [None]:
%%writefile app.py

import pickle
import streamlit as st

# Load the pickled model from the file 'classifier' ('rb' = read, binary).
# The "with" block makes sure the file handle is closed once loading is done.
# NOTE: pickle.load() can execute arbitrary code, so only load a file that
# you created yourself.
with open('classifier', 'rb') as pickle_in:
    classifier = pickle.load(pickle_in)

# st.cache is deprecated in newer Streamlit releases in favour of
# st.cache_data; use the newer decorator when this Streamlit version has it
# and fall back to the legacy one otherwise.
_cache = getattr(st, "cache_data", None) or st.cache


@_cache()
def prediction(age, sex, non_anginal_pain,
               max_heart_rate, exercise_induced_angina):
    """Return a user-facing message for one set of user inputs.

    The five values are passed to ``classifier.predict`` as a single row.
    A predicted label of 0 yields the "probably NOT" message; any other
    label yields the "see a doctor" message.
    """
    # One row, five features.
    # NOTE(review): the order presumably has to match the predictor columns
    # the model was trained on -- confirm against the training data.
    features = [[age, sex, non_anginal_pain,
                 max_heart_rate, exercise_induced_angina]]

    # predict() returns one label per input row; take the single label
    # explicitly instead of relying on the truthiness of a one-element array.
    predicted_label = classifier.predict(features)[0]

    if predicted_label == 0:
        return 'You are probably NOT having a Heart Disease'
    return 'PLEASE SEE A DOCTOR! You might have a Heart Disease!'

def main():
    """Render the input form and show the prediction on request.

    Five numeric input fields are created in display order.  When the
    'Predict' button is clicked, their values are passed to ``prediction``
    and the returned message is displayed with ``st.success``.
    """
    # Create input fields
    age = st.number_input(
        "Age in Years",
        min_value=0,
        max_value=120,
        value=54,
        step=1,
    )
    sex = st.number_input(
        "Sex (Male= 1, Female= 0)",
        min_value=0,
        max_value=1,
        value=0,
        step=1,
    )
    # The label names the same feature the value is passed on as
    # (non_anginal_pain); asking about "anginal pain" would invert its meaning.
    non_anginal_pain = st.number_input(
        "Have you been diagnosed with non-anginal pain? (Yes= 1, otherwise= 0)",
        min_value=0,
        max_value=1,
        value=0,
        step=1,
    )
    max_heart_rate = st.number_input(
        "Maximum Heart rate Level (50-250)",
        min_value=50,
        max_value=250,
        value=150,
        step=1,
    )
    exercise_induced_angina = st.number_input(
        "Is this type of pain arises under the stress of exercise? (Yes= 1, otherwise= 0)",
        min_value=0,
        max_value=1,
        value=0,
        step=1,
    )

    # When 'Predict' is clicked, make the prediction and display it
    if st.button("Predict"):
        result = prediction(age, sex, non_anginal_pain,
                            max_heart_rate, exercise_induced_angina)
        st.success(result)


if __name__ == '__main__':
    main()
    

In [None]:
!streamlit run app.py