In [1]:
######################################
# Importing various python libraries
#  1. Pandas is a data analysis and manipulation library. We are using it to read the datasets
#  2. sklearn is machine learning library of python. we need it for running the prediction model.
#  3. pickle library is used for serializing a python object structure.
######################################
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle

######################################
# Load the source dataset with pandas' read_csv.
# The delimiter is spelled out as ',' (which is also read_csv's default);
# column names and dtypes are inferred from the file because no other
# parameters are provided.
######################################

df = pd.read_csv('winequality-white.csv', sep=',')


######################################
# Separate the response and predictor variables with iloc (integer-position
# indexing). The first index selects rows (':' means all rows) and the second
# selects columns by position.
# 'high_quality' --> the dependent variable used to identify the quality of
#   the wine; taken from column 0 (assumes it is the first CSV column — confirm).
# 'chlorides  total_sulfur_dioxide  density    pH  alcohol' --> the features or
#   independent variables; taken from every column after the first.
######################################
y = df.iloc[:, 0]

X = df.iloc[:, 1:]
######################################
# Build and train a logistic regression classifier.
# Logistic regression estimates the probability of each class of the
# dependent variable from the input features.
# max_iter=800 caps the number of iterations the solver may take to converge.
# X holds the input variables and y the output variable (described as having
# only two classes, 0 and 1).
# fit() estimates the model coefficients from X and y and returns the fitted
# estimator itself, so construction and training are chained here.
######################################

model = LogisticRegression(max_iter=800).fit(X, y)
######################################
# Predict the response variable ('High Quality') from all features for the
# same rows the model was fitted on, then use sklearn's metrics module to
# compute the accuracy of those predictions.
# No data is held out, so the printed figure is the training accuracy.
######################################

predictions = model.predict(X)
print(accuracy_score(y_true=y, y_pred=predictions))
######################################
# Serialize the fitted model with pickle.
# 1. Open a file named 'classifier' for binary writing.
# 2. Use dump to store the python object 'model' in that file.
# 3. The with-statement closes the file automatically, even if dump raises,
#    so the handle is never leaked.
######################################


with open('classifier', mode='wb') as pickle_out:
    pickle.dump(model, pickle_out)

0.8023683135973867


In [4]:
%%writefile app.py

import pickle
import streamlit as st



# Load the pickled model from the 'classifier' file once, when the script
# starts. The with-statement closes the file as soon as unpickling is done.
# NOTE: pickle.load can execute arbitrary code from the file it reads; only
# load a 'classifier' file that you produced yourself.
with open('classifier', 'rb') as pickle_in:
    classifier = pickle.load(pickle_in)

# Define the function which will make the prediction using data
# inputs from users
# NOTE(review): st.cache is deprecated in newer Streamlit releases in favour
# of st.cache_data / st.cache_resource — confirm against the installed version.
@st.cache()
def prediction(chlorides, total_sulfur_dioxide,
               density, pH, alcohol):
    """Classify one wine sample with the loaded classifier.

    The five arguments are packed, in the order given, into a single-row
    feature matrix and passed to ``classifier.predict``.

    Returns:
        'Wine is of Bad Quality' when the predicted label equals 0,
        otherwise ' Congrats! This wine is of best Quality'.
    """
    # One sample, so predict() yields exactly one label.
    features = [[chlorides, total_sulfur_dioxide, density, pH, alcohol]]
    predicted_label = classifier.predict(features)[0]

    return ('Wine is of Bad Quality' if predicted_label == 0
            else ' Congrats! This wine is of best Quality')

# This is the main function in which we define our webpage
def main():
    """Render the input form and display the prediction.

    Five numeric inputs are collected with ``st.number_input``. When the
    "Predict" button is clicked they are passed to ``prediction`` and the
    returned message is shown with ``st.success``.
    """
    # Create input fields.
    # Float inputs are displayed with two decimals unless a format is given,
    # so the inputs that step by 0.001 need an explicit three-decimal format;
    # otherwise the shown value is rounded and single steps look like no-ops.
    # The format only affects display, not the value returned by the widget.
    chlorides = st.number_input(
        "Number of chlorides(0-1)",
        min_value=0.000,
        max_value=1.000,
        value=0.01,
        step=0.001,
        format="%0.3f",
    )
    total_sulfur_dioxide = st.number_input(
        "total sulfur dioxide Level (0-200)",
        min_value=0,
        max_value=200,
        value=120,
        step=10,
    )

    # NOTE(review): the 0.01 default looks far from a typical liquid density
    # (close to 1) — confirm the default and the 0-1 range against the
    # training data.
    density = st.number_input(
        "density Level (0-1)",
        min_value=0.00,
        max_value=1.00,
        value=0.01,
        step=0.001,
        format="%0.3f",
    )
    pH = st.number_input(
        "PH Index (0-10)",
        min_value=0.00,
        max_value=10.00,
        value=5.3,
        step=0.01,
    )
    # NOTE(review): the label says "Years" and the input is integer-only;
    # presumably the alcohol feature is a fractional alcohol content — confirm
    # against the training data and adjust the label/type if so.
    alcohol = st.number_input(
        "alcohol in Years(max. 20)",
        min_value=0,
        max_value=20,
        value=5,
        step=1,
    )

    # When 'Predict' is clicked, make the prediction and show it.
    if st.button("Predict"):
        result = prediction(chlorides, total_sulfur_dioxide,
                            density, pH, alcohol)
        st.success(result)
       
# Build the page only when this file is executed as a script (its module name
# is '__main__'), not when it is imported from another module.
if __name__=='__main__':
    main()
    

Overwriting app.py


In [5]:
!streamlit run app.py

^C
