# Importing Model


In [21]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [22]:
# Load the training data and split its columns by dtype.
df_train = pd.read_csv('./train.csv')
categorical_cols = df_train.select_dtypes(exclude=['float64', 'int64'])
numeric_cols = df_train.select_dtypes(include=['float64', 'int64'])

# Here filling NaN numeric values with 0
df_train[numeric_cols.columns] = df_train[numeric_cols.columns].fillna(0)

# One-hot encode the categorical values. Each column is encoded on its own and
# the generated columns are named "<column>_<category>" (a missing value shows
# up as "<column>_nan", e.g. 'BsmtExposure_nan' in cols_to_keep below).
one_hot_encoder = OneHotEncoder()
encoded_frames = []
for col in categorical_cols.columns:
  encoded_values = one_hot_encoder.fit_transform(df_train[[col]]).toarray()
  encoded_frames.append(pd.DataFrame(
      encoded_values,
      columns=one_hot_encoder.get_feature_names_out(),
      index=df_train.index))

# Replace the raw categorical columns with their encoded counterparts in a
# single join instead of growing the frame once per column.
df_one_hot_train = df_train.drop(columns=categorical_cols.columns).join(encoded_frames)

# Moving SalePrice (y) to be last column
df_one_hot_train = df_one_hot_train[[c for c in df_one_hot_train if c != "SalePrice"] + ["SalePrice"]]

# Features used by the model, followed by the target. The order of the feature
# names fixes the order of model.coef_, which the Streamlit app hard-codes.
cols_to_keep = ['GrLivArea', '2ndFlrSF','1stFlrSF','BsmtExposure_Gd','BsmtExposure_Av','BsmtExposure_Mn','BsmtExposure_No','BsmtExposure_nan',
                'Foundation_Wood','Foundation_Slab','Foundation_BrkTil','Foundation_CBlock','Foundation_PConc','Foundation_Stone','BldgType_Twnhs',
                'BldgType_TwnhsE','BldgType_Duplex','BldgType_2fmCon','BldgType_1Fam', 'GarageType_nan','GarageType_CarPort','GarageType_Basment',
                'GarageType_Detchd','GarageType_Attchd','GarageType_BuiltIn','GarageType_2Types','LotShape_IR3','LotShape_IR2','LotShape_Reg',
                'LotShape_IR1','Fence_GdPrv','Fence_MnWw','Fence_GdWo','Fence_nan','Fence_MnPrv','RoofStyle_Flat','RoofStyle_Hip','RoofStyle_Gable',
                'RoofStyle_Gambrel','RoofStyle_Mansard','RoofStyle_Shed', 'SalePrice']


final_df = df_one_hot_train[cols_to_keep]

# Everything except the target is a feature.
feature_cols = [c for c in cols_to_keep if c != 'SalePrice']

print(final_df[feature_cols].columns)

# Training a new model with only the new columns
X_train, X_test, y_train, y_test = train_test_split(final_df[feature_cols], final_df['SalePrice'], test_size=0.3, random_state=10)

# NOTE: LinearRegression fits an intercept by default, so reproducing a
# prediction by hand needs model.intercept_ in addition to model.coef_.
model = LinearRegression().fit(X_train, y_train)

Index(['GrLivArea', '2ndFlrSF', '1stFlrSF', 'BsmtExposure_Gd',
       'BsmtExposure_Av', 'BsmtExposure_Mn', 'BsmtExposure_No',
       'BsmtExposure_nan', 'Foundation_Wood', 'Foundation_Slab',
       'Foundation_BrkTil', 'Foundation_CBlock', 'Foundation_PConc',
       'Foundation_Stone', 'BldgType_Twnhs', 'BldgType_TwnhsE',
       'BldgType_Duplex', 'BldgType_2fmCon', 'BldgType_1Fam', 'GarageType_nan',
       'GarageType_CarPort', 'GarageType_Basment', 'GarageType_Detchd',
       'GarageType_Attchd', 'GarageType_BuiltIn', 'GarageType_2Types',
       'LotShape_IR3', 'LotShape_IR2', 'LotShape_Reg', 'LotShape_IR1',
       'Fence_GdPrv', 'Fence_MnWw', 'Fence_GdWo', 'Fence_nan', 'Fence_MnPrv',
       'RoofStyle_Flat', 'RoofStyle_Hip', 'RoofStyle_Gable',
       'RoofStyle_Gambrel', 'RoofStyle_Mansard', 'RoofStyle_Shed'],
      dtype='object')


In [None]:
# These coefficients are used in the streamlit app below.
# The intercept is displayed with them because LinearRegression fits one by
# default; a hand-computed prediction is intercept + sum(coef * feature).
model.intercept_, model.coef_

# Install Streamlit and localtunnel

In [1]:
!pip install -q streamlit
!npm install localtunnel

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m60.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.5/188.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m341.8/341.8 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for validators (setup.py) ... [?25l[?25hdone
[K[?25h[37;40mnpm[0m [0m

# Create a Streamlit app example

In [119]:
%%writefile app.py
# Cheatsheet available at https://docs.streamlit.io/library/cheatsheet

import streamlit as st
# from streamlit_extras.switch_page_button import switch_page
import pandas as pd
import numpy as np




st.subheader('Please Input Your Project Specifications')

# Numeric inputs (square feet).
grLivArea = st.number_input("Above Ground Living Area (Square Feet)", min_value=1)
secFloorSF = st.number_input("Second Floor (Square Feet)", min_value=1)
firFloorSF = st.number_input("First Floor (Square Feet)", min_value=1)

# Categorical inputs. The tuples only control the order shown to the user;
# every label must also appear in the matching *_options list below.
bsmtExposure = st.selectbox('Basement Exposure', ('Good Exposure', 'Average Exposure', 'Minimum Exposure', 'No Exposure', 'No Basement'))
foundation = st.selectbox('Foundation', ('Brick & Tile','Cinder Block','Poured Concrete','Slab','Stone','Wood'))
bldgType = st.selectbox('Type of Dwelling', ('Single-family Detached','Two-family Conversion','Duplex','Townhouse End Unit','Townhouse Inside Unit'))
garageType = st.selectbox('Garage Location', ('More than one type of garage','Attached to home','Basement Garage','Built-In (Garage part of house - typically has room above garage)','Car Port','Detached from home','No Garage'))
lotShape = st.selectbox('General shape of property', ('Regular','Slightly irregular','Moderately Irregular', 'Irregular'))
fence = st.selectbox('Fence quality', ('Good Privacy','Minimum Privacy','Good Wood','Minimum Wood/Wire','No Fence'))
roofStyle = st.selectbox('Type of Roof', ('Flat','Gable','Gambrel (Barn)','Hip','Mansard','Shed'))

gen_predictions = st.button('Generate Prediction')

# Lookup lists used to turn a selection into a one-hot position:
# feature index = <group offset> + <list>.index(<selection>).
# Each list is therefore ordered like the one-hot columns of its group in the
# model's feature order (column suffixes are given in the trailing comments),
# NOT like the display order above.
bsmt_options = ['Good Exposure', 'Average Exposure', 'Minimum Exposure', 'No Exposure', 'No Basement']  # Gd, Av, Mn, No, nan
foundation_options = ['Wood','Slab','Brick & Tile','Cinder Block','Poured Concrete','Stone']  # Wood, Slab, BrkTil, CBlock, PConc, Stone
bldg_options = ['Townhouse Inside Unit','Townhouse End Unit','Duplex','Two-family Conversion','Single-family Detached']  # Twnhs, TwnhsE, Duplex, 2fmCon, 1Fam
garage_options = ['No Garage','Car Port','Basement Garage','Detached from home','Attached to home','Built-In (Garage part of house - typically has room above garage)','More than one type of garage']  # nan, CarPort, Basment, Detchd, Attchd, BuiltIn, 2Types
lot_options = ['Irregular','Moderately Irregular','Regular','Slightly irregular']  # IR3, IR2, Reg, IR1
fence_options = ['Good Privacy','Minimum Wood/Wire','Good Wood','No Fence','Minimum Privacy']  # GdPrv, MnWw, GdWo, nan, MnPrv
roof_options = ['Flat','Hip','Gable','Gambrel (Barn)','Mansard','Shed']  # Flat, Hip, Gable, Gambrel, Mansard, Shed


if gen_predictions:
  # Coefficients of the linear model, one per feature, in this feature order:
  #   0 GrLivArea, 1 2ndFlrSF, 2 1stFlrSF,
  #   3-7 BsmtExposure, 8-13 Foundation, 14-18 BldgType, 19-25 GarageType,
  #   26-29 LotShape, 30-34 Fence, 35-40 RoofStyle
  # NOTE(review): no intercept term is added anywhere below. If the model was
  # fitted with an intercept (scikit-learn's LinearRegression default), every
  # price shown is shifted by that constant -- confirm and add it.
  coef = [-7.06795999e+00,  8.13266276e+01,  1.15934326e+02,  3.31709160e+04,
        9.76049844e+03, -1.52691007e+02, -1.07394636e+04, -3.20392598e+04,
      -5.13996397e+03,  8.70152253e+03, -1.07371517e+04, -4.48453612e+03,
        3.01932905e+04, -1.85331613e+04,  1.32954149e+04,  1.51727981e+04,
      -2.45898771e+04, -2.00918960e+04,  1.62135601e+04, -8.48270488e+03,
      -9.75725900e+03, -2.86881406e+04,  7.42035111e+03,  1.46451613e+04,
        2.64291428e+04, -1.56655071e+03, -6.24834458e+04,  3.07502720e+04,
        1.29604440e+04,  1.87727299e+04,  7.94554190e+02,  9.04966745e+03,
      -8.71663069e+03,  1.90492538e+03, -3.03251633e+03, -3.25369803e+04,
        2.70901668e+04,  8.18553810e+03,  1.00061876e+03, -5.40418553e+02,
      -3.19892472e+03]

  # Feature vector: three numeric inputs followed by one-hot groups. Each
  # selection sets exactly one position inside its group.
  x = [0] * 41
  x[0] = grLivArea
  x[1] = secFloorSF
  x[2] = firFloorSF
  x[3 + bsmt_options.index(bsmtExposure)] = 1
  x[8 + foundation_options.index(foundation)] = 1
  x[14 + bldg_options.index(bldgType)] = 1
  x[19 +garage_options.index(garageType)] = 1
  x[26 + lot_options.index(lotShape)]= 1
  x[30 + fence_options.index(fence)]= 1
  x[35 + roof_options.index(roofStyle)]= 1

  # Per-feature contribution to the price; the prediction is their sum.
  contributions = [co*var for co,var in zip(coef,x)]
  cost_prediction = sum(contributions)

  st.subheader('Prediction: '+"$"+f'{round(cost_prediction):,}')


  st.subheader('Visualizations')

  # (label, first index, one-past-last index) of each input inside x / coef.
  # Summing each group's slice yields one value per input no matter which
  # products happen to be zero, so the chart always has ten bars.
  feature_groups = [
      ('Above Ground Living Area', 0, 1),
      ('Second Floor', 1, 2),
      ('First Floor', 2, 3),
      ('Basement', 3, 8),
      ('Foundation', 8, 14),
      ('Dwelling', 14, 19),
      ('Garage', 19, 26),
      ('Property Shape', 26, 30),
      ('Fence', 30, 35),
      ('Roof', 35, 41),
  ]
  breakdown = pd.DataFrame({
      'feature': [label for label, _, _ in feature_groups],
      'value': [sum(contributions[start:stop]) for _, start, stop in feature_groups]})

  st.write('Breakdown of the project valuation:')
  st.bar_chart(breakdown, x="feature", y="value")

  st.markdown("""---""")
  st.text("")
  st.text("")
  st.write("The following graphs show the change in price while holding the other inputs constant:")

  gtab1, gtab2, gtab3, gtab4 = st.tabs(["Property Size and Layout", "Foundation", "Property Features", "Exterior Components"])

  with gtab1:

    def price_vs_square_feet(feature_index, column, square_feet):
      """Return a frame of predicted price for each value of one numeric input.

      feature_index: position of the numeric input in x / coef.
      column: name given to the square-footage column of the result.
      square_feet: values to evaluate the input at.

      All other inputs keep the values currently held in x; their combined
      contribution is the constant offset of the resulting line and
      coef[feature_index] is its slope.
      """
      offset = sum(co*var for i, (co, var) in enumerate(zip(coef, x)) if i != feature_index)
      frame = pd.DataFrame({column: square_feet})
      frame['price'] = frame[column]*coef[feature_index] + offset
      return frame

    st.subheader('Predicted Sales Price vs Second floor square footage')
    st.line_chart(price_vs_square_feet(1, '2ndFlrSF', np.arange(0, 2065)), x='2ndFlrSF', y='price')

    # The first-floor line uses the first-floor coefficient (index 2) for both
    # the slope and the term left out of the offset.
    st.subheader('Predicted Sales Price vs First floor square footage')
    st.line_chart(price_vs_square_feet(2, '1stFlrSF', np.arange(0, 2065)), x='1stFlrSF', y='price')

    st.subheader('Predicted Sales Price vs Above grade (ground) living area square footage')
    st.line_chart(price_vs_square_feet(0, 'GrLivArea', np.arange(300, 5642)), x='GrLivArea', y='price')


  with gtab2:

    def graph_data(starting_index, num, index_increment, t, coefficients):
      """Price every option of one one-hot group, other inputs held fixed.

      starting_index: position of the group's first entry in the feature list.
      num: number of entries in the group.
      index_increment: offset, within the group, of the entry to clear first.
      t: feature list; it is copied, so the caller's list is left untouched.
      coefficients: one weight per feature, paired positionally with t.

      Returns a list of `num` rounded totals, one per entry of the group, in
      the group's positional order.
      """
      features = t.copy()
      # Clear the given entry so that only the entry under evaluation is set.
      features[starting_index + index_increment] = 0
      prices = []
      for slot in range(starting_index, starting_index + num):
        features[slot] = 1
        prices.append(round(sum(weight * value for weight, value in zip(coefficients, features))))
        # Reset before moving on to the next entry of the group.
        features[slot] = 0
      return prices




    st.subheader('Predicted Sales Price vs Basement Exposure')
    bsmt_price_alt= graph_data(3,5,bsmt_options.index(bsmtExposure),x, coef)

    bsmt_exp_alt = pd.DataFrame({
        'type':['Good Exposure', 'Average Exposure', 'Mimimum Exposure', 'No Exposure', 'No Basement'],
        'price':bsmt_price_alt})
    st.bar_chart(bsmt_exp_alt, x='type', y='price')



    st.subheader('Predicted Sales Price vs Type of Foundation')

    foundation_price_alt= graph_data(8,6,foundation_options.index(foundation),x, coef)
    foundation_alt = pd.DataFrame({
        'type':['Brick & Tile','Cinder Block','Poured Contrete','Slab','Stone','Wood'],
        'price':foundation_price_alt})
    st.bar_chart(foundation_alt, x='type', y='price')



  # Property Features
  with gtab3:

    st.subheader('Predicted Sales Price vs Type of Building')
    BldgType_price_alt = graph_data(14,5,bldg_options.index(bldgType),x, coef)
    BldgType_alt = pd.DataFrame({
        'type':['Single-family Detached','Two-family Conversion','Duplex','Townhouse End Unit','Townhouse Inside Unit'],
        'price':BldgType_price_alt})
    st.bar_chart(BldgType_alt, x='type', y='price')

    st.subheader('Predicted Sales Price vs Lot Shape')
    LotShape_price_alt = graph_data(26,4,lot_options.index(lotShape),x, coef)
    LotShape_alt = pd.DataFrame({
        'type':['Regular','Slightly irregular','Moderately Irregular', 'Irregular'],
        'price':LotShape_price_alt})
    st.bar_chart(LotShape_alt, x='type', y='price')

    st.subheader('Predicted Sales Price vs Garage Type')
    GarageType_price_alt = graph_data(19,7,garage_options.index(garageType),x, coef)
    GarageType_alt = pd.DataFrame({
        'type':['More than one type of garage','Attached to home','Basement Garage','Built-In (Garage part of house - typically has room above garage)','Car Port','Detached from home','No Garage'],
        'price':GarageType_price_alt})
    st.bar_chart(GarageType_alt, x='type', y='price')



  # Exterior Components
  with gtab4:
    st.subheader('Predicted Sales Price vs Fence Privacy')
    Fence_price_alt = graph_data(30,5,fence_options.index(fence),x, coef)
    Fence_alt = pd.DataFrame({
        'type':['Good Privacy','Minimum Privacy','Good Wood','Minimum Wood/Wire','No Fence'],
        'price':Fence_price_alt})
    st.bar_chart(Fence_alt, x='type', y='price')

    st.subheader('Predicted Sales Price vs Roofing Style')
    RoofStyle_price_alt = graph_data(35,6,roof_options.index(roofStyle),x, coef)
    RoofStyle_alt = pd.DataFrame({
        'type':['Flat','Gable','Gabrel (Barn)','Hip	Hip','Mansard','Shed'],
        'price':RoofStyle_price_alt})
    st.bar_chart(RoofStyle_alt, x='type', y='price')




Overwriting app.py


# Run the app and keep logs

In [120]:
# Start the Streamlit app as a background shell job (trailing `&`) and send its
# output to /content/logs.txt (`&>`), so the cell returns immediately.
# NOTE(review): assumes app.py was written to /content -- confirm the working directory.
!streamlit run /content/app.py &>/content/logs.txt &

In [None]:
# Print the address reported by ipv4.icanhazip.com for this machine.
# NOTE(review): presumably needed as the password on localtunnel's landing page -- confirm.
!curl ipv4.icanhazip.com
# Open a localtunnel to local port 8501; the public URL is printed in the output.
# NOTE(review): 8501 is presumably the port the Streamlit app listens on -- confirm.
!npx localtunnel --port 8501

34.125.234.231
[K[?25hnpx: installed 22 in 2.833s
your url is: https://dark-cities-happen.loca.lt
