## Predicting sale price
### This notebook contains the necessary code for predicting sale price with ML

Importing packages

In [16]:
try:
    import pandas as pd
    import plotly.express as px 
    import numpy as np
    import tensorflow as tf
    from sklearn.model_selection import train_test_split
    import sklearn
except:
    !pip install scikit-learn
    !pip install pandas
    !pip install plotly
    !pip install tensorflow
    import pandas as pd
    import plotly.express as px 
    import numpy as np
    import tensorflow as tf
    from sklearn.model_selection import train_test_split
    import sklearn

Defining the modell

In [17]:
def createModel(normlayer,trainshape1):
    model=tf.keras.Sequential([
        normlayer,
        tf.keras.layers.Dense(256,activation='relu',input_shape=(trainshape1,)),
        tf.keras.layers.Dropout(.1),
        tf.keras.layers.Dense(256,activation='relu'),
        tf.keras.layers.Dropout(.1),
        tf.keras.layers.Dense(128,activation='relu'),
        tf.keras.layers.Dense(1)
        
        
    ])
    model.compile(optimizer='adam',loss='mse',metrics=['mae'])
    return model

Loading data and removing KitchenQual since I have concluded that this knowledge doesn't improve the results as significant as it makes the program more complex 

In [18]:
data = pd.read_pickle('../data/clean_housing_data.pkl')
a=data.pop('KitchenQual')

In [19]:
data.columns

Index(['1stFlrSF', 'GarageArea', 'GarageYrBlt', 'GrLivArea', 'LotArea',
       'MasVnrArea', 'OpenPorchSF', 'OverallQual', 'TotalBsmtSF', 'YearBuilt',
       'YearRemodAdd', 'SalePrice'],
      dtype='object')

Splitting the data to feature and target sets

In [20]:
features = data.copy()
target=features.pop('SalePrice')

Save the list of important features

In [21]:
import pickle
with open('../data/important_features.pkl','wb') as f:
    pickle.dump(list(features.columns),f)

Creating test and train sets

In [22]:
X_train,X_test,Y_train,Y_test=train_test_split(features,target,test_size=.1)

Setting up the normalisation layer

In [23]:
normlayer=tf.keras.layers.Normalization()
normlayer.adapt(X_train)

Creating and fitting the model

In [24]:
es=tf.keras.callbacks.EarlyStopping(patience=10)
model=createModel(normlayer,X_train.shape[1])
model.fit(X_train,Y_train,batch_size=4,epochs=50,verbose=1,validation_split=.2,callbacks=[es])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50


<keras.callbacks.History at 0x25e717899a0>

### Evaluating the model

Evaluate on test set given the metrics Mean Squared Error and Mean Absolute Error

In [25]:
model.evaluate(X_test,Y_test)



[514159456.0, 16324.748046875]

Calculating R2

In [26]:
Y_test_prediction=model.predict(X_test).flatten()
Y_train_prediction= model.predict(X_train).flatten()

import pickle

with open('../data/Y_test_prediction.pkl','wb') as f:
    pickle.dump(list(Y_test_prediction),f)

with open('../data/Y_train_prediction.pkl','wb') as f:
    pickle.dump(list(Y_train_prediction),f)
    
with open('../data/Y_test.pkl','wb') as f:
    pickle.dump(list(Y_test),f)
    
with open('../data/Y_train.pkl','wb') as f:
    pickle.dump(list(Y_train),f)

r2_scores=[sklearn.metrics.r2_score(list(Y_test_prediction),list(Y_test)),
           sklearn.metrics.r2_score(list(Y_train_prediction),list(Y_train))]

print(f'R2 on test set: {r2_scores[0]}')
print(f'R2 on train set: {r2_scores[1]}')

with open('../data/r2_scores.pkl','wb') as f:
    pickle.dump(r2_scores,f)

R2 on test set: 0.7925088926906831
R2 on train set: 0.8217092341015256


Plotting the actual and the predicted values

In [27]:
fig=px.scatter({'Actual':list(Y_test),'Predicted':list(Y_test_prediction)},
           x='Actual',
           y='Predicted',
           range_x=[50_000,400_000],range_y=[50_000,400_000])

fig.add_shape(type='line',
                x0=70_000,
                y0=70_000,
                x1=380_000,
                y1=380_000,
                line=dict(color='Red',),
                xref='x',
                yref='y'
)
fig

In [28]:
fig=px.scatter({'Actual':list(Y_train),'Predicted':list(Y_train_prediction)},
           x='Actual',
           y='Predicted',
           range_x=[50_000,400_000],range_y=[50_000,400_000])

fig.add_shape(type='line',
                x0=70_000,
                y0=70_000,
                x1=380_000,
                y1=380_000,
                line=dict(color='Red',),
                xref='x',
                yref='y'
)
fig

Prediction

In [29]:
inhereted=pd.read_pickle('../data/inhereted_houses_corr.pkl')
inhereted.pop('KitchenQual')
inhereted.pop('LotFrontage')
model.predict(inhereted)



array([[141590.83],
       [160609.14],
       [175665.73],
       [181359.2 ]], dtype=float32)

Saving the model

In [30]:
model.save('../data/Model')



INFO:tensorflow:Assets written to: ../data/Model\assets


INFO:tensorflow:Assets written to: ../data/Model\assets
