# ARTIFICIAL NEURAL NETWORK ON HOUSING PRICES

**INSTALLING REQUIRED LIBRARIES**

In [None]:
!pip install keras-tuner



## IMPORTING TOOLS

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from kerastuner import RandomSearch
from sklearn.metrics import mean_squared_error, mean_absolute_error

**CHECKING FOR GPU**

In [None]:
# Ask TensorFlow which GPUs it can see, using the stable
# `tf.config.list_physical_devices` entry point rather than its
# `tf.config.experimental` alias.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPU's available :", len(physical_devices))

Num GPU's available : 1


In [None]:
# Load the pre-processed modelling data and discard the 'Unnamed: 0' column
# (presumably a row index that was written into the CSV - TODO confirm).
df = (
    pd.read_csv('drive/MyDrive/Housing project/Housing _data_for_modelling.csv')
    .drop(columns='Unnamed: 0')
)
df.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,GarageType,GarageYrBlt,GarageFinish,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,DateSold
0,120,3,70.0,4928,1,0,3,0,4,0,13,2,2,4,2,6,5,1976,1976,1,1,8,9,2,0.0,3,4,1,2,3,3,0,120,5,0,958,1078,1,4,1,4,958,0,0,958,0,0,2,0,2,1,3,5,6,1,1,1977,1,440,4,4,2,0,205,0,0,0,0,0,2,2007,8,4,128000,13
1,20,3,95.0,15865,1,0,3,0,4,1,12,2,2,0,2,8,6,1970,1970,0,5,12,13,2,0.0,2,2,2,3,1,1,0,351,4,823,1043,2217,1,0,1,4,2217,0,0,2217,1,0,2,0,4,1,2,8,6,1,1,1970,2,621,4,4,2,81,207,0,0,224,0,0,10,2007,8,4,268000,21
2,60,3,92.0,9920,1,0,3,0,1,0,15,2,2,0,5,7,5,1996,1997,1,1,7,7,2,0.0,2,4,2,2,3,0,2,862,5,0,255,1117,1,0,1,4,1127,886,0,2013,1,0,2,1,3,1,3,8,6,1,1,1997,2,455,4,4,2,180,130,0,0,0,0,0,6,2007,8,4,269790,17
3,20,3,105.0,11751,1,0,3,0,4,0,14,2,2,0,2,6,6,1977,1977,3,1,8,9,1,480.0,3,4,1,2,3,3,1,705,5,0,1139,1844,1,0,1,4,1844,0,0,1844,0,0,2,0,3,1,3,7,6,1,1,1977,1,546,4,4,2,0,122,0,0,0,0,0,1,2010,0,4,190000,48
4,20,3,70.0,16635,1,0,3,0,2,0,14,2,2,0,2,6,7,1977,2000,1,1,4,4,3,126.0,2,4,1,2,3,3,0,1246,5,0,356,1602,1,2,1,4,1602,0,0,1602,0,1,2,0,3,1,2,8,6,1,1,1977,0,529,4,4,2,240,0,0,0,0,0,0,6,2009,8,4,215000,41


In [None]:
# Separate the target column from the predictors.
y = df['SalePrice']
X = df.drop(columns='SalePrice')

In [None]:
#creating the function for scaling the dataset
Scaler = MinMaxScaler()
def scale_data(data, fit=True):
    """
    Rescales every feature with the shared `Scaler` (a MinMaxScaler).

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Feature matrix to rescale.
    fit : bool, default True
        When True the scaler learns each column's min/max from `data` before
        transforming it. Pass False to apply the min/max learned by an earlier
        call, e.g. to held-out rows that must be scaled like the training data.

    Returns
    -------
    numpy.ndarray
        The rescaled feature matrix.
    """
    if fit:
        return Scaler.fit_transform(data)
    # Reuse the statistics learned earlier; values outside the fitted range
    # will fall outside the scaler's output range.
    return Scaler.transform(data)

In [None]:
# Scale all feature rows at once.
# NOTE(review): the scaler is fitted here, before the train/validation split in
# the next cell, so the validation rows contribute to the min/max used.
Xnew = scale_data(X)

In [None]:
# Hold out 20% of the scaled rows for validation; the fixed random_state keeps
# the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    Xnew, y, test_size=0.2, random_state=42
)

In [None]:
# Show the shape of each piece of the split.
tuple(part.shape for part in (X_train, y_train, X_val, y_val))

((934, 74), (934,), (234, 74), (234,))

**CREATING MODEL**

In [None]:
#early stopping parameter for preventing model from overfitting
# Training stops once `val_loss` has failed to improve for 5 consecutive epochs.
# `restore_best_weights=True` rolls the model back to its best epoch; otherwise
# it would keep the weights of the last, non-improving epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=5,
                                                  restore_best_weights=True)

In [None]:
def build_model(hp, n_features=74):
    """
    Builds and compiles the regression network for one Keras Tuner trial.

    The tuner chooses the width shared by the three hidden layers, their
    activation, their kernel initializer and the Adam learning rate.

    Parameters
    ----------
    hp : HyperParameters
        Hyperparameter container supplied by the tuner.
    n_features : int, default 74
        Number of input columns; 74 is the width of the training matrix used
        in this notebook.

    Returns
    -------
    A compiled Sequential model with MSE loss that also reports MSE and MAE.
    """
    hp_units = hp.Int('units', min_value=75, max_value=512, step=32)
    hp_activation = hp.Choice('activation', values=['relu', 'linear', 'elu'])
    hp_kernel_initializer = hp.Choice('kernel_initializer',
                                      values=['normal', 'he_normal', 'he_uniform', 'uniform'])
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3])

    # Settings shared by the three hidden layers.
    hidden = dict(units=hp_units,
                  kernel_initializer=hp_kernel_initializer,
                  activation=hp_activation)

    model = Sequential([
        Dense(input_shape=(n_features,), **hidden),
        Dense(**hidden),
        Dense(**hidden),
        # Regression head: keep the output linear. Applying the tuned hidden
        # activation here (relu / elu) clips the predicted price and can leave
        # the single output unit stuck at zero.
        Dense(units=1, activation='linear'),
    ])

    model.compile(loss='mse',
                  optimizer=keras.optimizers.Adam(hp_learning_rate),
                  metrics=['mse', 'mae'])

    return model


## FINDING THE BEST PARAMETERS USING KERAS TUNER

In [None]:
# Random search over the `build_model` space, ranking trials by validation MSE.
# The seed makes the sampled hyperparameter combinations repeatable.
# NOTE(review): results are stored under 'output/house prices15'; KerasTuner is
# documented to resume an existing project there unless `overwrite=True` is
# passed - confirm that reuse is intended.
tuner_search = RandomSearch(build_model,
                            objective='val_mse',
                            max_trials=30,
                            seed=42,
                            directory='output',
                            project_name='house prices15')

In [None]:
# Run the search: each trial trains for at most 100 epochs, validates on 20% of
# the training rows and is cut short by the early-stopping callback.
tuner_search.search(
    X_train,
    y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Trial 30 Complete [00h 00m 02s]
val_mse: 1934640384.0

Best val_mse So Far: 1548912640.0
Total elapsed time: 00h 02m 02s
INFO:tensorflow:Oracle triggered exit


**BUILDING MODEL WITH BEST PARAMETERS**

In [None]:
# Take the top-ranked model from the search (first entry of the returned list).
# NOTE(review): KerasTuner documents these models as already carrying their
# trained weights, so the later fit continues training - confirm.
model = tuner_search.get_best_models(num_models = 1)[0]

In [None]:
# Print the layer-by-layer architecture and parameter counts of the chosen model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 107)               8025      
_________________________________________________________________
dense_1 (Dense)              (None, 107)               11556     
_________________________________________________________________
dense_2 (Dense)              (None, 107)               11556     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 108       
Total params: 31,245
Trainable params: 31,245
Non-trainable params: 0
_________________________________________________________________


**FITTING MODEL WITH BEST HYPERPARAMETERS**

In [None]:
# Train the selected model on the training split, validating on 20% of it.
# `callbacks` is passed as a list, as the Keras API documents and as the tuner
# search call does.
model.fit(X_train,
          y_train,
          batch_size = 32,
          epochs = 50,
          verbose = True,
          callbacks = [early_stopping],
          validation_split = 0.2)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


<tensorflow.python.keras.callbacks.History at 0x7f86dc848150>

# PREDICTING ON TEST DATA

In [None]:
# Load the raw test set (still containing text categories and missing values)
# and preview the first rows.
df_test = pd.read_csv('drive/MyDrive/Housing project/test.csv')
df_test.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,337,20,RL,86.0,14157,Pave,,IR1,HLS,AllPub,Corner,Gtl,StoneBr,Norm,Norm,1Fam,1Story,9,5,2005,2006,Hip,CompShg,VinylSd,VinylSd,Stone,200.0,Gd,TA,PConc,Ex,TA,Gd,GLQ,1249,Unf,0,673,1922,GasA,Ex,Y,SBrkr,1922,0,0,1922,1,0,2,0,3,1,Gd,8,Typ,1,Gd,Attchd,2005.0,Fin,3,676,TA,TA,Y,178,51,0,0,0,0,,,,0,7,2007,WD,Normal
1,1018,120,RL,,5814,Pave,,IR1,Lvl,AllPub,CulDSac,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1984,1984,Gable,CompShg,HdBoard,HdBoard,,0.0,Gd,TA,CBlock,Gd,TA,Av,GLQ,1036,Unf,0,184,1220,GasA,Gd,Y,SBrkr,1360,0,0,1360,1,0,1,0,1,1,Gd,4,Typ,1,Ex,Attchd,1984.0,RFn,2,565,TA,TA,Y,63,0,0,0,0,0,,,,0,8,2009,COD,Abnorml
2,929,20,RL,,11838,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2001,2001,Hip,CompShg,VinylSd,VinylSd,,0.0,Gd,TA,PConc,Gd,TA,Av,Unf,0,Unf,0,1753,1753,GasA,Ex,Y,SBrkr,1788,0,0,1788,0,0,2,0,3,1,Ex,7,Typ,1,TA,Attchd,2001.0,RFn,2,522,TA,TA,Y,202,151,0,0,0,0,,,,0,6,2009,WD,Normal
3,1148,70,RL,75.0,12000,Pave,,Reg,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,7,1941,1950,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,TA,TA,No,Rec,275,Unf,0,429,704,GasA,Ex,Y,SBrkr,860,704,0,1564,0,0,1,1,3,1,Fa,7,Typ,1,Gd,Attchd,1941.0,Unf,1,234,TA,TA,Y,0,0,0,0,0,0,,,,0,7,2009,WD,Normal
4,1227,60,RL,86.0,14598,Pave,,IR1,Lvl,AllPub,CulDSac,Gtl,Somerst,Feedr,Norm,1Fam,2Story,6,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,Stone,74.0,Gd,TA,PConc,Gd,TA,Mn,Unf,0,Unf,0,894,894,GasA,Ex,Y,SBrkr,894,1039,0,1933,0,0,2,1,4,1,Gd,9,Typ,1,Gd,BuiltIn,2007.0,Fin,3,668,TA,TA,Y,100,18,0,0,0,0,,,,0,1,2008,WD,Normal


## Preprocessing the test data the same way as the training data

In [None]:
#dropping the features
def drop_features(data, threshold=40):
    """
    Drops the columns whose share of missing values exceeds `threshold` percent.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect; it is not modified.
    threshold : float, default 40
        Percentage of missing values above which a column is dropped. The
        comparison is strict, so a column at exactly `threshold` is kept.

    Returns
    -------
    pandas.DataFrame
        A new frame without the sparse columns. One line is printed per
        dropped column.
    """
    # Percentage of missing values per column, rounded to two decimals before
    # it is compared and reported.
    missing_pct = (data.isna().sum() / len(data) * 100).round(2)

    sparse_cols = []
    for key, values in missing_pct.items():
        if values > threshold:
            print(f'The {key} has {values} % of missing values.')
            sparse_cols.append(key)

    # Drop everything in one call instead of rebuilding the frame per column.
    return data.drop(columns=sparse_cols)

In [None]:
# Remove the test-set columns that are mostly missing.
df_test = drop_features(df_test)

The Alley has 95.21 % of missing values.
The FireplaceQu has 47.6 % of missing values.
The PoolQC has 100.0 % of missing values.
The Fence has 84.93 % of missing values.
The MiscFeature has 96.58 % of missing values.


In [None]:
#filling the missing values using simple imputer
def fill_values(data):
    """
    Fills missing values column by column with scikit-learn's SimpleImputer.

    Object-dtype (categorical) columns are filled with their most frequent
    value; every other column is filled with its median. Columns without
    missing values are left as they are.

    The fill values are computed from `data` itself.
    NOTE(review): for a test set this means they may differ from the values
    used when the training data was prepared - confirm that is acceptable.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to impute; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` with the missing values filled.
    """
    # Work on a copy so the caller's frame is not changed behind its back.
    data = data.copy()

    for labels, content in data.items():
        if not pd.isnull(content).any():
            continue
        if pd.api.types.is_object_dtype(content):
            strategy = 'most_frequent'
        else:
            strategy = 'median'
        imputer = SimpleImputer(strategy=strategy)
        # The imputer expects a 2-D array; take the single column back out.
        data[labels] = imputer.fit_transform(content.values.reshape(-1, 1))[:, 0]
    return data

In [None]:
# Impute the remaining missing values in the test set.
df_test = fill_values(df_test)

In [None]:
#Converting all the features into numeric
le = LabelEncoder()

#creating function for converting the features into numerical
def convert_features(data):
    """
    Replaces every non-numeric column with integer codes from the shared
    LabelEncoder `le`.

    The encoder is refitted on each column in turn, so after the call it only
    holds the mapping of the last column it encoded.
    NOTE(review): the codes are learned from `data` itself; for a test set they
    are only comparable to the training codes if both contain the same set of
    categories - confirm.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to encode; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` in which every column is numeric.
    """
    # Work on a copy so the caller's frame is not changed behind its back.
    data = data.copy()
    for cols in data.columns:
        if not pd.api.types.is_numeric_dtype(data[cols]):
            data[cols] = le.fit_transform(data[cols])
    return data

In [None]:
# Encode the categorical test columns as integers.
df_test = convert_features(df_test)

In [None]:
# Columns present in the test set but absent from the training features.
set(df_test.columns) - set(X.columns)

{'GarageCars', 'Id'}

In [None]:
# Drop the two test-only columns, then list the training features that the
# test frame still lacks.
df_test_final = df_test.drop(columns=['GarageCars', 'Id'])
set(X.columns).difference(df_test_final.columns)

{'DateSold'}

In [None]:
# Rebuild `DateSold` as the number of months elapsed since January 2006, which
# is what the training rows previewed earlier contain (MoSold=2, YrSold=2007
# gives 13; MoSold=1, YrSold=2010 gives 48). Gluing the month and year digits
# together instead produces values such as 72007, which are neither
# chronological nor on the scale the model was trained on.
# NOTE(review): the encoding is inferred from the training preview - confirm
# against the notebook that produced the modelling CSV.
df_test_final['DateSold'] = (df_test_final['YrSold'] - 2006) * 12 + (df_test_final['MoSold'] - 1)

In [None]:
# Make sure `DateSold` is stored with an integer dtype.
df_test_final['DateSold'] = df_test_final['DateSold'].astype('int')

In [None]:
#scaling data
# Reuse the scaler that was fitted on the training features. Fitting a fresh
# scaler on the test rows would map them with different per-column min/max
# values than the ones the model was trained on.
scaler = Scaler
def scale_data(data):
    """
    Scales held-out data with the MinMaxScaler already fitted on the training
    features.

    This definition replaces the earlier `scale_data` for the rest of the
    notebook: it only transforms and never refits.

    NOTE(review): `data` must have the training feature columns in the same
    order, each encoded on the same scale as in training; a column that is not
    will land far outside the scaler's output range.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)

    Returns
    -------
    numpy.ndarray
        The rescaled feature matrix.
    """
    data = scaler.transform(data)
    return data

In [None]:
# Scale the test features; from here on `df_test_final` is a NumPy array.
df_test_final = scale_data(df_test_final)

## Predicting on test data

In [None]:
# Predict one price per test row, then flatten the single-column network
# output into a named Series.
predictions = model.predict(df_test_final)
preds = pd.Series(predictions.ravel(), name='Predictions')


0      402211.937500
1      230504.015625
2      319135.875000
3      173531.765625
4      287439.343750
           ...      
287    304982.718750
288    147207.140625
289    186751.453125
290    199408.234375
291     93342.437500
Name: Predictions, Length: 292, dtype: float32

In [None]:
# Put each test Id next to its prediction. The column-wise concat aligns rows
# on the index (presumably the default 0..n-1 index on both sides).
IDs = df_test[['Id']]
Final_predictions = pd.concat([IDs, preds], axis=1)
Final_predictions.head()

Unnamed: 0,Id,Predictions
0,337,402211.9375
1,1018,230504.015625
2,929,319135.875
3,1148,173531.765625
4,1227,287439.34375


## SAVING PREDICTIONS


In [None]:
# Write only the Id and Predictions columns. Without `index=False` pandas also
# writes the row index as an extra, unnamed first column.
Final_predictions.to_csv('drive/MyDrive/Housing project/DeepLearningPredictions.csv', index=False)

## SAVING MODEL

In [None]:
# Save the trained model to an HDF5 (.h5) file on Drive.
model.save('drive/MyDrive/Housing project/HousingPricePredictionANN.h5')