In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Flatten, InputLayer
import keras
# Reading the data
train = pd.read_csv('./dataset/houseprice_data/train.csv')
trainX, trainY = train.iloc[:, :train.shape[1]-1], train.iloc[:, train.shape[1]-1]
# There are a total of 43 categorical columns
categoricals = trainX.loc[:, trainX.dtypes == 'O'].columns
len(categoricals) # 43
# Preprocessing step: 
# One Hot Encoder cannot work with NaN, hence filling NaN with mode of categorical columns
cat_features = trainX.loc[:, categoricals]
cat_features = cat_features.fillna(cat_features.mode().iloc[0, :])
# One hot encoding these features
ohe = OneHotEncoder(handle_unknown='ignore')
res = ohe.fit_transform(cat_features).toarray()
cols = np.array([])
for i in range(cat_features.shape[1]):
    cols = np.concatenate((cols, categoricals[i] + '_' + np.sort(cat_features.iloc[:, i].unique())))    
cat = pd.DataFrame(res, columns=cols)
# Total 252 categorical features
cat.shape # (1460, 252)
# Dropping original categorical variables
trainX = trainX.drop(categoricals, axis=1)
# Concatenating the One Hot Encoded variables to the train dataset
trainX = pd.concat([trainX, cat], axis=1)
# New data shape
trainX.shape # (1460, 289)
# Filling the NaN with median
trainX.fillna(trainX.median(), inplace=True)
# Normalizing training features
scalar = MinMaxScaler()
norm_train = pd.DataFrame(scalar.fit_transform(trainX), columns=trainX.columns)
# Normalizing training target
scalar_target = MinMaxScaler()
trainY = scalar_target.fit_transform(trainY.values.reshape(-1, 1))
# Defining the network
model = Sequential([
  Dense(norm_train.shape[1], input_dim=norm_train.shape[1], activation='sigmoid'),    
  Dense(units=norm_train.shape[1]//2, activation='sigmoid'),    
  Dense(units=1, activation='softmax'),
])
# Printing model summary
model.summary()
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #   
# =================================================================
# dense_34 (Dense)             (None, 289)               83810     
# _________________________________________________________________
# dense_35 (Dense)             (None, 144)               41760     
# _________________________________________________________________
# dense_36 (Dense)             (None, 1)                 145       
# =================================================================
# Total params: 125,715
# Trainable params: 125,715
# Non-trainable params: 0
# Compiling and Training Network
model.compile(optimizer='sgd', loss='mean_squared_error')
model.fit(trainX, trainY, batch_size=512, epochs=20, verbose=1, validation_split=0.2)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - loss: 0.6476 - val_loss: 0.6456
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.6481 - val_loss: 0.6456
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.6491 - val_loss: 0.6456
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.6500 - val_loss: 0.6456
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.6501 - val_loss: 0.6456
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.6479 - val_loss: 0.6456
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.6478 - val_loss: 0.6456
Epoch 8/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.6481 - val_loss: 0.6456
Epoch 9/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20

<keras.src.callbacks.history.History at 0x2d08613f790>

In [2]:
train.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000
