In [1]:
import pandas as pd
from libs.mlproject import CSVLoaderMixin, MLProject, ModelInterface

from tensorflow.keras import  Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
original_train_data = pd.read_csv('../data/house-prices/train.csv')

In [3]:
original_train_data.isna().value_counts()

Id     MSSubClass  MSZoning  LotFrontage  LotArea  Street  Alley  LotShape  LandContour  Utilities  LotConfig  LandSlope  Neighborhood  Condition1  Condition2  BldgType  HouseStyle  OverallQual  OverallCond  YearBuilt  YearRemodAdd  RoofStyle  RoofMatl  Exterior1st  Exterior2nd  MasVnrType  MasVnrArea  ExterQual  ExterCond  Foundation  BsmtQual  BsmtCond  BsmtExposure  BsmtFinType1  BsmtFinSF1  BsmtFinType2  BsmtFinSF2  BsmtUnfSF  TotalBsmtSF  Heating  HeatingQC  CentralAir  Electrical  1stFlrSF  2ndFlrSF  LowQualFinSF  GrLivArea  BsmtFullBath  BsmtHalfBath  FullBath  HalfBath  BedroomAbvGr  KitchenAbvGr  KitchenQual  TotRmsAbvGrd  Functional  Fireplaces  FireplaceQu  GarageType  GarageYrBlt  GarageFinish  GarageCars  GarageArea  GarageQual  GarageCond  PavedDrive  WoodDeckSF  OpenPorchSF  EnclosedPorch  3SsnPorch  ScreenPorch  PoolArea  PoolQC  Fence  MiscFeature  MiscVal  MoSold  YrSold  SaleType  SaleCondition  SalePrice
False  False       False     False        False    False   Tru

In [4]:
x_train = original_train_data[['MSSubClass', 'MSZoning', 'LotArea', 'Neighborhood']]
x_train.head()

Unnamed: 0,MSSubClass,MSZoning,LotArea,Neighborhood
0,60,RL,8450,CollgCr
1,20,RL,9600,Veenker
2,60,RL,11250,CollgCr
3,70,RL,9550,Crawfor
4,60,RL,14260,NoRidge


In [15]:
class HousePriceProject(CSVLoaderMixin, MLProject):
  train_file_path = '../data/house-prices/train.csv'
  test_file_path = '../data/house-prices/test.csv'
  x_columns = ['MSSubClass', 'MSZoning', 'LotArea', 'Neighborhood']
  x_columns_to_dummify = ['MSSubClass', 'MSZoning', 'Neighborhood']
  y_columns = ['SalePrice']
  fit_kwargs ={
    'batch_size' : 30,
    'epochs' : 20,
    'callbacks' : [EarlyStopping(monitor='loss', patience=2)],
  }

  def get_model(self) -> ModelInterface:
    input_layer = Input(shape=self.x_train.shape[1])
    layer = Dense(60, activation='relu')(input_layer)
    layer = Dense(60, activation='sigmoid')(layer)
    layer = Dense(1, activation='relu')(layer)
    model = Model(input_layer, layer)
    model.compile(optimizer='adam', loss='binary_crossentropy')
    print(model.summary())
    return model

In [16]:
project = HousePriceProject()

In [17]:
project.x_train

Unnamed: 0,LotArea,MSSubClass_20,MSSubClass_30,MSSubClass_40,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,...,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker
0,8450,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9600,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,11250,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,9550,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,14260,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,7917,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1456,13175,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1457,9042,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1458,9717,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
project.y_train

Unnamed: 0,SalePrice
0,208500
1,181500
2,223500
3,140000
4,250000
...,...
1455,175000
1456,210000
1457,266500
1458,142125


In [19]:
project.train()



Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 46)]              0         
_________________________________________________________________
dense_4 (Dense)              (None, 60)                2820      
_________________________________________________________________
dense_5 (Dense)              (None, 60)                3660      
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 61        
Total params: 6,541
Trainable params: 6,541
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
