In [155]:
!pip install opendatasets



In [156]:
import opendatasets as od
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn import preprocessing as ps
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

In [157]:
od.download(
    "https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques/data"
)

Skipping, found downloaded files in "./house-prices-advanced-regression-techniques" (use force=True to force download)


In [158]:
train=pd.read_csv("/content/house-prices-advanced-regression-techniques/train.csv")
test=pd.read_csv("/content/house-prices-advanced-regression-techniques/test.csv")

In [159]:
train.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [160]:
null_keys=[k for k,i in train.isnull().sum().items() if i!=0]
null_keys

['LotFrontage',
 'Alley',
 'MasVnrType',
 'MasVnrArea',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Electrical',
 'FireplaceQu',
 'GarageType',
 'GarageYrBlt',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PoolQC',
 'Fence',
 'MiscFeature']

In [161]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [162]:
from sklearn.preprocessing import OrdinalEncoder

oe=OrdinalEncoder()
non_numeric_features=train.select_dtypes(exclude='number').columns.tolist()
for i in non_numeric_features:
  train[i]=oe.fit_transform(train[[i]])

In [163]:
from sklearn.impute import KNNImputer
mssi=KNNImputer(n_neighbors=2, weights="uniform")
for i in null_keys:
  train[i]=mssi.fit_transform(train[[i]])

In [164]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   float64
 3   LotFrontage    1460 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   float64
 6   Alley          1460 non-null   float64
 7   LotShape       1460 non-null   float64
 8   LandContour    1460 non-null   float64
 9   Utilities      1460 non-null   float64
 10  LotConfig      1460 non-null   float64
 11  LandSlope      1460 non-null   float64
 12  Neighborhood   1460 non-null   float64
 13  Condition1     1460 non-null   float64
 14  Condition2     1460 non-null   float64
 15  BldgType       1460 non-null   float64
 16  HouseStyle     1460 non-null   float64
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [165]:
y=train['SalePrice']
train=train.drop(['SalePrice','Id'],axis=1)
normal=ps.MinMaxScaler(feature_range=(0, 1))
for i in train.keys():
  train[i]=normal.fit_transform(np.array(train[i]).reshape(-1,1))
y=normal.fit_transform(np.array(y).reshape(-1,1))

In [166]:
pca = PCA(n_components=10)
train = pca.fit_transform(train)

In [167]:
x_train,x_test,y_train,y_test=train_test_split(train,y,test_size=0.2,random_state=0)

In [168]:
model = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(10)),
    keras.layers.Dense(128, activation='sigmoid'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='tanh'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(16, activation='sigmoid'),
    keras.layers.Dense(8, activation='tanh'),
    keras.layers.Dense(1, activation='relu'),
])

model.compile(loss=keras.losses.MeanSquaredError(),
              optimizer=keras.optimizers.Adam(learning_rate=0.5),
              metrics=["accuracy"])
model.fit(
    x_train,
    y_train,
    batch_size=1,
    epochs=10,
    shuffle=True,
    verbose=2)

Epoch 1/10
1168/1168 - 5s - loss: 0.0554 - accuracy: 8.5616e-04 - 5s/epoch - 4ms/step
Epoch 2/10
1168/1168 - 6s - loss: 0.0529 - accuracy: 8.5616e-04 - 6s/epoch - 5ms/step
Epoch 3/10
1168/1168 - 5s - loss: 0.0529 - accuracy: 8.5616e-04 - 5s/epoch - 4ms/step
Epoch 4/10
1168/1168 - 6s - loss: 0.0529 - accuracy: 8.5616e-04 - 6s/epoch - 5ms/step
Epoch 5/10
1168/1168 - 5s - loss: 0.0529 - accuracy: 8.5616e-04 - 5s/epoch - 5ms/step
Epoch 6/10
1168/1168 - 4s - loss: 0.0529 - accuracy: 8.5616e-04 - 4s/epoch - 4ms/step
Epoch 7/10
1168/1168 - 4s - loss: 0.0529 - accuracy: 8.5616e-04 - 4s/epoch - 4ms/step
Epoch 8/10
1168/1168 - 3s - loss: 0.0529 - accuracy: 8.5616e-04 - 3s/epoch - 2ms/step
Epoch 9/10
1168/1168 - 3s - loss: 0.0529 - accuracy: 8.5616e-04 - 3s/epoch - 2ms/step
Epoch 10/10
1168/1168 - 3s - loss: 0.0529 - accuracy: 8.5616e-04 - 3s/epoch - 2ms/step


<keras.src.callbacks.History at 0x78b546fb0280>

In [169]:
model.evaluate(x_test, y_test, batch_size=1,verbose=2)

292/292 - 1s - loss: 0.0547 - accuracy: 0.0000e+00 - 610ms/epoch - 2ms/step


[0.054690491408109665, 0.0]