# Housing Prices

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split




# Prepare Data

In [3]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
print(train.columns.values)

['Id' 'MSSubClass' 'MSZoning' 'LotFrontage' 'LotArea' 'Street' 'Alley'
 'LotShape' 'LandContour' 'Utilities' 'LotConfig' 'LandSlope'
 'Neighborhood' 'Condition1' 'Condition2' 'BldgType' 'HouseStyle'
 'OverallQual' 'OverallCond' 'YearBuilt' 'YearRemodAdd' 'RoofStyle'
 'RoofMatl' 'Exterior1st' 'Exterior2nd' 'MasVnrType' 'MasVnrArea'
 'ExterQual' 'ExterCond' 'Foundation' 'BsmtQual' 'BsmtCond' 'BsmtExposure'
 'BsmtFinType1' 'BsmtFinSF1' 'BsmtFinType2' 'BsmtFinSF2' 'BsmtUnfSF'
 'TotalBsmtSF' 'Heating' 'HeatingQC' 'CentralAir' 'Electrical' '1stFlrSF'
 '2ndFlrSF' 'LowQualFinSF' 'GrLivArea' 'BsmtFullBath' 'BsmtHalfBath'
 'FullBath' 'HalfBath' 'BedroomAbvGr' 'KitchenAbvGr' 'KitchenQual'
 'TotRmsAbvGrd' 'Functional' 'Fireplaces' 'FireplaceQu' 'GarageType'
 'GarageYrBlt' 'GarageFinish' 'GarageCars' 'GarageArea' 'GarageQual'
 'GarageCond' 'PavedDrive' 'WoodDeckSF' 'OpenPorchSF' 'EnclosedPorch'
 '3SsnPorch' 'ScreenPorch' 'PoolArea' 'PoolQC' 'Fence' 'MiscFeature'
 'MiscVal' 'MoSold' 'YrSold' 'SaleTy

In [4]:
train.isnull().sum()

Id                 0
MSSubClass         0
MSZoning           0
LotFrontage      259
LotArea            0
                ... 
MoSold             0
YrSold             0
SaleType           0
SaleCondition      0
SalePrice          0
Length: 81, dtype: int64

# Preprocess Data

features that need encoding:
- MSZoning
- Street
- Alley (contains null values)
- LotShape
- LandContour
- Utilities
- LotConfig
- LandSlope
- Neighborhood
- Condition1
- Condition2
- BldgType
- HouseStyle
- RoofStyle
- RoofMatl
- Exterior1st
- Exterior2nd
- MasVnrType
- ExterQual
- ExterCond
- Foundation
- BsmtQual (contains null values)
- BsmtExposure (contains null values)
- BsmtFinType1 (contains null values)
- BsmtFinType2 (contains null values)
- Heating
- HeatingQC
- CentralAir
- Electrical
- KitchenQual
- Functional
- FireplaceQu (contains null values)
- GarageType (contains null values)
- GarageFinish (contains null values)
- GarageQual (contains null values)
- GarageCond (contains null values)
- PavedDrive
- PoolQC (contains null values)
- Fence (contains null values)
- MiscFeature (contains null values)
- SaleType
- SaleCondition

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

label_alley = LabelEncoder()
label_bsmtqual = LabelEncoder()
label_bsmtcond = LabelEncoder()
label_mszoning = LabelEncoder ()
label_bsmtexposure = LabelEncoder()
label_bsmtfintype1 = LabelEncoder()
label_bsmtfintype2 = LabelEncoder()
label_fireplacequ = LabelEncoder()
label_garagetype = LabelEncoder()
label_garagefinish = LabelEncoder()
label_garagequal = LabelEncoder()
label_garagecond = LabelEncoder()
label_poolqc = LabelEncoder()
label_fence = LabelEncoder()
label_miscfeatures = LabelEncoder()
label_street = LabelEncoder()
label_lotshape = LabelEncoder()
label_landcontour = LabelEncoder()
label_utilities = LabelEncoder()
label_lotconfig = LabelEncoder()
label_landslope = LabelEncoder()
label_neighborhood = LabelEncoder()
label_condition1 = LabelEncoder()
label_condition2 = LabelEncoder()
label_bldgtype = LabelEncoder()
label_housestyle = LabelEncoder()
label_roofstyle = LabelEncoder()
label_roofmatl = LabelEncoder()
label_exterior1st = LabelEncoder()
label_exterior2nd = LabelEncoder()
label_masvnrtype = LabelEncoder()
label_exterqual = LabelEncoder()
label_extercond = LabelEncoder()
label_foundation = LabelEncoder()
label_heating = LabelEncoder()
label_heatingqc = LabelEncoder()
label_centralair = LabelEncoder()
label_electrical = LabelEncoder()
label_kitchenqual = LabelEncoder()
label_functional = LabelEncoder()
label_paveddrive = LabelEncoder()
label_saletype = LabelEncoder()
label_salecondition = LabelEncoder()

labels = [
    'Alley', 'BsmtQual', 'BsmtCond', 'MSZoning', 'BsmtExposure', 'BsmtFinType1',
    'BsmtFinType2', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual',
    'GarageCond', 'PoolQC', 'Fence', 'MiscFeature', 'Street', 'LotShape',
    'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
    'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl',
    'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond',
    'Foundation', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual',
    'Functional', 'PavedDrive', 'SaleType', 'SaleCondition']

encoders = [
    label_alley, label_bsmtqual, label_bsmtcond, label_mszoning, label_bsmtexposure,
    label_bsmtfintype1, label_bsmtfintype2, label_fireplacequ, label_garagetype,
    label_garagefinish, label_garagequal, label_garagecond, label_poolqc,
    label_fence, label_miscfeatures, label_street, label_lotshape, label_landcontour,
    label_utilities, label_lotconfig, label_landslope, label_neighborhood,
    label_condition1, label_condition2, label_bldgtype, label_housestyle,
    label_roofstyle, label_roofmatl, label_exterior1st, label_exterior2nd,
    label_masvnrtype, label_exterqual, label_extercond, label_foundation,
    label_heating, label_heatingqc, label_centralair, label_electrical,
    label_kitchenqual, label_functional, label_paveddrive, label_saletype,
    label_salecondition]


for data in [train, test]:
    
    #fill missing data
    data['Alley'].fillna('NoAccess', inplace=True)
    data['BsmtQual'].fillna('NoBasement', inplace=True)
    data['BsmtCond'].fillna('NoBasement', inplace=True)
    data['BsmtExposure'].fillna('NoBasement', inplace=True)
    data['BsmtFinType1'].fillna('NoBasement', inplace=True)
    data['BsmtFinType2'].fillna('NoBasement', inplace=True)
    data['FireplaceQu'].fillna('NoFireplace', inplace=True)
    data['GarageType'].fillna('NoGarage', inplace=True)
    data['GarageFinish'].fillna('NoGarage', inplace=True)
    data['GarageQual'].fillna('NoGarage', inplace=True)
    data['GarageCond'].fillna('NoGarage', inplace=True)
    data['PoolQC'].fillna('NoPool', inplace=True)
    data['Fence'].fillna('NoFence', inplace=True)
    data['MiscFeature'].fillna('NoMiscFeatures', inplace=True)
    data['MasVnrType'].fillna('None', inplace=True)

    #fill numerical null values
    data['LotFrontage'].fillna(data['LotFrontage'].mean(), inplace=True)
    data['MasVnrArea'].fillna(0, inplace=True)
    data['GarageYrBlt'].fillna(0, inplace=True)
    data['BsmtFinSF1'].fillna(0, inplace=True)
    data['BsmtFinSF2'].fillna(0, inplace=True)
    data['BsmtUnfSF'].fillna(0, inplace=True)
    data['TotalBsmtSF'].fillna(0, inplace=True)
    data['BsmtHalfBath'].fillna(0, inplace=True)
    data['BsmtFullBath'].fillna(0, inplace=True)
    data['GarageCars'].fillna(0, inplace=True)
    data['GarageArea'].fillna(0, inplace=True)
    
    # use labelencoder to encode values into numerical values
    for label,encoder in zip(labels,encoders):
        data[label]=encoder.fit_transform(data[label])

    # use MinMaxScaler to scale the data
    minmax_scaler = MinMaxScaler()
    data = minmax_scaler.fit_transform(data)

In [13]:
exclude_features = ['Id', 'SalePrice']
X_train = train[train.columns.difference(exclude_features)]
Y_train = train['SalePrice'].to_numpy().reshape((-1,1))
X_test = test[test.columns.difference(exclude_features)]

KeyError: 'SalePrice'

In [7]:
null_columns = train.columns[train.isnull().any()]
print("Columns with null values:", null_columns.tolist())
null_columns = test.columns[test.isnull().any()]
print("Columns with null values:", null_columns.tolist())

Columns with null values: []
Columns with null values: []


In [59]:
lr = LinearRegression()
lr.fit(X_train, Y_train)

In [61]:
lr_predict=lr.predict(X_test)
lr_predict=np.array(lr_predict)
lr_predict = lr_predict.flatten()
print(lr_predict)
print(test.Id.shape)

[194637.87887351 251550.07432535 257690.35825905 ... 233601.60233327
 207008.55454586 333112.59958196]
(1459,)


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [10]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])
model.compile(optimizer='adam', loss='mean_squared_error')




In [11]:
model.fit(X_train, Y_train, epochs=100)

Epoch 1/100

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 7

<keras.src.callbacks.History at 0x1905ce0f070>

In [14]:
# Make predictions
predictions = model.predict(X_test)



In [18]:
output = pd.DataFrame({'Id': test.Id.values, 'SalePrice': predictions.flatten()})
output.to_csv('submission.csv', index=False)


In [19]:
submission = pd.read_csv('submission.csv')
submission.head(5)

Unnamed: 0,Id,SalePrice
0,1461,143630.8
1,1462,172277.45
2,1463,183566.16
3,1464,186993.28
4,1465,166155.92
