# Kevin Kyendy Mauwi

# Tasks:

Define the problem: Determine the goal of the project and the problem you're trying to solve using the feedforward neural network.

Gather the data: Download the dataset from the Kaggle link provided above.

Prepare the data: Clean and preprocess the data. Handle missing values, encode categorical variables, and normalize or standardize numerical variables.

Create the model: Build a feedforward neural network using Keras with an appropriate architecture for predicting house prices. You can experiment with different numbers of layers, neurons, and activation functions.

Train the model: Train the neural network on the preprocessed data. Experiment with different batch sizes and numbers of epochs to improve the model's performance.

Evaluate the model: Test the model's performance on a separate validation or test set. Calculate metrics such as mean squared error, mean absolute error, and R-squared to assess the model's accuracy.

Optimize the model: Perform hyperparameter tuning to find the optimal architecture and parameters for the neural network. You can use techniques like Grid Search or Random Search.

In [12]:
# Library imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [4]:
# Load the training and test splits from CSV files (paths are relative to the working directory)
data_train, data_test = "train.csv", "test.csv"

train_data, test_data = (pd.read_csv(path) for path in (data_train, data_test))

In [5]:
# Preview the first five rows of the training dataset
train_data.head(n=5)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [6]:
# Preview the first five rows of the testing dataset
test_data.head(n=5)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,...,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,...,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,...,144,0,,,,0,1,2010,WD,Normal


In [7]:
# Replace every missing value in both splits with the sentinel -1.
# The frames are modified in place, so no reassignment is needed.
train_data.fillna(value=-1, inplace=True)
test_data.fillna(value=-1, inplace=True)

In [10]:
# Cast every object-dtype column (treated here as categorical; names kept in `catcols`) to str.
# The column list is taken from the training frame and applied to both splits.
catcols = train_data.select_dtypes(include="object").columns
train_data[catcols] = train_data[catcols].astype(str)
test_data[catcols] = test_data[catcols].astype(str)

In [13]:
# One-hot encode the categorical columns.
# The encoder is fitted on the training split only; handle_unknown="ignore" lets
# the test split contain categories that were not seen during fitting.
one_hot_encoder = OneHotEncoder(handle_unknown="ignore")
one_hot_encoder.fit(train_data[catcols])
# transform() returns a sparse matrix; convert it to a dense array.
encoded_train = one_hot_encoder.transform(train_data[catcols]).toarray()
encoded_test = one_hot_encoder.transform(test_data[catcols]).toarray()

In [14]:
# Display the training frame to inspect it after the missing-value fill and string conversion
train_data

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,-1,Reg,Lvl,AllPub,...,0,-1,-1,-1,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,-1,Reg,Lvl,AllPub,...,0,-1,-1,-1,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,-1,IR1,Lvl,AllPub,...,0,-1,-1,-1,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,-1,IR1,Lvl,AllPub,...,0,-1,-1,-1,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,-1,IR1,Lvl,AllPub,...,0,-1,-1,-1,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,62.0,7917,Pave,-1,Reg,Lvl,AllPub,...,0,-1,-1,-1,0,8,2007,WD,Normal,175000
1456,1457,20,RL,85.0,13175,Pave,-1,Reg,Lvl,AllPub,...,0,-1,MnPrv,-1,0,2,2010,WD,Normal,210000
1457,1458,70,RL,66.0,9042,Pave,-1,Reg,Lvl,AllPub,...,0,-1,GdPrv,Shed,2500,5,2010,WD,Normal,266500
1458,1459,20,RL,68.0,9717,Pave,-1,Reg,Lvl,AllPub,...,0,-1,-1,-1,0,4,2010,WD,Normal,142125
