# <center> Keras model on all numerical features </center>

## Import data and choose the numerical features

In [None]:
import pandas as pd 
from sklearn.preprocessing import StandardScaler # Used for scaling of data
import matplotlib.pyplot as plt

In [None]:
# Load the full training table; the first CSV column is used as the row index.
trainfull = pd.read_csv(
    filepath_or_buffer='../HousePrices/trainfull.csv',
    index_col=0,
)

In [None]:
# Drop every object-dtype column so only the non-object features remain.
trainfull = trainfull.select_dtypes(exclude='object')

In [None]:
# Persist the filtered table next to the notebook; the row index is not written.
trainfull.to_csv(path_or_buf='trainfull_allnum.csv', index=False)

In [None]:
# Bare expression: as the last statement of a notebook cell it displays the
# filtered DataFrame for a quick visual check.
trainfull

## Split Data  

Use an 80/20 split: 80 % of the rows for training and 20 % for validation.

In [None]:
from numpy import random

# Fixed seed for NumPy's global random number generator.
SEED = 42
random.seed(seed=SEED)

In [None]:
from sklearn.model_selection import train_test_split

# Fraction of rows held out for validation.
VAL_SIZE = 0.2
# Pass the seed explicitly: relying only on NumPy's global RNG state makes the
# split change whenever cells are re-run or executed out of order.
train, val = train_test_split(trainfull, test_size=VAL_SIZE, random_state=SEED)

Fill the missing values 

In [None]:
# Replace every missing value with 0 in both partitions.
train, val = train.fillna(value=0), val.fillna(value=0)

Create the inputs and outputs for the train and validation sets

In [None]:
# Every column except the target 'SalePrice' is used as a model input.
col_train = train.columns.tolist()
col_train.remove('SalePrice')
Features = col_train  # same list object, kept under the name used later on

# Input frames for both partitions, restricted to the same feature columns.
X_train, X_val = train[Features], val[Features]

In [None]:
# Target vectors: the raw 'SalePrice' values of each partition as arrays.
y_train, y_val = (frame['SalePrice'].values for frame in (train, val))

Standardise the data

In [None]:
# Learn the per-feature mean and standard deviation from the training inputs,
# then replace X_train with its standardised version.
scale = StandardScaler()
scale.fit(X_train)
X_train = scale.transform(X_train)

In [None]:
# Standardise the validation inputs with the scaler already fitted on the
# training inputs. Fitting a fresh scaler on the validation set would apply
# different means/variances than the ones the model is trained on, and would
# leak validation statistics into the preprocessing.
X_val = scale.transform(X_val)

## Create, compile and fit the Model

In [None]:
import keras
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.optimizers import Adadelta
from keras.layers.advanced_activations import LeakyReLU
from keras.regularizers import l1

# Seed NumPy's global RNG before the network is built.
seed = 7
np.random.seed(seed)

# Model: four ReLU hidden layers (200 -> 100 -> 50 -> 25) followed by a single
# output unit without activation; every layer uses the 'normal' initializer.
model = Sequential([
    Dense(200, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'),
    Dense(100, kernel_initializer='normal', activation='relu'),
    Dense(50, kernel_initializer='normal', activation='relu'),
    Dense(25, kernel_initializer='normal', activation='relu'),
    Dense(1, kernel_initializer='normal'),
])
# Compile model: mean absolute error loss, Adadelta with its default settings.
model.compile(optimizer=Adadelta(), loss='mean_absolute_error')

# Train for 150 epochs in mini-batches of 10, tracking the validation loss
# after every epoch; the returned History object feeds the learning curve.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=150,
    batch_size=10,
)

In [None]:
# Evaluation on the held-out validation set created by train_test_split.
# (Evaluating on X_train/y_train would only report the training loss, not how
# the model performs on data it has not seen.)
model.evaluate(X_val, y_val)

## Learning Curve

In [None]:
# Plot every metric recorded during training (one line per history key)
# against the epoch number; y-limits are left to matplotlib's autoscaling.
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.gca().grid(True)
plt.gca().set(
    title='Model performance throughout training',
    ylabel='Loss',
    xlabel='epoch',
)
plt.show()

## Prediction on Val

In [None]:
# Model predictions for the validation inputs.
y_val_predict = model.predict(x=X_val)
# Bare expression so the notebook displays the predicted values.
y_val_predict

## Compute MAE, RMSE

In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the validation predictions (displayed by the cell).
mean_absolute_error(y_true=y_val, y_pred=y_val_predict)

In [None]:
from math import sqrt
from sklearn.metrics import mean_squared_error

# RMSE on the validation set: square root of the mean squared error.
root_mean_squared_error = sqrt(
    mean_squared_error(y_true=y_val, y_pred=y_val_predict)
)
print(f"{root_mean_squared_error}")

## Preparation of the test and Prediction

In [None]:
# Load the test table (first row is the header) and replace every missing
# value with 0, mirroring the treatment of the train/validation partitions.
test = pd.read_csv('../HousePrices/test.csv', header=0).fillna(value=0)

In [None]:
# Keep the row identifiers for the submission file.
id_col = test['Id'].values.tolist()

# Standardise the test inputs with statistics learned from the TRAINING
# features only. Fitting a scaler on the test set itself would feed the model
# inputs on a different scale than the one it was trained on.
# The scaler is refitted here from `train[Features]` so this cell does not
# depend on whatever `scale` currently holds.
scale = StandardScaler().fit(train[Features])
X_test = scale.transform(test[Features])

In [None]:
# Model predictions for the standardised test inputs.
prediction = model.predict(x=X_test)

In [None]:
# Build the two-column submission table (Id, SalePrice) and write it without
# the row index.
# `prediction` comes from a network with a single output unit, so it is
# flattened to one dimension explicitly rather than relying on pandas to
# accept a 2-D array as a column.
submission = pd.DataFrame({
    'Id': id_col,
    'SalePrice': prediction.ravel(),
})
submission.to_csv('prediction_keras_allnum.csv', index=False)

# Score Kaggle
<center> Optimizer Adam (lr=0.001): </center>  
Dense 200, 100, 50, 25, 1 : 0.15986  
<center> Optimizer Adadelta: </center>  
Dense 200, 100, 50, 25, 1 : 0.14182