In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [2]:
# Pin the NumPy and TensorFlow random number generators so reruns are repeatable
NUMPY_SEED, TF_SEED = 1, 2
np.random.seed(NUMPY_SEED)
tf.random.set_seed(TF_SEED)

In [3]:
# read the training and test CSV files into DataFrames
train_path, test_path = './data/train.csv', './data/test.csv'
df_train, df_test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (train_path, test_path)
)

In [4]:
# convert the training frame to a NumPy array:
# every column except the last goes into X, the last column becomes y
train_values = df_train.values
X = train_values[:, :-1]
y = train_values[:, -1]

In [5]:
# Standardise the features to improve convergence. The fitted scaler is kept in
# `train_scaler` so the same transformation can be applied to the test data.
# NOTE(review): the statistics are learned from all of X, including the rows that
# are later held out for validation - consider fitting on the training split only.
train_scaler = StandardScaler().fit(X)
# the scaler is already fitted above, so only transform here (no second fit)
X_scaled = train_scaler.transform(X)

In [6]:
# split into training and validation datasets (80% / 20%)
# random_state pins the shuffle so re-running this cell reproduces the same split
# instead of depending on the current state of the global NumPy RNG
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=1)
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

(80000, 10) (20000, 10) (80000,) (20000,)


In [7]:
# number of input features = size of the column axis of the 2-D training matrix
n_features = X_train.shape[-1]

In [8]:
# Feed-forward regression network: three ReLU hidden layers and a single linear output unit.
# Hidden layers use He-normal initialisation, as recommended for ReLU: https://arxiv.org/abs/1502.01852
hidden_kwargs = dict(activation='relu', kernel_initializer='he_normal')
model = Sequential([
    Dense(40, input_shape=(n_features,), **hidden_kwargs),
    Dense(200, **hidden_kwargs),
    Dense(80, **hidden_kwargs),
    Dense(1, activation='linear'),
])
# Adam optimiser minimising mean squared error; MSE is also tracked as a metric
model.compile(loss='mse', optimizer='adam', metrics=['mse'])
# train for 50 epochs in mini-batches of 32, with the validation split passed as validation data
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f14b1ffbfd0>

In [9]:
# Drop the prediction column if an earlier run of the notebook already added it,
# so re-running this cell still feeds the model the original feature columns only.
X_test = df_test.drop(columns="Y", errors="ignore").values
# Apply the scaler that was fitted on the training data. Use transform(), not
# fit_transform(): refitting on the test set would scale it with different
# statistics than the ones the model was trained with.
X_test_scaled = train_scaler.transform(X_test)

In [10]:
# Predict targets for the scaled test features with the trained network.
# The model's output layer has a single unit, so expect one prediction per row.
y_hat = model.predict(X_test_scaled)

In [11]:
# store the predictions as a new "Y" column; flatten them to 1-D first so the
# column assignment receives exactly one value per test row
df_test["Y"] = np.ravel(y_hat)

In [13]:
# Write the test rows, including the predicted "Y" column, to a CSV file in the working directory.
# NOTE: to_csv is called with its defaults, so the DataFrame index is written to the file as well.
df_test.to_csv("test_with_predictions.csv")