In [35]:
import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers

In [36]:

train_data = pd.read_csv("train.csv",index_col = 'id')
test_data = pd.read_csv("test.csv",index_col = 'id')

In [37]:
train_data = train_data.fillna(train_data.mean())
test_data = test_data.fillna(train_data.mean())

In [38]:
Y_train = train_data['loss'].copy()
X_train = train_data.copy().drop('loss', axis = 1)

In [39]:
model = keras.Sequential([
        layers.Input(shape = (100,)),
        #layers.Embedding(400, 8, embeddings_regularizer='l2', input_length=75),
        #layers.Conv1D(32, 3, activation='relu', padding='same', input_shape=(100,1)),
        #layers.Flatten(),
        #layers.Dropout(0.2),
        #layers.LayerNormalization(),
        layers.BatchNormalization(),
        layers.Dense(units = 512, activation ='tanh',kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4)),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(units = 256, activation ='tanh',kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4)),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(units = 128, activation ='tanh',kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4)),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(units = 32, activation ='tanh',kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4)),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(1, activation = 'elu'),
    ])
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_20 (Batc (None, 100)               400       
_________________________________________________________________
dense_20 (Dense)             (None, 512)               51712     
_________________________________________________________________
batch_normalization_21 (Batc (None, 512)               2048      
_________________________________________________________________
dropout_16 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_21 (Dense)             (None, 256)               131328    
_________________________________________________________________
batch_normalization_22 (Batc (None, 256)               1024      
_________________________________________________________________
dropout_17 (Dropout)         (None, 256)              

In [40]:
X_train_split, X_val_split, Y_train_split, Y_val_split = train_test_split(X_train, Y_train, test_size = 0.1, random_state = 100, stratify = Y_train)

In [41]:
early_stopping = callbacks.EarlyStopping(
    monitor="root_mean_squared_error",
    patience=10,
    min_delta=0.01,
    restore_best_weights=True,
)

In [42]:
model.compile(loss='mse', optimizer = keras.optimizers.Adam(learning_rate=0.005),  metrics=[tf.keras.metrics.RootMeanSquaredError()])
history = model.fit(X_train_split, Y_train_split,
          batch_size = 256, epochs = 100,
          validation_data=(X_val_split, Y_val_split),
          callbacks=[early_stopping],
           )
score = model.evaluate(X_val_split, Y_val_split, verbose = 0)
print('Test loss: {}'.format(score[0]))

Train on 225000 samples, validate on 25000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Test loss: 64.62178061401367


In [43]:
predicts = model.predict(test_data)





In [44]:

output = pd.DataFrame(predicts, columns = ['loss'])
output['id'] = test_data.index
output.to_csv('submission.csv', index=False)
