#### Bi-LSTM model

Note: Data Preprocessing and Deep LSTM model are inspired by the top solution described here:  
http://simaaron.github.io/Estimating-rainfall-from-weather-radar-readings-using-recurrent-neural-networks/  
Code from：  
https://www.kaggle.com/oldmon/lstm-models

In [1]:
import numpy as np
import pandas as pd
import os

print(os.listdir('./data'))

['sample_solution.csv', 'sample_solution.csv.zip', 'test.csv', 'test.zip', 'train.csv', 'train.zip']


In [2]:
INPUT_WIDTH = 19
N_FEATURES = 22

# taken from http://simaaron.github.io/Estimating-rainfall-from-weather-radar-readings-using-recurrent-neural-networks/，
#73mm rainfall is most appropriate value
THRESHOLD = 73

# Data preprocessing
## Training set

In [3]:
train_df = pd.read_csv('./data/train.csv')

In [4]:
train_df.describe()

Unnamed: 0,Id,minutes_past,radardist_km,Ref,Ref_5x5_10th,Ref_5x5_50th,Ref_5x5_90th,RefComposite,RefComposite_5x5_10th,RefComposite_5x5_50th,...,RhoHV_5x5_90th,Zdr,Zdr_5x5_10th,Zdr_5x5_50th,Zdr_5x5_90th,Kdp,Kdp_5x5_10th,Kdp_5x5_50th,Kdp_5x5_90th,Expected
count,13765200.0,13765200.0,13765200.0,6349375.0,5283988.0,6356482.0,7551281.0,6716343.0,5755673.0,6711663.0,...,5905584.0,4934916.0,4133154.0,4936568.0,5905584.0,4182635.0,3428782.0,4187281.0,5052776.0,13765200.0
mean,592337.0,29.52373,11.06794,22.92666,19.95227,22.61029,25.89846,24.71108,22.15824,24.42075,...,1.015272,0.5367092,-0.7190076,0.3376216,2.07287,0.03545172,-3.482325,-0.4736553,4.079836,108.6263
std,340856.1,17.30813,4.206618,10.35516,9.208166,10.053,11.10958,10.68962,9.702705,10.42453,...,0.04861606,1.510399,1.006068,0.938644,1.670194,3.869725,2.79212,2.263046,4.147337,548.6058
min,1.0,0.0,0.0,-31.0,-32.0,-32.0,-28.5,-32.0,-31.0,-27.5,...,0.2083333,-7.875,-7.875,-7.875,-7.875,-96.04,-80.79,-78.77,-100.2,0.01
25%,296897.0,15.0,9.0,16.0,14.0,16.0,18.0,17.5,16.0,17.5,...,0.9983333,-0.1875,-1.125,-0.0625,1.0625,-1.410004,-4.580002,-0.7100067,2.069992,0.2540001
50%,592199.0,30.0,11.0,22.5,20.0,22.5,25.5,24.0,22.0,24.0,...,1.011667,0.375,-0.625,0.25,1.6875,0.0,-2.820007,0.0,3.519989,1.016
75%,889582.0,44.0,14.0,29.5,26.0,29.0,33.5,31.5,28.5,31.5,...,1.051667,1.0625,-0.1875,0.6875,2.625,1.75,-1.76001,0.3499908,5.639999,3.810002
max,1180945.0,59.0,21.0,71.0,62.5,69.0,72.5,92.5,66.0,71.0,...,1.051667,7.9375,7.9375,7.9375,7.9375,179.75,3.519989,12.8,144.6,33017.73


In [5]:
train_df.head(100)

Unnamed: 0,Id,minutes_past,radardist_km,Ref,Ref_5x5_10th,Ref_5x5_50th,Ref_5x5_90th,RefComposite,RefComposite_5x5_10th,RefComposite_5x5_50th,...,RhoHV_5x5_90th,Zdr,Zdr_5x5_10th,Zdr_5x5_50th,Zdr_5x5_90th,Kdp,Kdp_5x5_10th,Kdp_5x5_50th,Kdp_5x5_90th,Expected
0,1,3,10.0,,,,,,,,...,,,,,,,,,,0.25400
1,1,16,10.0,,,,,,,,...,,,,,,,,,,0.25400
2,1,25,10.0,,,,,,,,...,,,,,,,,,,0.25400
3,1,35,10.0,,,,,,,,...,,,,,,,,,,0.25400
4,1,45,10.0,,,,,,,,...,,,,,,,,,,0.25400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,9,24,18.0,,,,,,,,...,,,,,,,,,,18.28801
96,9,28,18.0,,,,,,,,...,,,,,,,,,,18.28801
97,9,33,18.0,,,,,,,,...,,,,,,,,,,18.28801
98,9,37,18.0,,,,,,,,...,,,,,,,,,,18.28801


In [6]:
train_df[train_df.columns[1:]] = train_df[train_df.columns[1:]].astype(np.float32)

remove Ids with NaNs in Ref column for each observation(no data from radar)

In [7]:
train_ids = train_df[~np.isnan(train_df.Ref)]
train_ids.head(20)

Unnamed: 0,Id,minutes_past,radardist_km,Ref,Ref_5x5_10th,Ref_5x5_50th,Ref_5x5_90th,RefComposite,RefComposite_5x5_10th,RefComposite_5x5_50th,...,RhoHV_5x5_90th,Zdr,Zdr_5x5_10th,Zdr_5x5_50th,Zdr_5x5_90th,Kdp,Kdp_5x5_10th,Kdp_5x5_50th,Kdp_5x5_90th,Expected
6,2,1.0,2.0,9.0,5.0,7.5,10.5,15.0,10.5,16.5,...,0.998333,0.375,-0.125,0.3125,0.875,1.059998,-1.410004,-0.350006,1.059998,1.016001
7,2,6.0,2.0,26.5,22.5,25.5,31.5,26.5,26.5,28.5,...,1.005,0.0625,-0.1875,0.25,0.6875,,,,1.409988,1.016001
8,2,11.0,2.0,21.5,15.5,20.5,25.0,26.5,23.5,25.0,...,1.001667,0.3125,-0.0625,0.3125,0.625,0.349991,,-0.350006,1.759995,1.016001
9,2,16.0,2.0,18.0,14.0,17.5,21.0,20.5,18.0,20.5,...,1.001667,0.25,0.125,0.375,0.6875,0.349991,-1.059998,0.0,1.059998,1.016001
10,2,21.0,2.0,24.5,16.5,21.0,24.5,24.5,21.0,24.0,...,0.998333,0.25,0.0625,0.1875,0.5625,-0.350006,-1.059998,-0.350006,1.759995,1.016001
11,2,26.0,2.0,12.0,12.0,16.0,20.0,16.5,17.0,19.0,...,0.998333,0.5625,0.25,0.4375,0.6875,-1.76001,-1.76001,-0.350006,0.709991,1.016001
12,2,31.0,2.0,22.5,19.0,22.0,25.0,26.0,23.5,25.5,...,1.001667,0.0,-0.1875,0.25,0.625,-1.059998,-2.12001,-0.710007,0.349991,1.016001
13,2,37.0,2.0,14.0,14.0,18.5,21.0,19.5,20.0,21.0,...,0.998333,0.5,0.1875,0.4375,0.8125,0.0,-1.76001,-0.350006,1.059998,1.016001
14,2,42.0,2.0,12.0,11.0,12.5,17.0,19.5,18.0,21.0,...,0.998333,0.625,0.375,0.625,0.875,-0.350006,-0.350006,0.0,0.349991,1.016001
15,2,47.0,2.0,1.5,3.5,7.0,10.5,18.0,16.5,18.5,...,0.998333,0.375,0.1875,0.5,0.6875,0.349991,-2.110001,-0.350006,1.059998,1.016001


In [8]:
train_new = train_df[np.in1d(train_df.Id, train_ids.Id)]
train_new.head(40)

Unnamed: 0,Id,minutes_past,radardist_km,Ref,Ref_5x5_10th,Ref_5x5_50th,Ref_5x5_90th,RefComposite,RefComposite_5x5_10th,RefComposite_5x5_50th,...,RhoHV_5x5_90th,Zdr,Zdr_5x5_10th,Zdr_5x5_50th,Zdr_5x5_90th,Kdp,Kdp_5x5_10th,Kdp_5x5_50th,Kdp_5x5_90th,Expected
6,2,1.0,2.0,9.0,5.0,7.5,10.5,15.0,10.5,16.5,...,0.998333,0.375,-0.125,0.3125,0.875,1.059998,-1.410004,-0.350006,1.059998,1.016001
7,2,6.0,2.0,26.5,22.5,25.5,31.5,26.5,26.5,28.5,...,1.005,0.0625,-0.1875,0.25,0.6875,,,,1.409988,1.016001
8,2,11.0,2.0,21.5,15.5,20.5,25.0,26.5,23.5,25.0,...,1.001667,0.3125,-0.0625,0.3125,0.625,0.349991,,-0.350006,1.759995,1.016001
9,2,16.0,2.0,18.0,14.0,17.5,21.0,20.5,18.0,20.5,...,1.001667,0.25,0.125,0.375,0.6875,0.349991,-1.059998,0.0,1.059998,1.016001
10,2,21.0,2.0,24.5,16.5,21.0,24.5,24.5,21.0,24.0,...,0.998333,0.25,0.0625,0.1875,0.5625,-0.350006,-1.059998,-0.350006,1.759995,1.016001
11,2,26.0,2.0,12.0,12.0,16.0,20.0,16.5,17.0,19.0,...,0.998333,0.5625,0.25,0.4375,0.6875,-1.76001,-1.76001,-0.350006,0.709991,1.016001
12,2,31.0,2.0,22.5,19.0,22.0,25.0,26.0,23.5,25.5,...,1.001667,0.0,-0.1875,0.25,0.625,-1.059998,-2.12001,-0.710007,0.349991,1.016001
13,2,37.0,2.0,14.0,14.0,18.5,21.0,19.5,20.0,21.0,...,0.998333,0.5,0.1875,0.4375,0.8125,0.0,-1.76001,-0.350006,1.059998,1.016001
14,2,42.0,2.0,12.0,11.0,12.5,17.0,19.5,18.0,21.0,...,0.998333,0.625,0.375,0.625,0.875,-0.350006,-0.350006,0.0,0.349991,1.016001
15,2,47.0,2.0,1.5,3.5,7.0,10.5,18.0,16.5,18.5,...,0.998333,0.375,0.1875,0.5,0.6875,0.349991,-2.110001,-0.350006,1.059998,1.016001


Replace NaN values with zeros

In [9]:
train_new = train_new.fillna(0.0)
train_new = train_new.reset_index(drop = True)
train_new.head(50)

Unnamed: 0,Id,minutes_past,radardist_km,Ref,Ref_5x5_10th,Ref_5x5_50th,Ref_5x5_90th,RefComposite,RefComposite_5x5_10th,RefComposite_5x5_50th,...,RhoHV_5x5_90th,Zdr,Zdr_5x5_10th,Zdr_5x5_50th,Zdr_5x5_90th,Kdp,Kdp_5x5_10th,Kdp_5x5_50th,Kdp_5x5_90th,Expected
0,2,1.0,2.0,9.0,5.0,7.5,10.5,15.0,10.5,16.5,...,0.998333,0.375,-0.125,0.3125,0.875,1.059998,-1.410004,-0.350006,1.059998,1.016001
1,2,6.0,2.0,26.5,22.5,25.5,31.5,26.5,26.5,28.5,...,1.005,0.0625,-0.1875,0.25,0.6875,0.0,0.0,0.0,1.409988,1.016001
2,2,11.0,2.0,21.5,15.5,20.5,25.0,26.5,23.5,25.0,...,1.001667,0.3125,-0.0625,0.3125,0.625,0.349991,0.0,-0.350006,1.759995,1.016001
3,2,16.0,2.0,18.0,14.0,17.5,21.0,20.5,18.0,20.5,...,1.001667,0.25,0.125,0.375,0.6875,0.349991,-1.059998,0.0,1.059998,1.016001
4,2,21.0,2.0,24.5,16.5,21.0,24.5,24.5,21.0,24.0,...,0.998333,0.25,0.0625,0.1875,0.5625,-0.350006,-1.059998,-0.350006,1.759995,1.016001
5,2,26.0,2.0,12.0,12.0,16.0,20.0,16.5,17.0,19.0,...,0.998333,0.5625,0.25,0.4375,0.6875,-1.76001,-1.76001,-0.350006,0.709991,1.016001
6,2,31.0,2.0,22.5,19.0,22.0,25.0,26.0,23.5,25.5,...,1.001667,0.0,-0.1875,0.25,0.625,-1.059998,-2.12001,-0.710007,0.349991,1.016001
7,2,37.0,2.0,14.0,14.0,18.5,21.0,19.5,20.0,21.0,...,0.998333,0.5,0.1875,0.4375,0.8125,0.0,-1.76001,-0.350006,1.059998,1.016001
8,2,42.0,2.0,12.0,11.0,12.5,17.0,19.5,18.0,21.0,...,0.998333,0.625,0.375,0.625,0.875,-0.350006,-0.350006,0.0,0.349991,1.016001
9,2,47.0,2.0,1.5,3.5,7.0,10.5,18.0,16.5,18.5,...,0.998333,0.375,0.1875,0.5,0.6875,0.349991,-2.110001,-0.350006,1.059998,1.016001


Define and exclude outliers from training set

In [10]:
df_tmp = pd.DataFrame(train_new.groupby('Id')['Expected'].mean())
meaningful_ids = np.array(df_tmp[df_tmp['Expected'] < THRESHOLD].index)

train_final = train_new[np.in1d(train_new.Id, meaningful_ids)]
train_final.shape

(8926102, 24)

In [11]:
del df_tmp, train_new, meaningful_ids

Grouping and padding into sequences

In [12]:
train_gp = train_final.groupby('Id')
train_size = len(train_gp)

X_train = np.zeros((train_size, INPUT_WIDTH,N_FEATURES), dtype = np.float32)
y_train = np.zeros(train_size,dtype = np.float32)
seq_len_train = np.zeros(train_size, dtype = np.float32)

i = 0 
for _, group in train_gp:
    X = group.values
    seq_len = X.shape[0]
    X_train[i, :seq_len,:] = X[:, 1:23]
    if i == 0:
        print(pd.DataFrame(X))
    y_train[i] = X[0,23]
    seq_len_train[i] = seq_len
    i += 1
    del X

del train_gp
X_train.shape, y_train.shape

     0     1    2     3     4     5     6     7     8     9   ...        14  \
0   2.0   1.0  2.0   9.0   5.0   7.5  10.5  15.0  10.5  16.5  ...  0.998333   
1   2.0   6.0  2.0  26.5  22.5  25.5  31.5  26.5  26.5  28.5  ...  1.005000   
2   2.0  11.0  2.0  21.5  15.5  20.5  25.0  26.5  23.5  25.0  ...  1.001667   
3   2.0  16.0  2.0  18.0  14.0  17.5  21.0  20.5  18.0  20.5  ...  1.001667   
4   2.0  21.0  2.0  24.5  16.5  21.0  24.5  24.5  21.0  24.0  ...  0.998333   
5   2.0  26.0  2.0  12.0  12.0  16.0  20.0  16.5  17.0  19.0  ...  0.998333   
6   2.0  31.0  2.0  22.5  19.0  22.0  25.0  26.0  23.5  25.5  ...  1.001667   
7   2.0  37.0  2.0  14.0  14.0  18.5  21.0  19.5  20.0  21.0  ...  0.998333   
8   2.0  42.0  2.0  12.0  11.0  12.5  17.0  19.5  18.0  21.0  ...  0.998333   
9   2.0  47.0  2.0   1.5   3.5   7.0  10.5  18.0  16.5  18.5  ...  0.998333   
10  2.0  53.0  2.0  16.0  14.5  18.0  23.5  28.0  23.5  26.5  ...  0.998333   
11  2.0  58.0  2.0  22.0  16.5  22.5  26.5  31.5  26

((714838, 19, 22), (714838,))

## Test Set

In [13]:
test_df = pd.read_csv('./data/test.csv')
test_df[test_df.columns[1:]] = test_df[test_df.columns[1:]].astype(np.float32)

test_ids = np.array(test_df.Id.unique())

#Convert all NaN to zeros
test_final = test_df.fillna(0.0)
test_final = test_final.reset_index(drop = True)


In [14]:
test_final

Unnamed: 0,Id,minutes_past,radardist_km,Ref,Ref_5x5_10th,Ref_5x5_50th,Ref_5x5_90th,RefComposite,RefComposite_5x5_10th,RefComposite_5x5_50th,...,RhoHV_5x5_50th,RhoHV_5x5_90th,Zdr,Zdr_5x5_10th,Zdr_5x5_50th,Zdr_5x5_90th,Kdp,Kdp_5x5_10th,Kdp_5x5_50th,Kdp_5x5_90th
0,1,1.0,8.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,5.0,8.0,10.0,0.0,10.0,18.0,11.5,0.0,11.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,8.0,8.0,0.0,0.0,7.0,14.5,0.0,0.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,12.0,8.0,14.0,0.0,9.0,16.0,14.0,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1,15.0,8.0,10.5,0.0,9.0,15.5,13.5,0.0,9.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8022751,717625,40.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8022752,717625,44.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8022753,717625,48.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8022754,717625,52.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
test_gp = test_final.groupby('Id')
test_size = len(test_gp)

X_test = np.zeros((test_size, INPUT_WIDTH, N_FEATURES),dtype = np.float32)
seq_len_test = np.zeros(test_size, dtype = np.float32)

i = 0
for _,group in test_gp:
    X = group.values
    seq_len = X.shape[0]
    X_test[i, :seq_len,:] = X[:,1:23]
    seq_len_test[i] = seq_len
    i += 1
    del X
del test_gp
X_test.shape

(717625, 19, 22)

## Models

In [16]:
import tensorflow as tf
from tensorflow.compat.v1.keras.layers import Input, Dense, CuDNNLSTM,LSTM, AveragePooling1D, TimeDistributed, Flatten, Bidirectional
from tensorflow.compat.v1.keras.models import Model


In [17]:
from tensorflow.compat.v1.keras.callbacks import EarlyStopping
es_callback = EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 5)

In [18]:
BATCH_SIZE = 1024
N_EPOCHS = 32

## Simple LSTM

In [19]:
def get_model_simple(shape = (19, 22)):
    inp = Input(shape)
    x = CuDNNLSTM(64, return_sequences = False)(inp)
    #x = LSTM(64, return_sequences = False)(inp)
    x = Dense(1)(x)
    
    model = Model(inp, x)
    return model

In [20]:
model_0 = get_model_simple((19, 22))
model_0.compile(optimizer = 'adadelta', loss='mae')
model_0.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 19, 22)]          0         
_________________________________________________________________
cu_dnnlstm (CuDNNLSTM)       (None, 64)                22528     
_________________________________________________________________
dense (Dense)                (None, 1)                 65        
Total params: 22,593
Trainable params: 22,593
Non-trainable params: 0
_________________________________________________________________


In [None]:
model_0.fit(X_train, y_train, batch_size = BATCH_SIZE, epochs = N_EPOCHS, validation_split = 0.2, callbacks = [es_callback])

Epoch 1/32

In [None]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [None]:
y_pred_0 = model_0.predict(X_test)
submission_0 = pd.DataFrame({'Id': test_ids, 'Expected': y_pred_0.reshape(-1)})
submission_0.to_csv('submission_0.csv', index = False)

## Simple LSTM + TimeDistributed
LSTM model with return_sequences=True and TimeDistributed layer

In [24]:
def get_model_seq(shape = (19, 22)):
    inp = Input(shape)
    #x = CuDNNLSTM(64, return_sequences = True)(inp)
    x = LSTM(64, return_sequences = True)(inp)
    x = TimeDistributed(Dense(10))(x)
    x = Flatten()(x)
    x = Dense(1)(x)
    
    model = Model(inp, x)
    return model

In [25]:
model_1 = get_model_seq((19, 22))
model_1.compile(optimizer = 'adadelta', loss = 'mae')
model_1.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 19, 22)]          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 19, 64)            22272     
_________________________________________________________________
time_distributed (TimeDistri (None, 19, 10)            650       
_________________________________________________________________
flatten (Flatten)            (None, 190)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 191       
Total params: 23,113
Trainable params: 23,113
Non-trainable params: 0
_________________________________________________________________


In [26]:

model_1.fit(X_train,y_train, batch_size = BATCH_SIZE, epochs = N_EPOCHS,validation_split = 0.2, callbacks = [es_callback])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<tensorflow.python.keras.callbacks.History at 0x18ed9918700>

In [27]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 735877565123946980
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 18427903260370029007
physical_device_desc: "device: XLA_CPU device"
]


In [28]:
y_pred_1 = model_1.predict(X_test)
submission_1 = pd.DataFrame({'Id': test_ids, 'Expected': y_pred_1.reshape(-1)})
submission_1.to_csv('submission_1.csv', index = False)

## Bi-direction LSTM
Adding Bi-directional wrapper around LSTM

In [29]:
def get_model_bilstm(shape = (19, 22)):
    inp = Input(shape)
#     x = Bidirectional(CuDNNLSTM(64, return_sequences = True))(inp)
    x = Bidirectional(LSTM(64, return_sequences = True))(inp)
    x = TimeDistributed(Dense(10))(x)
    x = Flatten()(x)
    x = Dense(1)(x)
    
    model = Model(inp, x)
    return model

In [30]:
model_2 = get_model_bilstm((19, 22))
model_2.compile(optimizer = 'adadelta', loss = 'mae')
model_2.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 19, 22)]          0         
_________________________________________________________________
bidirectional (Bidirectional (None, 19, 128)           44544     
_________________________________________________________________
time_distributed_1 (TimeDist (None, 19, 10)            1290      
_________________________________________________________________
flatten_1 (Flatten)          (None, 190)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 191       
Total params: 46,025
Trainable params: 46,025
Non-trainable params: 0
_________________________________________________________________


In [33]:
model_2.fit(X_train, y_train, batch_size = BATCH_SIZE, epochs = N_EPOCHS, validation_split = 0.2, callbacks = [es_callback])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<tensorflow.python.keras.callbacks.History at 0x18f57b751c0>

In [34]:
y_pred_2 = model_2.predict(X_test)
submission_2 = pd.DataFrame({'Id': test_ids, 'Expected': y_pred_2.reshape(-1)})
submission_2.to_csv('submission_2.csv', index=False)

## Deep model
Deep NN inspired by the top solution

In [37]:
def get_model_deep(shape = (19, 22)):
    inp = Input(shape)
    x = Dense(16)(inp)
#     x = Bidirectional(CuDNNLSTM(64, return_sequences = True))(x)
    x = Bidirectional(LSTM(64, return_sequences = True))(x)
    x = TimeDistributed(Dense(64))(x)
#     x = Bidirectional(CuDNNLSTM(128, return_sequences = True))(x)
    x = Bidirectional(LSTM(64, return_sequences = True))(x)
    x = TimeDistributed(Dense(1))(x)
    x = AveragePooling1D()(x)
    x = Flatten()(x)
    x = Dense(1)(x)
    
    model = Model(inp, x)
    return model

In [38]:
model_3 = get_model_deep((19, 22))
model_3.compile(optimizer = 'adadelta', loss = 'mae')
model_3.summary()

Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 19, 22)]          0         
_________________________________________________________________
dense_8 (Dense)              (None, 19, 16)            368       
_________________________________________________________________
bidirectional_3 (Bidirection (None, 19, 128)           41472     
_________________________________________________________________
time_distributed_4 (TimeDist (None, 19, 64)            8256      
_________________________________________________________________
bidirectional_4 (Bidirection (None, 19, 128)           66048     
_________________________________________________________________
time_distributed_5 (TimeDist (None, 19, 1)             129       
_________________________________________________________________
average_pooling1d (AveragePo (None, 9, 1)             

In [39]:
model_3.fit(X_train, y_train, batch_size = BATCH_SIZE, epochs = N_EPOCHS, validation_split = 0.2, callbacks = [es_callback])

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<tensorflow.python.keras.callbacks.History at 0x1902e45d4c0>

In [40]:
y_pred_3 = model_3.predict(X_test)
submission_3 = pd.DataFrame({'Id': test_ids, 'Expected': y_pred_3.reshape(-1)})
submission_3.to_csv('submission_3.csv', index = False)

## Stacking
simple average over all models

In [41]:
y_pred_acg = (y_pred_0 + y_pred_1 + y_pred_2 + y_pred_3)/4
submission_avg = pd.DataFrame({'Id': test_ids, 'Expected': y_pred_avg.reshape(-1)})
submission_avg.to_csv('submission_avg.csv', index = False)

NameError: name 'y_pred_avg' is not defined