# Wibbly Wobbly Gobble Wobble Stuff

This model uses several 1D convolution layers and bidirectional LSTM layers to make its prediction. It is a bit unnecessarily heavy, but it can easily reach 100% validation accuracy as a result. There is not a whole lot of room for improvement with such a tiny dataset. This was a fun, quick dataset for over the Thanksgiving break. Have a great holiday, everyone.

In [None]:
import numpy as np
import pandas as pd
import os
from keras.models import Model
from keras.layers import Dense, Bidirectional, LSTM, BatchNormalization, Dropout, Input, Conv1D
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Flatten
from keras.preprocessing.sequence import pad_sequences
from keras import optimizers
from sklearn.model_selection import train_test_split

# Read in the Data

In [None]:
# Load the labelled training records and the test records from the
# competition's JSON files (training file first, then the test file).
train, test = (
    pd.read_json(json_path)
    for json_path in ('../input/train.json', '../input/test.json')
)

In [None]:
# Pad or truncate every 'audio_embedding' sequence to exactly 10 steps, the
# sequence length the model's Input layer is declared with.
X_train, X_test = (
    pad_sequences(frame['audio_embedding'].tolist(), maxlen=10)
    for frame in (train, test)
)

# Binary target column as a plain NumPy array.
Y_train = np.asarray(train['is_turkey'])


# Create the Model

In [None]:
def createmodel():
    """Build the uncompiled Conv1D + bidirectional-LSTM binary classifier.

    The network takes inputs of shape (10, 128) and stacks, in order:
    two 'same'-padded 1D convolutions, batch normalisation, a bidirectional
    LSTM, batch normalisation, two more convolutions and a second
    bidirectional LSTM.  The resulting sequence is flattened and passed
    through two ReLU dense layers to a single sigmoid unit.

    Returns:
        A Keras ``Model`` mapping the input tensor to the sigmoid output.
        The caller is responsible for compiling it.
    """
    def conv(filters, kernel_size):
        # All convolutions keep the sequence length via 'same' padding.
        return Conv1D(filters, kernel_size, padding='same')

    def bilstm():
        # Both recurrent blocks share the same configuration and return the
        # full sequence so later layers still see every time step.
        return Bidirectional(
            LSTM(512, return_sequences=True, recurrent_dropout=0.1))

    inputs = Input((10, 128))

    # First convolutional stage followed by the first recurrent block.
    features = conv(512, 10)(inputs)
    features = conv(256, 5)(features)
    features = BatchNormalization()(features)
    features = bilstm()(features)
    features = BatchNormalization()(features)

    # Second convolutional stage followed by the second recurrent block.
    features = conv(256, 10)(features)
    features = conv(128, 5)(features)
    features = bilstm()(features)

    # Dense classification head on the flattened sequence.
    hidden = Flatten()(features)
    hidden = Dense(128, activation='relu')(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    outputs = Dense(1, activation='sigmoid')(hidden)

    return Model(inputs, outputs)


# Build one instance purely to inspect the architecture; the training loop
# further down creates and compiles a fresh model for every run.
model = createmodel()
model.compile(loss='binary_crossentropy',
              # Use the canonical class name: the lowercase `adam` alias is
              # not available in every Keras release.
              optimizer=optimizers.Adam(lr=0.0001),
              metrics=['accuracy'])

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()

# Callbacks

In [None]:
# Halve the learning rate once validation loss has gone 4 epochs without
# improving, but never let it drop below 1e-5.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    min_lr=1e-5,
    verbose=1,
)

# Stop training after 8 epochs without a lower validation loss and roll the
# model back to the weights of its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=8,
    restore_best_weights=True,
    verbose=1,
)

# Callback list handed to model.fit in the training cell.
callback = [learning_rate_reduction, early_stopping]

# Train the Model

In [None]:
BATCH_SIZE = 16
MAX_EPOCHS = 20
CV_LOOPS = 2

# One array of test-set predictions per training run; the submission cell
# combines them.
pred_list = []

for i in range(CV_LOOPS):
    # Each run draws its own random hold-out split: the validation fraction
    # grows by 2 percentage points per run and the seed changes with it.
    X_batch, X_val, Y_batch, Y_val = train_test_split(
        X_train, Y_train,
        test_size=0.2 + (0.02 * i),
        random_state=42 + i)

    # Start every run from a freshly initialised model.
    model = createmodel()
    model.compile(loss='binary_crossentropy',
                  # Canonical class name; the lowercase `adam` alias is not
                  # available in every Keras release.
                  optimizer=optimizers.Adam(lr=0.0001),
                  metrics=['accuracy'])

    # Fit on the training portion of the split only.  Fitting on the full
    # X_train/Y_train would include the hold-out rows in training, so
    # val_loss could not meaningfully drive early stopping or the
    # learning-rate schedule.
    model.fit(X_batch, Y_batch,
              batch_size=BATCH_SIZE * (i + 1),  # batch size grows with each run
              epochs=MAX_EPOCHS,
              validation_data=(X_val, Y_val),
              verbose=2,
              callbacks=callback)

    pred_list.append(model.predict(X_test))

# Prepare the Submission

In [None]:
sub = pd.read_csv('../input/sample_submission.csv')

# Average the per-run test predictions element-wise.
# Each entry of pred_list comes from model.predict on a network whose last
# layer is Dense(1), so it is a column of shape (n_samples, 1).  Adding such a
# column to the 1-D 'is_turkey' array would broadcast to an
# (n_samples, n_samples) matrix, so every prediction is flattened first.
# The average is built from the model outputs only; the placeholder values in
# the sample submission are not mixed in.
pred = np.mean([np.ravel(run_pred) for run_pred in pred_list], axis=0)

sub['is_turkey'] = pred
sub.to_csv('submission.csv', index=False)

# Notebook display: preview the first 20 rows of the submission.
sub[:20]