In [None]:
import warnings
# Silence every warning for the rest of the session so library chatter does
# not clutter the cell outputs.
# NOTE(review): this also hides deprecation and data-conversion warnings that
# could point at real problems; consider narrowing the filter.
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from keras.layers import Conv1D, Dense, Dropout, Input, Flatten
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from tensorflow import set_random_seed

In [None]:
# CSV files for the three data splits, relative to the working directory.
train_path = 'data/train.csv'
val_path = 'data/val.csv'
test_path = 'data/test.csv'
# Seed passed to set_random_seed so that runs are repeatable.
SEED = 42

In [None]:
# Seed every random number generator the training run can draw from.
# Seeding TensorFlow alone is not enough for repeatable runs: NumPy's global
# RNG is separate (Keras may use it, e.g. when shuffling training samples),
# so it is seeded here as well.
np.random.seed(SEED)
set_random_seed(SEED)

In [None]:
# Read the training split and preview its first rows.
df_train = pd.read_csv(train_path)
df_train.head()

In [None]:
# Read the validation split and preview its first rows.
df_val = pd.read_csv(val_path)
df_val.head()

In [None]:
# Read the test split and preview its first rows.
# NOTE(review): in the cells shown, df_test is only previewed here and then
# deleted; the test features actually used come from test_encodings.npy.
df_test = pd.read_csv(test_path)
df_test.head()

In [None]:
# Binary targets for the training split: 1 where the label is 'scrambled',
# 0 for anything else.
y = list(df_train['label'])
y_train = np.array([1 if label == 'scrambled' else 0 for label in y])

In [None]:
# Binary targets for the validation split: 1 where the label is 'scrambled',
# 0 for anything else.
y = list(df_val['label'])
y_val = np.array([1 if label == 'scrambled' else 0 for label in y])

In [None]:
# Drop the raw data frames and the temporary label list; the cells shown
# after this point only use the encoded targets (y_train, y_val) and the
# arrays loaded from the .npy files.
del df_train, df_val, df_test, y

In [None]:
# Load the precomputed encoding arrays for each split from .npy files in the
# working directory. np.load opens and closes the file itself when given a
# path.
train_encodings = np.load('train_encodings.npy')
val_encodings = np.load('val_encodings.npy')
test_encodings = np.load('test_encodings.npy')

In [None]:
# Give every encoding array a trailing axis of length 1, i.e. reshape it to
# (first dim, second dim, 1), which is the layout the Conv1D input of the
# model (shape=(768, 1)) expects.
n_train, train_width = train_encodings.shape[:2]
train_encodings = train_encodings.reshape(n_train, train_width, 1)

n_val, val_width = val_encodings.shape[:2]
val_encodings = val_encodings.reshape(n_val, val_width, 1)

n_test, test_width = test_encodings.shape[:2]
test_encodings = test_encodings.reshape(n_test, test_width, 1)

In [None]:
# Network graph: three identical 1-D convolutions over the (768, 1) input,
# followed by a funnel of fully connected layers (256 -> 64 -> 16 -> 4) and a
# single sigmoid unit that outputs the probability of the positive class.

# Settings shared by all three convolution layers.
conv_settings = dict(filters=256, kernel_size=2, padding='valid', activation='relu')

layer = Input(shape=(768, 1))

conv_1 = Conv1D(**conv_settings)(layer)
conv_2 = Conv1D(**conv_settings)(conv_1)
conv_3 = Conv1D(**conv_settings)(conv_2)

# Collapse the convolution output into one vector per sample for the dense stack.
flatten = Flatten()(conv_3)

dense_1 = Dense(256, activation='relu')(flatten)
dense_2 = Dense(64, activation='relu')(dense_1)
dense_3 = Dense(16, activation='relu')(dense_2)
dense_4 = Dense(4, activation='relu')(dense_3)

out = Dense(1, activation='sigmoid')(dense_4)

In [None]:
# Wrap the layer graph (input `layer` -> output `out`) in a Model, configure
# it for binary classification (Adam optimizer, binary cross-entropy loss,
# accuracy reported as a metric) and print the layer-by-layer summary.
model = Model(inputs=layer, outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Checkpoint callback: stores only the weights (not the whole model) every
# second epoch. The epoch number is formatted into the filename as two
# digits, e.g. 'ckpnt_02.h5', 'ckpnt_04.h5', ...
checkpoint = ModelCheckpoint('ckpnt_{epoch:02d}.h5', save_weights_only=True, period=2)

In [None]:
# Train for 20 epochs in batches of 64 samples, evaluating on the validation
# split and passing the checkpoint callback so weights are written to disk
# during training.
model.fit(train_encodings, y_train, batch_size=64,
          epochs=20, validation_data=(val_encodings, y_val),
          callbacks=[checkpoint])

In [None]:
# Replace the final-epoch weights with the ones checkpointed at epoch 6
# (the filename follows the 'ckpnt_{epoch:02d}.h5' template used by the
# checkpoint callback).
# NOTE(review): epoch 6 was presumably picked by reading the training log —
# confirm it is still the best epoch after any re-training.
model.load_weights('ckpnt_06.h5')

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions for the
# validation split.
val_probabilities = model.predict(val_encodings)
y_pred = (val_probabilities >= 0.5).astype(int)

In [None]:
# Validation metrics for the thresholded predictions.
#
# Precision and recall previously used average='micro'. On a single-label
# binary problem micro-averaged precision and recall are both equal to plain
# accuracy, so three of the four reported numbers were the same value. They
# now use the same macro averaging as the F1 score, so the reported
# precision, recall and F1 are consistent with each other and say something
# accuracy does not.
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred, average='macro')
recall = recall_score(y_val, y_pred, average='macro')
f1 = f1_score(y_val, y_pred, average='macro')

In [None]:
# Report the validation metrics one per line, in the order
# accuracy, precision, recall, F1.
print(accuracy, precision, recall, f1, sep='\n')

In [None]:
# Threshold the sigmoid outputs for the test split at 0.5 and translate the
# 0/1 predictions back into the string labels used by the dataset.
test_probabilities = model.predict(test_encodings)
pred = (test_probabilities >= 0.5).astype(int)
y_test = ['scrambled' if row == 1 else 'unscrambled' for row in pred]

In [None]:
# Write the predicted test labels to a single-column CSV: the column header
# is 'label' and the index column is omitted.
submission = pd.DataFrame(y_test)
submission.to_csv('data/submission.csv', header=['label'], index=False)