In [1]:
!git clone https://github.com/Merlinaphist/ReproduceANNA16.git

Cloning into 'ReproduceANNA16'...
remote: Enumerating objects: 110, done.[K
remote: Counting objects: 100% (31/31), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 110 (delta 2), reused 28 (delta 2), pack-reused 79[K
Receiving objects: 100% (110/110), 160.65 MiB | 13.49 MiB/s, done.
Resolving deltas: 100% (25/25), done.
Updating files: 100% (86/86), done.


In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt
import tensorflow as tf
from keras import backend as kb
from statistics import mean, stdev

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [52]:
def root_mean_squared_error(y_true, y_pred):
    y_true = tf.cast(y_true,tf.float32)
    return kb.sqrt(kb.mean(kb.square(y_pred - y_true)))

def convolutional_block(X,filters,index):
    base_name = f'conv_{index}_'
    F1,F2 = filters
    # First component of main path
    x_main = tf.keras.layers.Conv1D(filters=F1,kernel_size=3,strides=1,
              padding='same',name=base_name+'main_conv0')(X)
    x_main = tf.keras.layers.BatchNormalization(name=base_name+'main_bn0')(x_main)
    x_main = tf.keras.layers.Activation('relu')(x_main)
    # Second Component of main path
    x_main = tf.keras.layers.Conv1D(filters=F2,kernel_size=3,strides=1,
              padding='same',name=base_name+'main_conv1')(x_main)
    x_main = tf.keras.layers.BatchNormalization(name=base_name+'main_bn1')(x_main)
    x_main = tf.keras.layers.Activation('relu')(x_main)
    # Shortcut path
    X_shortcut = tf.keras.layers.Conv1D(filters=F2,kernel_size=1,strides=1,
                      padding='same',name=base_name+'branch_conv')(X)
    X_shortcut = tf.keras.layers.BatchNormalization(name=base_name+'branch_bn')(X_shortcut)
    # Add
    X_out = tf.keras.layers.Add()([x_main, X_shortcut])
    X_out = tf.keras.layers.Activation('relu')(X_out)
    return X_out

def identity_block(X,filters,index):
    base_name = f'iden_{index}_'
    F1,F2 = filters
    # First component of main path
    x_main = tf.keras.layers.Conv1D(filters=F1,kernel_size=3,strides=1,
              padding='same',name=base_name+'main_conv0')(X)
    x_main = tf.keras.layers.BatchNormalization(name=base_name+'main_nb0')(x_main)
    x_main = tf.keras.layers.Activation('relu')(x_main)
    # Second Component of main path
    x_main = tf.keras.layers.Conv1D(filters=F2,kernel_size=3,strides=1,
              padding='same',name=base_name+'main_conv1')(x_main)
    x_main = tf.keras.layers.BatchNormalization(name=base_name+'main_nb1')(x_main)
    x_main = tf.keras.layers.Activation('relu')(x_main)
    # Add
    x_out = tf.keras.layers.Add()([x_main, X])
    x_out = tf.keras.layers.Activation('relu')(x_out)
    return X

def create_resnet():
    filters = [32, 64]
    inputs = tf.keras.layers.Input(shape=(2402, 4))
    x = tf.keras.layers.Conv1D(filters=64, kernel_size=6, strides=1,
              padding='same',name='conv_0_main_conv0')(inputs)
    x = tf.keras.layers.AveragePooling1D(pool_size=2, strides=2)(x)
    for i in range(2):
        x = identity_block(x, filters, 4*i)
        x = identity_block(x, filters, 4*i+1)
        x = identity_block(x, filters, 4*i+2)
        x = convolutional_block(x, filters, 4*i+3)
    x = tf.keras.layers.AveragePooling1D()(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dense(1,activation='linear')(x)
    model = tf.keras.Model(inputs=inputs, outputs=x)
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss=root_mean_squared_error)
    return model


In [12]:
token2onehot = np.array([
      [0,0,0,0],
      [1,0,0,0],
      [0,1,0,0],
      [0,0,1,0],
      [0,0,0,1],
      [.25, .25, .25, .25]
    ])

In [55]:
rmse = []
for i in range(5):
    X_train = pd.read_pickle(f"ReproduceANNA16/datasets/onehot/X_train_{i}.gz")
    Y_train = pd.read_pickle(f"ReproduceANNA16/datasets/onehot/Y_train_{i}.gz")
    X_test = pd.read_pickle(f"ReproduceANNA16/datasets/onehot/X_test_{i}.gz")
    Y_test = pd.read_pickle(f"ReproduceANNA16/datasets/onehot/Y_test_{i}.gz")
    X_train = token2onehot[X_train.values]
    X_test = token2onehot[X_test.values]
    model = create_resnet()
    model.fit(X_train, Y_train, verbose=0, batch_size=32, epochs=15, validation_data=(X_test, Y_test))
    pred = model.predict(X_test, verbose=0)
    rmse.append(sqrt(mean_squared_error(Y_test,pred)))
    print(rmse[i])

0.84343665140616
0.9082168462391337
0.8799652166811593
1.1638517275822047
1.0314825757619406


In [56]:
mean(rmse)

0.9653906035341197

In [57]:
X_train = pd.read_pickle(f"ReproduceANNA16/4.final_test/datasets/X_final_train_onehot.gz")
X_test = pd.read_pickle(f"ReproduceANNA16/4.final_test/datasets/X_final_test_onehot.gz")
Y_train = pd.read_pickle(f"ReproduceANNA16/4.final_test/datasets/Y_final_train_onehot.gz")
Y_test = pd.read_pickle(f"ReproduceANNA16/4.final_test/datasets/Y_final_test_onehot.gz")
X_train = token2onehot[X_train.values]
X_test = token2onehot[X_test.values]

In [58]:
model = create_resnet()
model.fit(X_train, Y_train, verbose=0, batch_size=32, epochs=15, validation_data=(X_test, Y_test))
pred = model.predict(X_test)
sqrt(mean_squared_error(Y_test,pred))



0.8244511889474461