In [1]:
import warnings
warnings.filterwarnings('ignore')

#Basic data manipulation libraries
import pandas as pd, numpy as np
import math, json, gc, random, os, sys
from matplotlib import pyplot as plt
from tqdm import tqdm

#Deep Learning Libraries
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L

#Library for model evaluation
from sklearn.model_selection import train_test_split, KFold

In [2]:
# Read the competition files: the sample submission is a CSV, while the
# train/test sets are JSON-lines files (one record per line).
sample_sub = pd.read_csv('../input/stanford-covid-vaccine/sample_submission.csv')
test = pd.read_json('../input/stanford-covid-vaccine/test.json', lines=True)
train = pd.read_json('../input/stanford-covid-vaccine/train.json', lines=True)

# Quick sanity check on the number of rows/columns that were loaded.
print('Train shapes: ', train.shape)
print('Test shapes: ', test.shape)

Train shapes:  (2400, 19)
Test shapes:  (3634, 7)


In [3]:
# Names of the five target columns the models are trained to predict.
target_cols = [
    'reactivity',
    'deg_Mg_pH10',
    'deg_pH10',
    'deg_Mg_50C',
    'deg_50C',
]

# Integer id for every character that may occur in the encoded input strings;
# ids follow the position of the character in the alphabet string.
token2int = dict((token, idx) for idx, token in enumerate('().ACGUBEHIMSX'))

def get_pair_index_structure(structure):
    """Map every position of a dot-bracket string to the index of its partner.

    Brackets are matched with a stack, so each ")" is paired with the most
    recently opened, still unmatched "(". This handles nested stems
    ("(())") as well as sibling stems ("()()").

    Parameters
    ----------
    structure : str or sequence of single characters
        Structure in dot-bracket notation. "(" opens a pair, ")" closes one;
        any other character is treated as unpaired.

    Returns
    -------
    numpy.ndarray
        Array of length ``len(structure)`` where entry ``i`` is the index of
        the position paired with ``i``, or -1 when ``i`` is unpaired.

    Raises
    ------
    ValueError
        If the brackets are unbalanced.
    """
    pair_structure = np.array([-1] * len(structure))

    # Indices of "(" that have not been closed yet; the last one opened is
    # the first one to be closed.
    open_stack = []
    for position, symbol in enumerate(structure):
        if symbol == "(":
            open_stack.append(position)
        elif symbol == ")":
            if not open_stack:
                raise ValueError(
                    f"Unbalanced structure: ')' at position {position} has no matching '('")
            partner = open_stack.pop()
            pair_structure[partner] = position
            pair_structure[position] = partner

    if open_stack:
        raise ValueError(
            f"Unbalanced structure: '(' at position {open_stack[-1]} is never closed")

    return pair_structure

def preprocess_inputs(df, cols=['sequence', 'structure', 'predicted_loop_type']):
    """Encode the string columns of ``df`` as an integer array.

    Every character of every selected cell is replaced by its id from
    ``token2int``. The per-row lists are stacked and the last two axes are
    swapped, so the result is indexed as (row, position, column).
    Assumes all strings in a frame have the same length -- otherwise the
    stacked array is not 3-D and the transpose fails.
    """
    def encode(seq):
        return [token2int[symbol] for symbol in seq]

    encoded = df[cols].applymap(encode)
    stacked = np.array(encoded.values.tolist())
    return stacked.transpose(0, 2, 1)

# Train only on the rows whose signal_to_noise is above 1.
high_snr_train = train[train.signal_to_noise > 1]

train_inputs = preprocess_inputs(high_snr_train)

# Stack the target columns and swap the last two axes so the targets end up
# on the last axis (presumably each cell holds one value per position).
train_labels = np.transpose(
    np.array(high_snr_train[target_cols].values.tolist()),
    (0, 2, 1))

def MCRMSE(y_true, y_pred):
    """Mean column-wise root mean squared error.

    The squared error is averaged over axis 1, the square root is taken per
    remaining column, and those RMSE values are averaged over what was
    originally axis 2. One value is returned per leading-axis entry.
    """
    squared_error = tf.square(y_true - y_pred)
    per_column_mse = tf.reduce_mean(squared_error, axis=1)
    per_column_rmse = tf.sqrt(per_column_mse)
    return tf.reduce_mean(per_column_rmse, axis=1)

In [4]:
def gru_layer(hidden_dim, dropout):
    """Return a bidirectional GRU layer that outputs the full sequence.

    hidden_dim is the number of units per direction, dropout is the input
    dropout rate of the GRU; kernels use orthogonal initialisation.
    """
    recurrent = tf.keras.layers.GRU(
        units=hidden_dim,
        dropout=dropout,
        return_sequences=True,
        kernel_initializer='orthogonal',
    )
    return tf.keras.layers.Bidirectional(recurrent)

def lstm_layer(hidden_dim, dropout):
    """Return a bidirectional LSTM layer that outputs the full sequence.

    hidden_dim is the number of units per direction, dropout is the input
    dropout rate of the LSTM; kernels use orthogonal initialisation.
    """
    recurrent = tf.keras.layers.LSTM(
        units=hidden_dim,
        dropout=dropout,
        return_sequences=True,
        kernel_initializer='orthogonal',
    )
    return tf.keras.layers.Bidirectional(recurrent)

def build_model(gru=1, seq_len=107, pred_len=68, dropout=0.5,
                embed_dim=75, hidden_dim=128):
    """Build and compile a three-layer bidirectional recurrent model.

    Parameters
    ----------
    gru : int
        Selects the recurrent stack (first layer -> last layer):
        1: GRU, GRU, GRU      0: LSTM, LSTM, LSTM
        3: GRU, LSTM, LSTM    4: LSTM, GRU, GRU
        5: LSTM, GRU, LSTM    6: LSTM, GRU, LSTM
    seq_len : int
        Length of the input sequences; the model input is (seq_len, 3).
    pred_len : int
        Number of leading positions for which predictions are returned.
    dropout : float
        Dropout rate passed to every recurrent layer.
    embed_dim : int
        Embedding size for each of the 3 input channels.
    hidden_dim : int
        Units per direction in every recurrent layer.

    Returns
    -------
    tf.keras.Model
        Model mapping (batch, seq_len, 3) token ids to (batch, pred_len, 5)
        values, compiled with Adam (default settings) and the MCRMSE loss.

    Raises
    ------
    ValueError
        If ``gru`` is not one of the supported codes.
    """
    # Layer factories for each supported architecture code.
    rnn_stacks = {
        1: (gru_layer, gru_layer, gru_layer),
        0: (lstm_layer, lstm_layer, lstm_layer),
        3: (gru_layer, lstm_layer, lstm_layer),
        4: (lstm_layer, gru_layer, gru_layer),
        5: (lstm_layer, gru_layer, lstm_layer),
        # NOTE(review): code 6 builds exactly the same stack as code 5; if a
        # different hybrid (e.g. GRU, LSTM, GRU) was intended, change it here.
        6: (lstm_layer, gru_layer, lstm_layer),
    }
    if gru not in rnn_stacks:
        raise ValueError(
            f"Unknown architecture code gru={gru!r}; expected one of {sorted(rnn_stacks)}")

    inputs = tf.keras.layers.Input(shape=(seq_len, 3))

    # Each of the 3 channels is embedded separately -> (batch, seq_len, 3, embed_dim),
    # then the channel embeddings are concatenated per position.
    embed = tf.keras.layers.Embedding(input_dim=len(token2int), output_dim=embed_dim)(inputs)
    reshaped = tf.reshape(
        embed, shape=(-1, embed.shape[1], embed.shape[2] * embed.shape[3]))

    reshaped = tf.keras.layers.SpatialDropout1D(.2)(reshaped)

    hidden = reshaped
    for make_layer in rnn_stacks[gru]:
        hidden = make_layer(hidden_dim, dropout)(hidden)

    # only making predictions on the first part of each sequence
    truncated = hidden[:, :pred_len]

    out = tf.keras.layers.Dense(5, activation='linear')(truncated)

    model = tf.keras.Model(inputs=inputs, outputs=out)

    adam = tf.optimizers.Adam()
    model.compile(optimizer=adam, loss=MCRMSE)

    return model

In [5]:
# Hold out 10% of the samples for validation, with a fixed seed.
# NOTE: this rebinds train_inputs/train_labels to the training part only, so
# running the cell a second time splits the already reduced training set again.
(train_inputs, val_inputs,
 train_labels, val_labels) = train_test_split(
    train_inputs,
    train_labels,
    test_size=0.1,
    random_state=34,
)

In [6]:
# Learning-rate scheduler with default settings; the same instance is passed
# to the fit() calls of the following training cells as well.
lr_callback = tf.keras.callbacks.ReduceLROnPlateau()

# gru=1 -> three stacked bidirectional GRU layers.
gru = build_model(gru=1)
# Checkpoint callback with default settings, writing to 'model_gru.h5'.
sv_gru = tf.keras.callbacks.ModelCheckpoint('model_gru.h5')

history_gru = gru.fit(
    x=train_inputs,
    y=train_labels,
    batch_size=64,
    epochs=100,
    verbose=2,
    callbacks=[lr_callback, sv_gru],
    validation_data=(val_inputs, val_labels),
)

print(f"Min training loss={min(history_gru.history['loss'])}, min validation loss={min(history_gru.history['val_loss'])}")

Epoch 1/100
30/30 - 23s - loss: 0.4643 - val_loss: 0.4053
Epoch 2/100
30/30 - 21s - loss: 0.4004 - val_loss: 0.3797
Epoch 3/100
30/30 - 21s - loss: 0.3846 - val_loss: 0.3614
Epoch 4/100
30/30 - 21s - loss: 0.3688 - val_loss: 0.3476
Epoch 5/100
30/30 - 21s - loss: 0.3553 - val_loss: 0.3362
Epoch 6/100
30/30 - 21s - loss: 0.3461 - val_loss: 0.3315
Epoch 7/100
30/30 - 21s - loss: 0.3393 - val_loss: 0.3296
Epoch 8/100
30/30 - 21s - loss: 0.3358 - val_loss: 0.3182
Epoch 9/100
30/30 - 21s - loss: 0.3285 - val_loss: 0.3142
Epoch 10/100
30/30 - 22s - loss: 0.3241 - val_loss: 0.3094
Epoch 11/100
30/30 - 22s - loss: 0.3195 - val_loss: 0.3048
Epoch 12/100
30/30 - 21s - loss: 0.3143 - val_loss: 0.2993
Epoch 13/100
30/30 - 21s - loss: 0.3099 - val_loss: 0.2958
Epoch 14/100
30/30 - 21s - loss: 0.3058 - val_loss: 0.2904
Epoch 15/100
30/30 - 21s - loss: 0.3019 - val_loss: 0.2865
Epoch 16/100
30/30 - 21s - loss: 0.2980 - val_loss: 0.2847
Epoch 17/100
30/30 - 21s - loss: 0.2944 - val_loss: 0.2819
Epoch 

In [7]:
# gru=0 -> three stacked bidirectional LSTM layers.
lstm = build_model(gru=0)
# Checkpoint callback with default settings, writing to 'model_lstm.h5'.
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_lstm.h5')

history_lstm = lstm.fit(
    x=train_inputs,
    y=train_labels,
    batch_size=64,
    epochs=100,
    verbose=2,
    callbacks=[lr_callback, sv_lstm],
    validation_data=(val_inputs, val_labels),
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 25s - loss: 0.4926 - val_loss: 0.4276
Epoch 2/100
30/30 - 24s - loss: 0.4162 - val_loss: 0.3942
Epoch 3/100
30/30 - 24s - loss: 0.3920 - val_loss: 0.3738
Epoch 4/100
30/30 - 23s - loss: 0.3793 - val_loss: 0.3643
Epoch 5/100
30/30 - 24s - loss: 0.3677 - val_loss: 0.3496
Epoch 6/100
30/30 - 24s - loss: 0.3553 - val_loss: 0.3373
Epoch 7/100
30/30 - 24s - loss: 0.3466 - val_loss: 0.3315
Epoch 8/100
30/30 - 24s - loss: 0.3393 - val_loss: 0.3236
Epoch 9/100
30/30 - 24s - loss: 0.3332 - val_loss: 0.3199
Epoch 10/100
30/30 - 24s - loss: 0.3267 - val_loss: 0.3131
Epoch 11/100
30/30 - 24s - loss: 0.3214 - val_loss: 0.3128
Epoch 12/100
30/30 - 24s - loss: 0.3176 - val_loss: 0.3042
Epoch 13/100
30/30 - 24s - loss: 0.3124 - val_loss: 0.3065
Epoch 14/100
30/30 - 24s - loss: 0.3078 - val_loss: 0.2946
Epoch 15/100
30/30 - 24s - loss: 0.3033 - val_loss: 0.2925
Epoch 16/100
30/30 - 24s - loss: 0.3006 - val_loss: 0.2883
Epoch 17/100
30/30 - 24s - loss: 0.2948 - val_loss: 0.2842
Epoch 

In [8]:
# gru=3 -> hybrid stack: GRU, LSTM, LSTM.
# The names lstm / sv_lstm / history_lstm are reused from the previous cell
# even though this is a hybrid model; only the checkpoint file differs.
lstm = build_model(gru=3)
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_hyb1.h5')

history_lstm = lstm.fit(
    x=train_inputs,
    y=train_labels,
    batch_size=64,
    epochs=100,
    verbose=2,
    callbacks=[lr_callback, sv_lstm],
    validation_data=(val_inputs, val_labels),
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 25s - loss: 0.4749 - val_loss: 0.4046
Epoch 2/100
30/30 - 23s - loss: 0.3985 - val_loss: 0.3787
Epoch 3/100
30/30 - 23s - loss: 0.3825 - val_loss: 0.3667
Epoch 4/100
30/30 - 23s - loss: 0.3712 - val_loss: 0.3500
Epoch 5/100
30/30 - 23s - loss: 0.3547 - val_loss: 0.3360
Epoch 6/100
30/30 - 24s - loss: 0.3420 - val_loss: 0.3276
Epoch 7/100
30/30 - 23s - loss: 0.3350 - val_loss: 0.3199
Epoch 8/100
30/30 - 23s - loss: 0.3270 - val_loss: 0.3109
Epoch 9/100
30/30 - 23s - loss: 0.3201 - val_loss: 0.3041
Epoch 10/100
30/30 - 23s - loss: 0.3127 - val_loss: 0.3038
Epoch 11/100
30/30 - 24s - loss: 0.3086 - val_loss: 0.2932
Epoch 12/100
30/30 - 23s - loss: 0.3024 - val_loss: 0.2919
Epoch 13/100
30/30 - 23s - loss: 0.2979 - val_loss: 0.2864
Epoch 14/100
30/30 - 23s - loss: 0.2931 - val_loss: 0.2845
Epoch 15/100
30/30 - 23s - loss: 0.2893 - val_loss: 0.2749
Epoch 16/100
30/30 - 24s - loss: 0.2839 - val_loss: 0.2728
Epoch 17/100
30/30 - 23s - loss: 0.2788 - val_loss: 0.2674
Epoch 

In [9]:
# gru=4 -> hybrid stack: LSTM, GRU, GRU.
# The names lstm / sv_lstm / history_lstm are reused from the previous cell
# even though this is a hybrid model; only the checkpoint file differs.
lstm = build_model(gru=4)
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_hyb2.h5')

history_lstm = lstm.fit(
    x=train_inputs,
    y=train_labels,
    batch_size=64,
    epochs=100,
    verbose=2,
    callbacks=[lr_callback, sv_lstm],
    validation_data=(val_inputs, val_labels),
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 24s - loss: 0.4707 - val_loss: 0.4010
Epoch 2/100
30/30 - 22s - loss: 0.3983 - val_loss: 0.3751
Epoch 3/100
30/30 - 22s - loss: 0.3829 - val_loss: 0.3623
Epoch 4/100
30/30 - 22s - loss: 0.3673 - val_loss: 0.3463
Epoch 5/100
30/30 - 23s - loss: 0.3559 - val_loss: 0.3394
Epoch 6/100
30/30 - 22s - loss: 0.3492 - val_loss: 0.3397
Epoch 7/100
30/30 - 22s - loss: 0.3444 - val_loss: 0.3275
Epoch 8/100
30/30 - 23s - loss: 0.3367 - val_loss: 0.3231
Epoch 9/100
30/30 - 22s - loss: 0.3322 - val_loss: 0.3173
Epoch 10/100
30/30 - 23s - loss: 0.3272 - val_loss: 0.3128
Epoch 11/100
30/30 - 22s - loss: 0.3233 - val_loss: 0.3092
Epoch 12/100
30/30 - 22s - loss: 0.3184 - val_loss: 0.3026
Epoch 13/100
30/30 - 23s - loss: 0.3134 - val_loss: 0.3000
Epoch 14/100
30/30 - 22s - loss: 0.3086 - val_loss: 0.2931
Epoch 15/100
30/30 - 22s - loss: 0.3043 - val_loss: 0.2899
Epoch 16/100
30/30 - 22s - loss: 0.2997 - val_loss: 0.2849
Epoch 17/100
30/30 - 22s - loss: 0.2951 - val_loss: 0.2814
Epoch 

In [10]:
# gru=5 -> hybrid stack: LSTM, GRU, LSTM.
# The names lstm / sv_lstm / history_lstm are reused from the previous cell
# even though this is a hybrid model; only the checkpoint file differs.
lstm = build_model(gru=5)
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_hyb3.h5')

history_lstm = lstm.fit(
    x=train_inputs,
    y=train_labels,
    batch_size=64,
    epochs=100,
    verbose=2,
    callbacks=[lr_callback, sv_lstm],
    validation_data=(val_inputs, val_labels),
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 25s - loss: 0.4753 - val_loss: 0.4092
Epoch 2/100
30/30 - 24s - loss: 0.3999 - val_loss: 0.3840
Epoch 3/100
30/30 - 24s - loss: 0.3858 - val_loss: 0.3727
Epoch 4/100
30/30 - 23s - loss: 0.3731 - val_loss: 0.3514
Epoch 5/100
30/30 - 24s - loss: 0.3566 - val_loss: 0.3372
Epoch 6/100
30/30 - 23s - loss: 0.3456 - val_loss: 0.3311
Epoch 7/100
30/30 - 24s - loss: 0.3384 - val_loss: 0.3228
Epoch 8/100
30/30 - 23s - loss: 0.3331 - val_loss: 0.3181
Epoch 9/100
30/30 - 23s - loss: 0.3281 - val_loss: 0.3158
Epoch 10/100
30/30 - 24s - loss: 0.3231 - val_loss: 0.3076
Epoch 11/100
30/30 - 23s - loss: 0.3175 - val_loss: 0.3037
Epoch 12/100
30/30 - 24s - loss: 0.3113 - val_loss: 0.2992
Epoch 13/100
30/30 - 23s - loss: 0.3058 - val_loss: 0.2911
Epoch 14/100
30/30 - 23s - loss: 0.3005 - val_loss: 0.2857
Epoch 15/100
30/30 - 24s - loss: 0.2964 - val_loss: 0.2821
Epoch 16/100
30/30 - 23s - loss: 0.2910 - val_loss: 0.2751
Epoch 17/100
30/30 - 24s - loss: 0.2843 - val_loss: 0.2700
Epoch 

In [None]:
# gru=6 -> hybrid stack: LSTM, GRU, LSTM (build_model currently builds the
# same layers for gru=5 and gru=6, so this differs from the previous run only
# by random initialisation).
# The names lstm / sv_lstm / history_lstm are reused from the previous cell.
lstm = build_model(gru=6)
sv_lstm = tf.keras.callbacks.ModelCheckpoint('model_hyb4.h5')

history_lstm = lstm.fit(
    x=train_inputs,
    y=train_labels,
    batch_size=64,
    epochs=100,
    verbose=2,
    callbacks=[lr_callback, sv_lstm],
    validation_data=(val_inputs, val_labels),
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 25s - loss: 0.4802 - val_loss: 0.4156
Epoch 2/100
30/30 - 23s - loss: 0.4011 - val_loss: 0.3815
Epoch 3/100
30/30 - 23s - loss: 0.3833 - val_loss: 0.3696
Epoch 4/100
30/30 - 23s - loss: 0.3705 - val_loss: 0.3497
Epoch 5/100
30/30 - 23s - loss: 0.3548 - val_loss: 0.3384
Epoch 6/100
30/30 - 24s - loss: 0.3442 - val_loss: 0.3290
Epoch 7/100
30/30 - 23s - loss: 0.3371 - val_loss: 0.3212
Epoch 8/100
30/30 - 23s - loss: 0.3301 - val_loss: 0.3167
Epoch 9/100
30/30 - 23s - loss: 0.3241 - val_loss: 0.3103
Epoch 10/100
30/30 - 23s - loss: 0.3202 - val_loss: 0.3063
Epoch 11/100
30/30 - 24s - loss: 0.3137 - val_loss: 0.3016
Epoch 12/100
30/30 - 23s - loss: 0.3099 - val_loss: 0.2961
Epoch 13/100
30/30 - 23s - loss: 0.3046 - val_loss: 0.2895
Epoch 14/100
30/30 - 23s - loss: 0.2999 - val_loss: 0.2841
Epoch 15/100
30/30 - 23s - loss: 0.2937 - val_loss: 0.2794
Epoch 16/100
30/30 - 24s - loss: 0.2904 - val_loss: 0.2724
Epoch 17/100
30/30 - 23s - loss: 0.2838 - val_loss: 0.2681
Epoch 

In [None]:
# The test set contains sequences of two lengths; each group is encoded and
# predicted separately because the model input length is fixed per model.
public_df = test.query("seq_length == 107").copy()
private_df = test.query("seq_length == 130").copy()

public_inputs = preprocess_inputs(public_df)
private_inputs = preprocess_inputs(private_df)

# One entry per trained model:
#   name -> (build_model `gru` code, checkpoint file, weight in the blend)
ensemble = {
    'gru':  (1, 'model_gru.h5', 0.2),
    'lstm': (0, 'model_lstm.h5', 0.2),
    'hyb1': (3, 'model_hyb1.h5', 0.2),
    'hyb2': (4, 'model_hyb2.h5', 0.2),
    'hyb3': (5, 'model_hyb3.h5', 0.1),
    'hyb4': (6, 'model_hyb4.h5', 0.1),
}
assert abs(sum(weight for _, _, weight in ensemble.values()) - 1.0) < 1e-9, \
    "blend weights must sum to 1"


def predict_long_format(variant, weights_path):
    """Predict the whole test set with one trained architecture.

    For each sequence length a model of architecture ``variant`` is rebuilt
    so that it predicts every position (pred_len == seq_len), the trained
    weights are loaded from ``weights_path``, and the predictions are
    unrolled into one row per (sequence id, position).

    Returns a DataFrame with the columns of ``target_cols`` plus
    'id_seqpos' ('<id>_<position>'), public rows first, with a fresh
    0..n-1 index.
    """
    frames = []
    for df, inputs, seq_len in [(public_df, public_inputs, 107),
                                (private_df, private_inputs, 130)]:
        model = build_model(gru=variant, seq_len=seq_len, pred_len=seq_len)
        model.load_weights(weights_path)
        preds = model.predict(inputs)

        for uid, single_pred in zip(df.id, preds):
            single_df = pd.DataFrame(single_pred, columns=target_cols)
            single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]
            frames.append(single_df)

    # A unique index keeps the row-wise arithmetic in the blend unambiguous.
    return pd.concat(frames, ignore_index=True)


# Every frame lists the rows in the same order, so they can be combined
# row by row.
preds_by_model = {
    name: predict_long_format(variant, weights_path)
    for name, (variant, weights_path, _) in ensemble.items()
}

# Weighted average of the models, computed per target column. Each target is
# blended from the SAME column of every model.
blend_preds_df = pd.DataFrame()
blend_preds_df['id_seqpos'] = preds_by_model['gru']['id_seqpos']
for col in target_cols:
    blend_preds_df[col] = sum(
        weight * preds_by_model[name][col]
        for name, (_, _, weight) in ensemble.items()
    )

# Keep exactly the rows requested by the sample submission, in its order.
submission = sample_sub[['id_seqpos']].merge(blend_preds_df, on=['id_seqpos'])

In [None]:
# Display the first rows of the blended submission as a visual check.
submission.head()

In [None]:
# Save the final output file (without the DataFrame index column).
submission.to_csv('submission32.csv', index=False)