In [None]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
# Authenticate the Colab user, then hand the application-default credentials
# to PyDrive; `drive` is the client the download cells below use.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
# Create a handle for the Drive file with this ID and save its content
# into the working directory as train.json.
downloaded = drive.CreateFile({'id':'15-Fjsqz7B83lc4pFBcc9TbGFF8Oasmd_'})
downloaded.GetContentFile('train.json')

In [None]:
# Create a handle for the test-set Drive file and save it as test.json.
# The content has to be read from `download1` (the handle created here);
# reading from `downloaded` would write the training file under the name test.json.
download1 = drive.CreateFile({'id':'1Bx3SuceygcqfaxWhzKLB6o9SRGItssjp'})
download1.GetContentFile('test.json')

In [None]:
# Create a handle for the sample-submission Drive file and save it as
# sample_submission.csv. The content is read from `download2`, the handle
# created here, not from the earlier train.json handle.
# NOTE(review): this file ID is identical to the one used for test.json, so it
# most likely still points at the wrong file - TODO replace it with the Drive
# ID of sample_submission.csv.
download2 = drive.CreateFile({'id':'1Bx3SuceygcqfaxWhzKLB6o9SRGItssjp'})
download2.GetContentFile('sample_submission.csv')

In [None]:
import warnings
warnings.filterwarnings('ignore')

#Basic data manipulation libraries
import pandas as pd, numpy as np
import math, json, gc, random, os, sys
from matplotlib import pyplot as plt
from tqdm import tqdm

#Deep Learning Libraries
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L

#Library for model evaluation
from sklearn.model_selection import train_test_split, KFold

In [None]:
# Read the files saved into the working directory by the download cells.
# train.json / test.json are parsed as JSON-lines (one record per line).
train = pd.read_json('train.json', lines=True)
test = pd.read_json('test.json', lines=True)
sample_sub = pd.read_csv('sample_submission.csv')


# Label columns, in the order used for the model outputs and the submission.
target_cols = [
    'reactivity',
    'deg_Mg_pH10',
    'deg_pH10',
    'deg_Mg_50C',
    'deg_50C',
]

# Character -> integer id lookup; a character's id is its position in the
# vocabulary string. preprocess_inputs uses it to encode the input columns.
token2int = {token: idx for idx, token in enumerate('().ACGUBEHIMSX')}

def get_pair_index_structure(structure):
    """Return, for every position of a dot-bracket string, the index of its partner.

    Args:
        structure: String (or any sequence of single characters) in which "("
            opens a pair and ")" closes the most recently opened pair that is
            still unclosed. Every other character is treated as unpaired.

    Returns:
        Integer NumPy array of length ``len(structure)``. Entry ``i`` is the
        index paired with position ``i``, or -1 if position ``i`` is unpaired.

    Raises:
        ValueError: If the brackets are unbalanced.
    """
    pair_structure = np.full(len(structure), -1, dtype=int)

    # A stack pairs each ")" with the nearest unmatched "(". Numbering the
    # closing brackets in reverse order of the opening ones is only correct for
    # a single fully nested stem and mis-pairs side-by-side stems like "()()".
    open_positions = []
    for position, symbol in enumerate(structure):
        if symbol == "(":
            open_positions.append(position)
        elif symbol == ")":
            if not open_positions:
                raise ValueError(
                    f"Unbalanced structure: ')' at position {position} has no matching '('")
            partner = open_positions.pop()
            pair_structure[partner] = position
            pair_structure[position] = partner

    if open_positions:
        raise ValueError(
            f"Unbalanced structure: '(' at position {open_positions[-1]} is never closed")

    return pair_structure

def preprocess_inputs(df, cols=['sequence', 'structure', 'predicted_loop_type']):
    """Encode the string columns `cols` of `df` as integer token ids.

    Every string is replaced by the list of token2int ids of its characters.
    The nested lists are stacked into one array and axes 1 and 2 are swapped,
    so the result is indexed as [row, position, column].

    Assumes all strings in the selected rows have the same length; otherwise
    the stacked array is not 3-D and the transpose fails.
    """
    encoded = df[cols].applymap(lambda seq: [token2int[char] for char in seq])
    stacked = np.array(encoded.values.tolist())
    return np.transpose(stacked, (0, 2, 1))

# Keep only training rows whose signal_to_noise is above 1 and build the
# encoded inputs and the label array from that same subset.
_snr_mask = train.signal_to_noise > 1
train_inputs = preprocess_inputs(train[_snr_mask])
# Labels are stacked as [row, target, position] and then swapped to
# [row, position, target] so they line up with the model output.
train_labels = np.array(train[_snr_mask][target_cols].values.tolist()).transpose((0, 2, 1))

def MCRMSE(y_true, y_pred):
    """Mean column-wise RMSE, used as the training loss.

    The squared error is averaged over axis 1, square-rooted, and the roots
    are then averaged over what is axis 1 of the reduced tensor.
    Presumably the tensors are (batch, position, target), which gives one loss
    value per sample - TODO confirm.
    """
    squared_error = tf.square(y_true - y_pred)
    rmse_per_column = tf.sqrt(tf.reduce_mean(squared_error, axis=1))
    return tf.reduce_mean(rmse_per_column, axis=1)

In [None]:
def gru_layer(hidden_dim, dropout):
    """Return a bidirectional GRU layer that outputs the whole sequence.

    Args:
        hidden_dim: Number of GRU units per direction.
        dropout: Dropout rate passed to the GRU layer.
    """
    recurrent = tf.keras.layers.GRU(
        hidden_dim,
        dropout=dropout,
        return_sequences=True,
        kernel_initializer='orthogonal',
    )
    return tf.keras.layers.Bidirectional(recurrent)

def lstm_layer(hidden_dim, dropout):
    """Return a bidirectional LSTM layer that outputs the whole sequence.

    Args:
        hidden_dim: Number of LSTM units per direction.
        dropout: Dropout rate passed to the LSTM layer.
    """
    recurrent = tf.keras.layers.LSTM(
        hidden_dim,
        dropout=dropout,
        return_sequences=True,
        kernel_initializer='orthogonal',
    )
    return tf.keras.layers.Bidirectional(recurrent)

def build_model(gru=1,seq_len=107, pred_len=68, dropout=0.5,
                embed_dim=75, hidden_dim=128):
    """Build and compile the embedding + 3-layer bidirectional RNN model.

    Args:
        gru: Selects the recurrent stack (layers listed in application order):
            1 = GRU, GRU, GRU
            0 = LSTM, LSTM, LSTM
            3 = GRU, LSTM, LSTM
            4 = LSTM, GRU, GRU
            5 = LSTM, GRU, LSTM
            6 = LSTM, GRU, LSTM
        seq_len: Length of the input sequences.
        pred_len: Number of leading positions for which outputs are produced.
        dropout: Dropout rate passed to every recurrent layer.
        embed_dim: Embedding size per input channel.
        hidden_dim: Units per direction in every recurrent layer.

    Returns:
        A tf.keras.Model taking (seq_len, 3) integer inputs and producing
        (pred_len, 5) outputs, compiled with Adam and the MCRMSE loss.

    Raises:
        ValueError: If `gru` is not one of the supported values.
    """
    layer_stacks = {
        1: (gru_layer, gru_layer, gru_layer),
        0: (lstm_layer, lstm_layer, lstm_layer),
        3: (gru_layer, lstm_layer, lstm_layer),
        4: (lstm_layer, gru_layer, gru_layer),
        5: (lstm_layer, gru_layer, lstm_layer),
        # NOTE(review): 6 builds exactly the same stack as 5. Kept as-is so
        # existing callers and saved weights stay valid - confirm whether a
        # different combination (e.g. GRU, LSTM, GRU) was intended.
        6: (lstm_layer, gru_layer, lstm_layer),
    }
    # Fail early with a clear message; without this check an unsupported value
    # would leave `hidden` unassigned and fail further down.
    if gru not in layer_stacks:
        raise ValueError(
            f"Unsupported gru={gru!r}; expected one of {sorted(layer_stacks)}")

    inputs = tf.keras.layers.Input(shape=(seq_len, 3))

    # Each of the 3 input channels is embedded separately; the per-channel
    # embeddings are then merged into one feature vector per position.
    embed = tf.keras.layers.Embedding(input_dim=len(token2int), output_dim=embed_dim)(inputs)
    reshaped = tf.reshape(
        embed, shape=(-1, embed.shape[1],  embed.shape[2] * embed.shape[3]))

    reshaped = tf.keras.layers.SpatialDropout1D(.2)(reshaped)

    hidden = reshaped
    for make_layer in layer_stacks[gru]:
        hidden = make_layer(hidden_dim, dropout)(hidden)

    #only making predictions on the first part of each sequence
    truncated = hidden[:, :pred_len]

    out = tf.keras.layers.Dense(5, activation='linear')(truncated)

    model = tf.keras.Model(inputs=inputs, outputs=out)

    # Only Adam is used for compilation; the RectifiedAdam / Lookahead
    # optimizers that were instantiated here but never used have been dropped.
    adam = tf.optimizers.Adam()

    model.compile(optimizer = adam, loss=MCRMSE)

    return model

In [None]:
# Hold out 10% of the prepared training arrays for validation; the fixed
# random_state keeps the split reproducible.
# NOTE: train_inputs / train_labels are rebound here, so re-running this cell
# splits the already reduced arrays again.
train_inputs, val_inputs, train_labels, val_labels = train_test_split(
    train_inputs,
    train_labels,
    test_size=.1,
    random_state=34,
)

In [None]:
# Learning-rate schedule with Keras defaults; this same callback object is
# reused by the training cells that follow.
lr_callback = tf.keras.callbacks.ReduceLROnPlateau()

# Three stacked bidirectional GRU layers.
gru = build_model(gru=1)
# save_best_only=True keeps the epoch with the lowest val_loss on disk. With the
# default settings the file is overwritten every epoch, so it would hold the
# last epoch's weights rather than the ones behind the minimum reported below.
sv_gru = tf.keras.callbacks.ModelCheckpoint(
    'model_gru.h5', monitor='val_loss', save_best_only=True)

history_gru = gru.fit(
    train_inputs, train_labels,
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_gru],
    verbose = 2
)

print(f"Min training loss={min(history_gru.history['loss'])}, min validation loss={min(history_gru.history['val_loss'])}")

Epoch 1/100
30/30 - 3s - loss: 0.4659 - val_loss: 0.4089
Epoch 2/100
30/30 - 1s - loss: 0.4009 - val_loss: 0.3746
Epoch 3/100
30/30 - 1s - loss: 0.3810 - val_loss: 0.3566
Epoch 4/100
30/30 - 1s - loss: 0.3646 - val_loss: 0.3495
Epoch 5/100
30/30 - 1s - loss: 0.3538 - val_loss: 0.3332
Epoch 6/100
30/30 - 1s - loss: 0.3440 - val_loss: 0.3270
Epoch 7/100
30/30 - 1s - loss: 0.3371 - val_loss: 0.3227
Epoch 8/100
30/30 - 1s - loss: 0.3322 - val_loss: 0.3162
Epoch 9/100
30/30 - 1s - loss: 0.3270 - val_loss: 0.3127
Epoch 10/100
30/30 - 1s - loss: 0.3220 - val_loss: 0.3073
Epoch 11/100
30/30 - 1s - loss: 0.3177 - val_loss: 0.3026
Epoch 12/100
30/30 - 1s - loss: 0.3126 - val_loss: 0.2979
Epoch 13/100
30/30 - 1s - loss: 0.3079 - val_loss: 0.2948
Epoch 14/100
30/30 - 1s - loss: 0.3045 - val_loss: 0.2897
Epoch 15/100
30/30 - 1s - loss: 0.3001 - val_loss: 0.2876
Epoch 16/100
30/30 - 1s - loss: 0.2965 - val_loss: 0.2829
Epoch 17/100
30/30 - 1s - loss: 0.2946 - val_loss: 0.2805
Epoch 18/100
30/30 - 1s

### Training with LSTM

In [None]:
# Three stacked bidirectional LSTM layers.
lstm = build_model(gru=0)
# save_best_only=True keeps the epoch with the lowest val_loss on disk. With the
# default settings the file is overwritten every epoch, so it would hold the
# last epoch's weights rather than the ones behind the minimum reported below.
sv_lstm = tf.keras.callbacks.ModelCheckpoint(
    'model_lstm.h5', monitor='val_loss', save_best_only=True)

history_lstm = lstm.fit(
    train_inputs, train_labels,
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 3s - loss: 0.4970 - val_loss: 0.4349
Epoch 2/100
30/30 - 1s - loss: 0.4177 - val_loss: 0.3926
Epoch 3/100
30/30 - 1s - loss: 0.3919 - val_loss: 0.3748
Epoch 4/100
30/30 - 1s - loss: 0.3788 - val_loss: 0.3622
Epoch 5/100
30/30 - 1s - loss: 0.3687 - val_loss: 0.3489
Epoch 6/100
30/30 - 1s - loss: 0.3565 - val_loss: 0.3365
Epoch 7/100
30/30 - 1s - loss: 0.3454 - val_loss: 0.3343
Epoch 8/100
30/30 - 1s - loss: 0.3395 - val_loss: 0.3232
Epoch 9/100
30/30 - 1s - loss: 0.3333 - val_loss: 0.3208
Epoch 10/100
30/30 - 1s - loss: 0.3281 - val_loss: 0.3123
Epoch 11/100
30/30 - 1s - loss: 0.3213 - val_loss: 0.3074
Epoch 12/100
30/30 - 1s - loss: 0.3165 - val_loss: 0.3055
Epoch 13/100
30/30 - 1s - loss: 0.3118 - val_loss: 0.3012
Epoch 14/100
30/30 - 1s - loss: 0.3077 - val_loss: 0.2938
Epoch 15/100
30/30 - 1s - loss: 0.3035 - val_loss: 0.2917
Epoch 16/100
30/30 - 1s - loss: 0.2982 - val_loss: 0.2889
Epoch 17/100
30/30 - 1s - loss: 0.2956 - val_loss: 0.2853
Epoch 18/100
30/30 - 2s

In [None]:
# Hybrid 1: GRU -> LSTM -> LSTM (gru=3).
# NOTE: the names lstm / sv_lstm / history_lstm are reused from the LSTM cell;
# after this cell they refer to the hybrid model, not the pure LSTM one.
lstm = build_model(gru=3)
# save_best_only=True keeps the epoch with the lowest val_loss on disk. With the
# default settings the file is overwritten every epoch, so it would hold the
# last epoch's weights rather than the ones behind the minimum reported below.
sv_lstm = tf.keras.callbacks.ModelCheckpoint(
    'model_hyb1.h5', monitor='val_loss', save_best_only=True)

history_lstm = lstm.fit(
    train_inputs, train_labels,
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")


Epoch 1/100
30/30 - 3s - loss: 0.4813 - val_loss: 0.4185
Epoch 2/100
30/30 - 1s - loss: 0.4026 - val_loss: 0.3892
Epoch 3/100
30/30 - 1s - loss: 0.3849 - val_loss: 0.3665
Epoch 4/100
30/30 - 1s - loss: 0.3722 - val_loss: 0.3536
Epoch 5/100
30/30 - 1s - loss: 0.3566 - val_loss: 0.3372
Epoch 6/100
30/30 - 1s - loss: 0.3447 - val_loss: 0.3286
Epoch 7/100
30/30 - 1s - loss: 0.3355 - val_loss: 0.3198
Epoch 8/100
30/30 - 1s - loss: 0.3279 - val_loss: 0.3137
Epoch 9/100
30/30 - 1s - loss: 0.3203 - val_loss: 0.3081
Epoch 10/100
30/30 - 1s - loss: 0.3141 - val_loss: 0.3009
Epoch 11/100
30/30 - 1s - loss: 0.3086 - val_loss: 0.2997
Epoch 12/100
30/30 - 1s - loss: 0.3037 - val_loss: 0.2910
Epoch 13/100
30/30 - 1s - loss: 0.3000 - val_loss: 0.2855
Epoch 14/100
30/30 - 1s - loss: 0.2943 - val_loss: 0.2816
Epoch 15/100
30/30 - 1s - loss: 0.2894 - val_loss: 0.2770
Epoch 16/100
30/30 - 1s - loss: 0.2862 - val_loss: 0.2726
Epoch 17/100
30/30 - 1s - loss: 0.2811 - val_loss: 0.2702
Epoch 18/100
30/30 - 1s

In [None]:
# Hybrid 2: LSTM -> GRU -> GRU (gru=4).
# NOTE: the names lstm / sv_lstm / history_lstm are reused from the LSTM cell;
# after this cell they refer to the hybrid model, not the pure LSTM one.
lstm = build_model(gru=4)
# save_best_only=True keeps the epoch with the lowest val_loss on disk. With the
# default settings the file is overwritten every epoch, so it would hold the
# last epoch's weights rather than the ones behind the minimum reported below.
sv_lstm = tf.keras.callbacks.ModelCheckpoint(
    'model_hyb2.h5', monitor='val_loss', save_best_only=True)

history_lstm = lstm.fit(
    train_inputs, train_labels,
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 4s - loss: 0.4701 - val_loss: 0.4012
Epoch 2/100
30/30 - 1s - loss: 0.3988 - val_loss: 0.3746
Epoch 3/100
30/30 - 1s - loss: 0.3813 - val_loss: 0.3595
Epoch 4/100
30/30 - 1s - loss: 0.3652 - val_loss: 0.3460
Epoch 5/100
30/30 - 1s - loss: 0.3541 - val_loss: 0.3405
Epoch 6/100
30/30 - 1s - loss: 0.3484 - val_loss: 0.3331
Epoch 7/100
30/30 - 1s - loss: 0.3425 - val_loss: 0.3260
Epoch 8/100
30/30 - 1s - loss: 0.3351 - val_loss: 0.3214
Epoch 9/100
30/30 - 1s - loss: 0.3309 - val_loss: 0.3164
Epoch 10/100
30/30 - 1s - loss: 0.3260 - val_loss: 0.3172
Epoch 11/100
30/30 - 1s - loss: 0.3219 - val_loss: 0.3056
Epoch 12/100
30/30 - 1s - loss: 0.3163 - val_loss: 0.3023
Epoch 13/100
30/30 - 1s - loss: 0.3119 - val_loss: 0.2991
Epoch 14/100
30/30 - 1s - loss: 0.3083 - val_loss: 0.2909
Epoch 15/100
30/30 - 1s - loss: 0.3015 - val_loss: 0.2882
Epoch 16/100
30/30 - 1s - loss: 0.2975 - val_loss: 0.2822
Epoch 17/100
30/30 - 1s - loss: 0.2927 - val_loss: 0.2773
Epoch 18/100
30/30 - 1s

In [None]:
# Hybrid 3: LSTM -> GRU -> LSTM (gru=5).
# NOTE: the names lstm / sv_lstm / history_lstm are reused from the LSTM cell;
# after this cell they refer to the hybrid model, not the pure LSTM one.
lstm = build_model(gru=5)
# save_best_only=True keeps the epoch with the lowest val_loss on disk. With the
# default settings the file is overwritten every epoch, so it would hold the
# last epoch's weights rather than the ones behind the minimum reported below.
sv_lstm = tf.keras.callbacks.ModelCheckpoint(
    'model_hyb3.h5', monitor='val_loss', save_best_only=True)

history_lstm = lstm.fit(
    train_inputs, train_labels,
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 3s - loss: 0.4697 - val_loss: 0.4028
Epoch 2/100
30/30 - 1s - loss: 0.3992 - val_loss: 0.3785
Epoch 3/100
30/30 - 1s - loss: 0.3827 - val_loss: 0.3655
Epoch 4/100
30/30 - 1s - loss: 0.3719 - val_loss: 0.3521
Epoch 5/100
30/30 - 1s - loss: 0.3560 - val_loss: 0.3348
Epoch 6/100
30/30 - 1s - loss: 0.3443 - val_loss: 0.3279
Epoch 7/100
30/30 - 1s - loss: 0.3364 - val_loss: 0.3231
Epoch 8/100
30/30 - 1s - loss: 0.3304 - val_loss: 0.3155
Epoch 9/100
30/30 - 1s - loss: 0.3235 - val_loss: 0.3106
Epoch 10/100
30/30 - 1s - loss: 0.3187 - val_loss: 0.3033
Epoch 11/100
30/30 - 1s - loss: 0.3136 - val_loss: 0.2983
Epoch 12/100
30/30 - 1s - loss: 0.3083 - val_loss: 0.2942
Epoch 13/100
30/30 - 1s - loss: 0.3041 - val_loss: 0.2913
Epoch 14/100
30/30 - 1s - loss: 0.2980 - val_loss: 0.2823
Epoch 15/100
30/30 - 1s - loss: 0.2918 - val_loss: 0.2752
Epoch 16/100
30/30 - 1s - loss: 0.2860 - val_loss: 0.2684
Epoch 17/100
30/30 - 1s - loss: 0.2810 - val_loss: 0.2655
Epoch 18/100
30/30 - 1s

In [None]:
# Hybrid 4 (gru=6). NOTE(review): build_model produces the same
# LSTM -> GRU -> LSTM stack for gru=6 as for gru=5, so this is a second run of
# the hybrid-3 architecture - confirm that is intended.
# The names lstm / sv_lstm / history_lstm are reused from the LSTM cell; after
# this cell they refer to the hybrid model, not the pure LSTM one.
lstm = build_model(gru=6)
# save_best_only=True keeps the epoch with the lowest val_loss on disk. With the
# default settings the file is overwritten every epoch, so it would hold the
# last epoch's weights rather than the ones behind the minimum reported below.
sv_lstm = tf.keras.callbacks.ModelCheckpoint(
    'model_hyb4.h5', monitor='val_loss', save_best_only=True)

history_lstm = lstm.fit(
    train_inputs, train_labels,
    validation_data=(val_inputs,val_labels),
    batch_size=64,
    epochs=100,
    callbacks=[lr_callback,sv_lstm],
    verbose = 2
)

print(f"Min training loss={min(history_lstm.history['loss'])}, min validation loss={min(history_lstm.history['val_loss'])}")

Epoch 1/100
30/30 - 3s - loss: 0.4793 - val_loss: 0.4183
Epoch 2/100
30/30 - 1s - loss: 0.4050 - val_loss: 0.3811
Epoch 3/100
30/30 - 1s - loss: 0.3861 - val_loss: 0.3676
Epoch 4/100
30/30 - 1s - loss: 0.3721 - val_loss: 0.3507
Epoch 5/100
30/30 - 1s - loss: 0.3569 - val_loss: 0.3376
Epoch 6/100
30/30 - 1s - loss: 0.3457 - val_loss: 0.3287
Epoch 7/100
30/30 - 1s - loss: 0.3370 - val_loss: 0.3222
Epoch 8/100
30/30 - 1s - loss: 0.3301 - val_loss: 0.3164
Epoch 9/100
30/30 - 1s - loss: 0.3253 - val_loss: 0.3133
Epoch 10/100
30/30 - 1s - loss: 0.3222 - val_loss: 0.3052
Epoch 11/100
30/30 - 1s - loss: 0.3146 - val_loss: 0.2975
Epoch 12/100
30/30 - 1s - loss: 0.3093 - val_loss: 0.2942
Epoch 13/100
30/30 - 1s - loss: 0.3049 - val_loss: 0.2886
Epoch 14/100
30/30 - 1s - loss: 0.2993 - val_loss: 0.2823
Epoch 15/100
30/30 - 1s - loss: 0.2945 - val_loss: 0.2765
Epoch 16/100
30/30 - 1s - loss: 0.2882 - val_loss: 0.2725
Epoch 17/100
30/30 - 1s - loss: 0.2828 - val_loss: 0.2663
Epoch 18/100
30/30 - 1s

In [None]:
# Caveat: the submission needs a prediction for every position of every test
# sequence, whereas training only produced outputs for the first 68 positions.
# The models are therefore rebuilt with pred_len == seq_len and the trained
# weights are loaded into them.
# The public test sequences have length 107 and the private ones length 130
# (see the seq_length filters in the next cell), so each set needs a model
# built for its own length; a seq_len=130 model rejects the 107-long inputs.
model_public = build_model(gru=6, seq_len=107, pred_len = 107)
model_private = build_model(gru=6, seq_len=130, pred_len = 130)

model_public.load_weights('model_hyb4.h5')
model_private.load_weights('model_hyb4.h5')

In [None]:
# Split the test set by sequence length (107 -> public, 130 -> private) and
# encode each part with the same preprocessing as the training data.
public_df = test[test.seq_length == 107]
private_df = test[test.seq_length == 130]

public_inputs, private_inputs = preprocess_inputs(public_df), preprocess_inputs(private_df)

ValueError: ignored

In [None]:
# Run each encoded test subset through its model. The input length has to
# match the seq_len the model was built with.
public_preds = model_public.predict(public_inputs)
private_preds = model_private.predict(private_inputs)

ValueError: ignored

In [None]:
# NOTE(review): in notebook order `submission` is first assigned in the final
# (blending) cell, so this cell builds it itself from the predictions made in
# the cell above instead of relying on out-of-order execution. Confirm that
# these single-model predictions are what submission14.csv should contain.
preds_ls = []
for df, preds in [(public_df, public_preds), (private_df, private_preds)]:
    for i, uid in enumerate(df.id):
        single_df = pd.DataFrame(preds[i], columns=target_cols)
        # One row per sequence position, keyed "<sequence id>_<position>".
        single_df['id_seqpos'] = [f'{uid}_{x}' for x in range(single_df.shape[0])]
        preds_ls.append(single_df)

# Keep only the rows listed in the sample submission, in its order.
submission = sample_sub[['id_seqpos']].merge(pd.concat(preds_ls), on=['id_seqpos'])

#Saving the final output file
submission.to_csv('submission14.csv', index=False)

# Last expression of the cell, so the preview is actually displayed.
submission.head()

In [None]:
# Split the test set by sequence length (107 -> public, 130 -> private),
# working on copies, and encode each part for the models.
public_df = test[test.seq_length == 107].copy()
private_df = test[test.seq_length == 130].copy()

public_inputs, private_inputs = preprocess_inputs(public_df), preprocess_inputs(private_df)

def _load_trained(arch, weights_path, seq_len):
    """Rebuild architecture `arch` for `seq_len`-long inputs and load its trained weights.

    pred_len is set to seq_len so an output is produced for every position.
    """
    model = build_model(gru=arch, seq_len=seq_len, pred_len=seq_len)
    model.load_weights(weights_path)
    return model


# build all models and load pre-trained model weights:
# "short" models take the 107-long public sequences, "long" ones the 130-long private ones
gru_short = _load_trained(1, 'model_gru.h5', 107)
gru_long = _load_trained(1, 'model_gru.h5', 130)
lstm_short = _load_trained(0, 'model_lstm.h5', 107)
lstm_long = _load_trained(0, 'model_lstm.h5', 130)
hyb1_short = _load_trained(3, 'model_hyb1.h5', 107)
hyb1_long = _load_trained(3, 'model_hyb1.h5', 130)
hyb2_short = _load_trained(4, 'model_hyb2.h5', 107)
hyb2_long = _load_trained(4, 'model_hyb2.h5', 130)
hyb3_short = _load_trained(5, 'model_hyb3.h5', 107)
hyb3_long = _load_trained(5, 'model_hyb3.h5', 130)
hyb4_short = _load_trained(6, 'model_hyb4.h5', 107)
hyb4_long = _load_trained(6, 'model_hyb4.h5', 130)

# and predict
gru_public_preds = gru_short.predict(public_inputs)
gru_private_preds = gru_long.predict(private_inputs)
lstm_public_preds = lstm_short.predict(public_inputs)
lstm_private_preds = lstm_long.predict(private_inputs)
hyb1_public_preds = hyb1_short.predict(public_inputs)
hyb1_private_preds = hyb1_long.predict(private_inputs)
hyb2_public_preds = hyb2_short.predict(public_inputs)
hyb2_private_preds = hyb2_long.predict(private_inputs)
hyb3_public_preds = hyb3_short.predict(public_inputs)
hyb3_private_preds = hyb3_long.predict(private_inputs)
hyb4_public_preds = hyb4_short.predict(public_inputs)
hyb4_private_preds = hyb4_long.predict(private_inputs)

def _prediction_frames(frame_pred_pairs):
    """Turn (test dataframe, prediction array) pairs into per-sequence frames.

    For every sequence id in each dataframe, the matching prediction matrix
    becomes a DataFrame with the target columns plus an 'id_seqpos' column of
    the form "<sequence id>_<position>".

    Returns:
        List of DataFrames, one per sequence, in input order.
    """
    frames = []
    for frame, preds in frame_pred_pairs:
        for row, uid in enumerate(frame.id):
            single_df = pd.DataFrame(preds[row], columns=target_cols)
            single_df['id_seqpos'] = [f'{uid}_{pos}' for pos in range(single_df.shape[0])]
            frames.append(single_df)
    return frames


# One long prediction table per model. All tables are built in the same order
# (public sequences first, then private), so their rows line up one-to-one.
preds_gru = _prediction_frames([(public_df, gru_public_preds), (private_df, gru_private_preds)])
preds_gru_df = pd.concat(preds_gru)

preds_lstm = _prediction_frames([(public_df, lstm_public_preds), (private_df, lstm_private_preds)])
preds_lstm_df = pd.concat(preds_lstm)

preds_hyb1 = _prediction_frames([(public_df, hyb1_public_preds), (private_df, hyb1_private_preds)])
preds_hyb1_df = pd.concat(preds_hyb1)

preds_hyb2 = _prediction_frames([(public_df, hyb2_public_preds), (private_df, hyb2_private_preds)])
preds_hyb2_df = pd.concat(preds_hyb2)

preds_hyb3 = _prediction_frames([(public_df, hyb3_public_preds), (private_df, hyb3_private_preds)])
preds_hyb3_df = pd.concat(preds_hyb3)

preds_hyb4 = _prediction_frames([(public_df, hyb4_public_preds), (private_df, hyb4_private_preds)])
preds_hyb4_df = pd.concat(preds_hyb4)

# Weighted blend of the six models; the weights sum to 1.
blend_weights = [
    (preds_gru_df, 0.2),
    (preds_lstm_df, 0.2),
    (preds_hyb1_df, 0.2),
    (preds_hyb2_df, 0.2),
    (preds_hyb3_df, 0.1),
    (preds_hyb4_df, 0.1),
]

blend_preds_df = pd.DataFrame()
blend_preds_df['id_seqpos'] = preds_gru_df['id_seqpos']
# Every target is blended from the SAME column of every model. Looping over
# the column names rules out mixing columns, e.g. filling part of the deg_50C
# blend with other models' deg_Mg_50C predictions.
for col in ['reactivity', 'deg_Mg_pH10', 'deg_pH10', 'deg_Mg_50C', 'deg_50C']:
    blend_preds_df[col] = sum(weight * model_df[col] for model_df, weight in blend_weights)

# Keep only the rows listed in the sample submission, in its order.
submission = sample_sub[['id_seqpos']].merge(blend_preds_df, on=['id_seqpos'])

ValueError: ignored