In [None]:
from transformers import RobertaTokenizer
from transformers import TFRobertaModel
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Concatenate, Bidirectional, LSTM , Dropout 
from tensorflow.keras.models import Model

In [None]:
# Load the preprocessed dataset. Later cells read its 'mouse_data' (text) and
# 'label' columns. Replace the placeholder below with the real CSV location.
preprocessed_data = pd.read_csv(
    filepath_or_buffer='<<preprocessed data file path>>',
)

In [None]:
# Tokenise every 'mouse_data' entry into RoBERTa input ids.
#
# The tokenizer checkpoint is kept consistent with the 'roberta-base' encoder
# that the model cells load, so tokenizer and encoder always come from the
# same checkpoint.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
encoded_data = tokenizer.batch_encode_plus(
    # Pass a plain list of texts, which is the input type the tokenizer API documents.
    preprocessed_data['mouse_data'].tolist(),
    # Pad every row to exactly `max_length`. `padding=True` pads only to the
    # longest sample, which yields fewer than 512 columns whenever all texts
    # are short and then fails against the models' fixed Input(shape=(512,)).
    padding='max_length',
    truncation=True,
    max_length=512,
    # Only 'input_ids' is consumed downstream, so the other outputs are skipped.
    return_attention_mask=False,
    return_token_type_ids=False,
    return_tensors='tf'
)

# Binary targets as a NumPy array, aligned row-for-row with the encoded inputs.
encoded_labels = preprocessed_data['label'].values

In [None]:
# Hold out 20% of the samples for evaluation. The fixed random_state keeps the
# split reproducible across kernel restarts. The input ids are converted from
# a TF tensor to a NumPy array before splitting.
(train_inputs, test_inputs,
 train_labels, test_labels) = train_test_split(
    encoded_data['input_ids'].numpy(),
    encoded_labels,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Model 1: RoBERTa encoder (fine-tuned) -> GRU -> sigmoid classifier.
roberta = TFRobertaModel.from_pretrained('roberta-base')

input_layer = tf.keras.layers.Input(shape=(512,), dtype=tf.int32, name='input_layer')

# The model is fed input ids only, so the attention mask is rebuilt here:
# 1 for real tokens, 0 for padding. Without it the encoder attends to every
# <pad> position as if it were real text.
pad_token_id = roberta.config.pad_token_id
attention_mask = tf.keras.layers.Lambda(
    lambda ids: tf.cast(tf.not_equal(ids, pad_token_id), tf.int32),
    name='attention_mask',
)(input_layer)

# Index 0 of the encoder output is the per-token hidden-state sequence.
roberta_layer = roberta(input_layer, attention_mask=attention_mask)[0]
# The GRU collapses the token sequence into a single 128-d vector.
gru_layer = tf.keras.layers.GRU(units=128)(roberta_layer)
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')(gru_layer)

model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer)

# The encoder weights are trainable here, so a small learning rate is used.
# The string shortcut 'adam' means lr=1e-3, which is far too aggressive for
# fine-tuning a pretrained transformer and tends to wreck its weights.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer structure of the current `model`
# (the RoBERTa + GRU classifier when the cells are run in order).
model.summary()

In [None]:
# Train the RoBERTa + GRU classifier in mini-batches of 8. The held-out split
# is used as validation data. No `epochs` argument is given, so the Keras
# default applies.
model.fit(
    train_inputs,
    train_labels,
    batch_size=8,
    validation_data=(test_inputs, test_labels),
)

In [None]:
# Model 2: RoBERTa encoder (fine-tuned) -> LSTM -> sigmoid classifier.
roberta = TFRobertaModel.from_pretrained('roberta-base')

input_layer = tf.keras.layers.Input(shape=(512,), dtype=tf.int32, name='input_layer')

# The model is fed input ids only, so the attention mask is rebuilt here:
# 1 for real tokens, 0 for padding. Without it the encoder attends to every
# <pad> position as if it were real text.
pad_token_id = roberta.config.pad_token_id
attention_mask = tf.keras.layers.Lambda(
    lambda ids: tf.cast(tf.not_equal(ids, pad_token_id), tf.int32),
    name='attention_mask',
)(input_layer)

# Index 0 of the encoder output is the per-token hidden-state sequence.
roberta_layer = roberta(input_layer, attention_mask=attention_mask)[0]
# The LSTM collapses the token sequence into a single 128-d vector.
lstm_output = LSTM(128)(roberta_layer)
output_layer = Dense(1, activation='sigmoid')(lstm_output)

model_inputs = input_layer
model_outputs = output_layer

model = Model(inputs=model_inputs, outputs=model_outputs)

# The encoder weights are trainable here, so a small learning rate is used.
# The string shortcut 'adam' means lr=1e-3, which is far too aggressive for
# fine-tuning a pretrained transformer and tends to wreck its weights.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
    metrics=['accuracy'],
)

In [None]:

# Print the layer-by-layer structure of the current `model`
# (the RoBERTa + LSTM classifier when the cells are run in order).
model.summary()

In [None]:
# Train the RoBERTa + LSTM classifier in mini-batches of 8. The held-out split
# is used as validation data. No `epochs` argument is given, so the Keras
# default applies.
model.fit(
    train_inputs,
    train_labels,
    batch_size=8,
    validation_data=(test_inputs, test_labels),
)

In [None]:
# Model 3: frozen RoBERTa encoder feeding two branches: the position-0 hidden
# state and a bidirectional LSTM summary. Both are concatenated and passed
# through a small dense head.
roberta = TFRobertaModel.from_pretrained('roberta-base')

input_layer = Input(shape=(512,), dtype=tf.int32, name='input_layer')

# Freeze the encoder so that only the layers added below are trained.
roberta.trainable = False

# The model is fed input ids only, so the attention mask is rebuilt here:
# 1 for real tokens, 0 for padding. Without it the encoder attends to every
# <pad> position as if it were real text.
pad_token_id = roberta.config.pad_token_id
attention_mask = tf.keras.layers.Lambda(
    lambda ids: tf.cast(tf.not_equal(ids, pad_token_id), tf.int32),
    name='attention_mask',
)(input_layer)

# Index 0 of the encoder output is the per-token hidden-state sequence.
roberta_layer = roberta(input_layer, attention_mask=attention_mask)[0]

# Sequence summary: 64 units per direction, final states only.
lstm_layer = Bidirectional(LSTM(64, return_sequences=False))(roberta_layer)

# Join the hidden state at sequence position 0 with the BiLSTM summary.
combined_layer = Concatenate()([roberta_layer[:, 0, :], lstm_layer])

dense_layer = Dense(128, activation='relu')(combined_layer)
dropout_layer = Dropout(0.5)(dense_layer)
output_layer = Dense(1, activation='sigmoid')(dropout_layer)

model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer)
# The encoder is frozen, so Adam's default learning rate only affects the
# freshly initialised head.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer structure of the current `model`
# (the frozen-RoBERTa + BiLSTM classifier when the cells are run in order).
model.summary()

In [None]:
# Train the frozen-RoBERTa + BiLSTM classifier in mini-batches of 8. The
# held-out split is used as validation data. No `epochs` argument is given,
# so the Keras default applies.
model.fit(
    train_inputs,
    train_labels,
    batch_size=8,
    validation_data=(test_inputs, test_labels),
)