In [None]:
import pandas as pd
from transformers import AutoTokenizer, TFAutoModel
import torch
import tensorflow as tf
from sklearn.model_selection import train_test_split



In [None]:
# Read the preprocessed dataset into a DataFrame. The path below is a placeholder
# and must be replaced with the real CSV location before running. Later cells
# read the 'mouse_data' and 'label' columns from this frame.
preprocessed_data = pd.read_csv(
    filepath_or_buffer="<<preprocessed data file path>>",
)

In [None]:
# Tokenize every 'mouse_data' entry with the pretrained ALBERT tokenizer and
# return TensorFlow tensors holding the token ids and the attention mask.
tokenizer = AutoTokenizer.from_pretrained('albert-base-v2')
encoded_data = tokenizer.batch_encode_plus(
    preprocessed_data['mouse_data'].tolist(),
    # Pad every row to exactly `max_length` tokens. `padding=True` only pads to the
    # longest row in the dataset, which does not match the fixed (512,) Input layers
    # of the models built later whenever no row reaches 512 tokens.
    padding='max_length',
    truncation=True,
    max_length=512,
    return_attention_mask=True,
    return_token_type_ids=False,
    return_tensors='tf'
)

# Targets as a NumPy array, in the same row order as the encoded inputs.
encoded_labels = preprocessed_data['label'].values

In [None]:
# Split token ids, attention masks and labels in ONE call so that all three arrays
# are guaranteed to receive the same row permutation (80% train / 20% test).
# Splitting them in separate calls only stays aligned as long as both calls keep
# identical `random_state` and `test_size` values.
(
    train_inputs, test_inputs,
    train_masks, test_masks,
    train_labels, test_labels,
) = train_test_split(
    encoded_data['input_ids'].numpy(),
    encoded_data['attention_mask'].numpy(),
    encoded_labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Model 1: pretrained ALBERT encoder followed by a GRU head and a sigmoid output
# for binary classification.
albert = TFAutoModel.from_pretrained('albert-base-v2')

# Two integer inputs of fixed length 512: token ids and the matching attention mask.
input_layer = tf.keras.layers.Input(shape=(512,), dtype=tf.int32, name='input_layer')
mask_layer = tf.keras.layers.Input(shape=(512,), dtype=tf.int32, name='mask_layer')

# Element 0 of the encoder output is the sequence that is fed to the recurrent layer.
albert_output = albert({'input_ids': input_layer, 'attention_mask': mask_layer})[0]

# Pass the attention mask to the GRU as its timestep mask. Without it the GRU also
# steps over padding positions, so its final state (the only output here, since
# return_sequences is left at its default) is computed after reading padding
# whenever a sequence is shorter than 512 tokens.
gru_output = tf.keras.layers.GRU(units=32)(
    albert_output,
    mask=tf.cast(mask_layer, tf.bool),
)

# Single sigmoid unit -> probability of the positive class.
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')(gru_output)
model = tf.keras.Model(inputs=[input_layer, mask_layer], outputs=output_layer)

# NOTE(review): ALBERT is left trainable here and 'adam' uses Keras' default
# learning rate, which is much larger than what is usually used for fine-tuning
# a pretrained transformer — consider an explicit, smaller learning rate.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the current `model`
# (the ALBERT + GRU variant when the cells are run in order).
model.summary()

In [None]:
# Train the current model on the training split with mini-batches of 8 and evaluate
# on the held-out split. No `epochs` argument is given, so Keras' default applies.
# Inputs are passed positionally as [token ids, attention masks], matching the
# order of the model's `inputs` list.
model.fit(
    x=[train_inputs, train_masks],
    y=train_labels,
    batch_size=8,
    validation_data=(
        [test_inputs, test_masks],
        test_labels,
    ),
)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense

In [None]:
# Model 2: pretrained ALBERT encoder followed by a 128-unit LSTM head and a
# sigmoid output for binary classification.
albert = TFAutoModel.from_pretrained('albert-base-v2')

# Two integer inputs of fixed length 512: token ids and the matching attention mask.
input_layer = Input(shape=(512,), dtype=tf.int32, name='input_layer')
mask_layer = Input(shape=(512,), dtype=tf.int32, name='mask_layer')

# Element 0 of the encoder output is the sequence that is fed to the recurrent layer.
albert_output = albert(input_layer, attention_mask=mask_layer)[0]

# Pass the attention mask to the LSTM as its timestep mask. Without it the LSTM
# also steps over padding positions, so its final state (the only output here)
# is computed after reading padding whenever a sequence is shorter than 512 tokens.
lstm_output = LSTM(128)(albert_output, mask=tf.cast(mask_layer, tf.bool))

# Single sigmoid unit -> probability of the positive class.
output_layer = Dense(1, activation='sigmoid')(lstm_output)

model_inputs = [input_layer, mask_layer]
model_outputs = output_layer

model = Model(inputs=model_inputs, outputs=model_outputs)

# NOTE(review): ALBERT is left trainable here and 'adam' uses Keras' default
# learning rate, which is much larger than what is usually used for fine-tuning
# a pretrained transformer — consider an explicit, smaller learning rate.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the current `model`
# (the ALBERT + LSTM variant when the cells are run in order).
model.summary()

In [None]:
# Train the current model with mini-batches of 8, feeding the arrays by Input-layer
# name, and evaluate on the held-out split. No `epochs` argument is given, so
# Keras' default applies.
model.fit(
    x={'input_layer': train_inputs, 'mask_layer': train_masks},
    y=train_labels,
    validation_data=(
        {'input_layer': test_inputs, 'mask_layer': test_masks},
        test_labels,
    ),
    batch_size=8,
)

In [None]:
from tensorflow.keras.layers import Input, Dense, Concatenate,  Bidirectional, LSTM


In [None]:
from tensorflow.keras.layers import Dropout 

In [None]:
# Model 3: frozen ALBERT encoder; the first-token vector is concatenated with a
# bidirectional LSTM summary of the full sequence, then passed through a dense
# layer with dropout to a sigmoid output for binary classification.
albert = TFAutoModel.from_pretrained('albert-base-v2')
# Freeze the encoder so only the layers added below are trained.
albert.trainable = False

# Two integer inputs of fixed length 512: token ids and the matching attention mask.
input_layer = Input(shape=(512,), dtype=tf.int32, name='input_layer')
mask_layer = Input(shape=(512,), dtype=tf.int32, name='mask_layer')

# Element 0 of the encoder output is the sequence that is fed to the recurrent layer.
albert_output = albert(input_layer, attention_mask=mask_layer)[0]

# Pass the attention mask to the BiLSTM as its timestep mask. Without it the
# forward LSTM finishes on padding positions and the backward LSTM starts on them
# whenever a sequence is shorter than 512 tokens.
lstm_layer = Bidirectional(LSTM(64, return_sequences=False))(
    albert_output,
    mask=tf.cast(mask_layer, tf.bool),
)

# Join the vector at sequence position 0 with the BiLSTM output.
combined_layer = Concatenate()([albert_output[:, 0, :], lstm_layer])

dense_layer = Dense(128, activation='relu')(combined_layer)
dropout_layer = Dropout(0.5)(dense_layer)
# Single sigmoid unit -> probability of the positive class.
output_layer = Dense(1, activation='sigmoid')(dropout_layer)

model = tf.keras.models.Model(inputs=[input_layer, mask_layer], outputs=output_layer)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the current `model`
# (the frozen-ALBERT + BiLSTM variant when the cells are run in order).
model.summary()

In [None]:
# Train the current model with mini-batches of 8, feeding the arrays by Input-layer
# name, and evaluate on the held-out split. No `epochs` argument is given, so
# Keras' default applies.
model.fit(
    x={'input_layer': train_inputs, 'mask_layer': train_masks},
    y=train_labels,
    validation_data=(
        {'input_layer': test_inputs, 'mask_layer': test_masks},
        test_labels,
    ),
    batch_size=8,
)