### Description

This notebook trains an LSTM RNN that takes two separate inputs: a 150x5 sequence array (4 nucleotides, 1 placeholder for a missing section, and 1 placeholder for padding) and the gene expression.

Changes:
1. The concatenation of sequence and expression data is handled efficiently, and masking is applied to the combined input.

In [1]:
import RNN_1_2 as parent

In [2]:
# Identifier for this run; it is interpolated into the saved model's filename.
# NOTE(review): identical to the imported `RNN_1_2` module name — confirm that
# reusing it will not overwrite a previously saved model.
name = "RNN_1_2"

In [3]:
# Relative path to the combined CSV, handed to the parent module's loader.
file_path = "../Data/combined/LaFleur_supp.csv"
df = parent.load_and_preprocess_data(file_path)

In [4]:
# Derive the two model inputs (sequence and expression) and `y` from the frame.
(X_sequence, X_expressions, y) = parent.preprocess_X_y(df)

In [5]:
# Split every array with the same test_size=0.2; the fixed random_state keeps
# the partition repeatable between runs.
(
    X_sequence_train,
    X_sequence_test,
    X_expressions_train,
    X_expressions_test,
    y_train,
    y_test,
) = parent.train_test_split(
    X_sequence,
    X_expressions,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Construct the network via the parent module.
# NOTE(review): the notebook description mentions a 150x5 input while
# nucleotide_dim is 4 here — confirm how build_model accounts for the
# additional channel(s).
model = parent.build_model(
    sequence_length=150,
    nucleotide_dim=4,
    expression_dim=1,
)

In [7]:
# Keep the Keras History object returned by training so the per-epoch metrics
# can be inspected or plotted in later cells; binding it also stops the bare
# object repr from being echoed as the cell's output.
# NOTE(review): in the saved run, val_loss and val_accuracy are identical for
# every logged epoch — confirm the model is actually learning.
history = parent.train_model(
    model,
    X_sequence_train,
    X_expressions_train,
    y_train,
    batch_size=32,
    epochs=10,
)

Epoch 1/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 65ms/step - accuracy: 0.9351 - loss: 0.0509 - val_accuracy: 0.9734 - val_loss: 0.0512
Epoch 2/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 99ms/step - accuracy: 0.9735 - loss: 0.0510 - val_accuracy: 0.9734 - val_loss: 0.0512
Epoch 3/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 71ms/step - accuracy: 0.9736 - loss: 0.0509 - val_accuracy: 0.9734 - val_loss: 0.0512
Epoch 4/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 57ms/step - accuracy: 0.9736 - loss: 0.0508 - val_accuracy: 0.9734 - val_loss: 0.0512
Epoch 5/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 53ms/step - accuracy: 0.9736 - loss: 0.0508 - val_accuracy: 0.9734 - val_loss: 0.0512
Epoch 6/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 53ms/step - accuracy: 0.9736 - loss: 0.0509 - val_accuracy: 0.9734 - val_loss: 0.0512
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x17844629bb0>

In [8]:
# Score the trained model on the test split and report both returned metrics.
loss, accuracy = parent.evaluate_model(
    model,
    X_sequence_test,
    X_expressions_test,
    y_test,
)
print(f'Loss: {loss}, Accuracy: {accuracy}')

[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.9732 - loss: 0.0514
Loss: 0.051319096237421036, Accuracy: 0.9732710719108582


In [9]:
# Persist the trained model to ../Models/<name>.keras, where <name> is the run
# identifier defined near the top of the notebook.
# NOTE(review): an existing file at that path may be replaced — confirm the
# run identifier is unique to this experiment.
model.save(f'../Models/{name}.keras')

#### From previous modeling:
Test Loss: 0.05334583297371864

Test Accuracy: 0.9736703634262085