### Description

This trains an LSTM RNN that takes a 150x5 array (4 nucleotides, 1 placeholder for a missing section, and 1 placeholder for padding) and the gene expression (as a separate input).

Changes:
1. The concatenation of sequence and expression data is handled efficiently, and masking is applied to the combined input.

In [1]:
import os

import RNN_1_2 as parent

In [2]:
# Base name of the saved model artifact; it is interpolated into the
# '../Models/{name}.keras' path when the model is saved at the end of the notebook.
name = 'RNN_1_2'

In [3]:
# Location of the input CSV, given as a relative path.
file_path = '../Data/combined/LaFleur_supp.csv'

# Hand the path to the shared helper module; the result is passed to
# parent.preprocess_X_y in the next cell.
# NOTE(review): the actual loading/cleaning steps live in
# RNN_1_2.load_and_preprocess_data and are not visible from this notebook.
df = parent.load_and_preprocess_data(file_path)

In [4]:
# Derive the two model inputs (sequence and expression) and the target from df.
# NOTE(review): array shapes and encodings are decided inside
# RNN_1_2.preprocess_X_y — presumably X_sequence is (n_samples, 150, channels); confirm.
X_sequence, X_expressions, y = parent.preprocess_X_y(df)

In [5]:
# Split both inputs and the target into train/test partitions in a single call.
# NOTE(review): this is parent.train_test_split, not necessarily scikit-learn's;
# assuming it follows the same semantics, test_size=0.2 holds out 20% of the rows
# and random_state=42 makes the split repeatable — TODO confirm in RNN_1_2.
X_sequence_train, X_sequence_test, X_expressions_train, X_expressions_test, y_train, y_test = parent.train_test_split(
        X_sequence, X_expressions, y, test_size=0.2, random_state=42)

In [6]:
# Build the network for 150-step sequences plus a 1-dimensional expression input.
# NOTE(review): the notebook description mentions a 150x5 input array, while
# nucleotide_dim=4 is passed here — confirm how build_model accounts for the
# extra placeholder channel(s).
model = parent.build_model(sequence_length=150, nucleotide_dim=4, expression_dim=1)

In [7]:
# Fit the model on the training partition (batch size 32, 10 epochs).
# The call returns a Keras History object; binding it to `history` keeps the
# per-epoch metrics available for later inspection and stops the cell from
# ending with a bare `<...History at 0x...>` repr as its output.
# The logged val_* metrics show that train_model runs validation itself; how the
# validation data is chosen is decided inside RNN_1_2.train_model (not visible here).
history = parent.train_model(model, X_sequence_train, X_expressions_train, y_train, batch_size=32, epochs=10)

Epoch 1/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 84ms/step - accuracy: 0.6698 - loss: 0.6711 - val_accuracy: 0.7961 - val_loss: 0.1789
Epoch 2/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 80ms/step - accuracy: 0.7965 - loss: 0.1670 - val_accuracy: 0.7921 - val_loss: 0.1415
Epoch 3/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 81ms/step - accuracy: 0.7933 - loss: 0.1383 - val_accuracy: 0.7924 - val_loss: 0.1280
Epoch 4/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 78ms/step - accuracy: 0.7957 - loss: 0.1254 - val_accuracy: 0.7951 - val_loss: 0.1187
Epoch 5/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 78ms/step - accuracy: 0.7969 - loss: 0.1172 - val_accuracy: 0.7949 - val_loss: 0.1122
Epoch 6/10
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 55ms/step - accuracy: 0.7970 - loss: 0.1109 - val_accuracy: 0.7948 - val_loss: 0.1063
Epoch 7/10
[1m9

<keras.src.callbacks.history.History at 0x206c98d3e60>

In [8]:
# Score the trained model on the held-out test partition; the helper returns
# a (loss, accuracy) pair, which is then printed.
# NOTE(review): in the recorded run accuracy stays near 0.79-0.80 from the second
# epoch on while the loss keeps decreasing — confirm that accuracy is a
# meaningful metric for this target.
loss, accuracy = parent.evaluate_model(model, X_sequence_test, X_expressions_test, y_test)
print(f'Loss: {loss}, Accuracy: {accuracy}')

[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.7955 - loss: 0.0968
Loss: 0.09667032212018967, Accuracy: 0.794690728187561


In [9]:
# Persist the trained model in the native Keras format under ../Models/.
# Create the output directory first: saving into a missing directory would fail
# only after the whole (multi-minute) training run has already completed.
os.makedirs('../Models', exist_ok=True)
model.save(f'../Models/{name}.keras')

#### From previous modeling:
Test Loss: 0.09667032212018967

Test Accuracy: 0.794690728187561