### Description

This notebook trains and evaluates CTGAN version 1_1, which has the same structure as version 1_0 (with minor changes). The model takes a DNA sequence with masked sections together with a desired expression value, and generates a full sequence.

This CTGAN's loss is evaluated in part by the CNN_5_0 model.

In [1]:
import CTGAN_1_1 as parent

In [2]:
# Directory prefix for model files written/read by this notebook.
model_dir = '../Models/'
# Version tag embedded in the generator/discriminator checkpoint file names.
version = '1_1'

In [3]:
# Hyperparameters
# -- passed to parent.initialize_models (latent_dim / expression_dim are also
#    forwarded to training and evaluation further down)
sequence_length = 150
latent_dim = 100
expression_dim = 1

# -- passed to parent.load_and_preprocess_data
batch_size = 64

# -- passed to parent.train_ctgan
num_epochs = 100
lr = 2e-4

In [4]:
# Load and preprocess data
file_path = '../Data/combined/LaFleur_supp.csv'
dataloader = parent.load_and_preprocess_data(file_path, batch_size)

# Initialize models
device = parent.initialize_device()
# Build the CNN checkpoint path from `model_dir` (defined in the config cell)
# so every model file in this notebook is resolved against the same directory
# instead of repeating the '../Models/' prefix as a literal.
cnn_path = f'{model_dir}CNN_5_0.keras'
generator, discriminator, cnn = parent.initialize_models(
    sequence_length,
    latent_dim,
    expression_dim,
    device,
    cnn_path,
)

In [None]:
# Train the CTGAN
# Keyword options are gathered first so the call itself stays short; the
# values all come from the hyperparameter / initialisation cells above.
training_options = dict(
    num_epochs=num_epochs,
    latent_dim=latent_dim,
    expression_dim=expression_dim,
    lr=lr,
    device=device,
)
parent.train_ctgan(generator, discriminator, cnn, dataloader, **training_options)

Epoch [1/100] | D Loss: 0.0134 | G Loss: 11.2821 | CNN Loss: 0.0178
Epoch [2/100] | D Loss: 0.0203 | G Loss: 28.4162 | CNN Loss: 0.0234
Epoch [3/100] | D Loss: 0.0065 | G Loss: 20.0688 | CNN Loss: 0.0209
Epoch [4/100] | D Loss: 0.0238 | G Loss: 14.8785 | CNN Loss: 0.0150
Epoch [5/100] | D Loss: 0.0082 | G Loss: 11.4259 | CNN Loss: 0.0166
Epoch [6/100] | D Loss: 0.0016 | G Loss: 11.7582 | CNN Loss: 0.0133
Epoch [7/100] | D Loss: 0.0614 | G Loss: 21.9116 | CNN Loss: 0.0294
Epoch [8/100] | D Loss: 0.0174 | G Loss: 12.6500 | CNN Loss: 0.0175
Epoch [9/100] | D Loss: 0.0468 | G Loss: 13.2993 | CNN Loss: 0.0216
Epoch [10/100] | D Loss: 0.0272 | G Loss: 8.0825 | CNN Loss: 0.0198
Epoch [11/100] | D Loss: 0.0158 | G Loss: 8.8686 | CNN Loss: 0.0177
Epoch [12/100] | D Loss: 0.0087 | G Loss: 13.6790 | CNN Loss: 0.0135
Epoch [13/100] | D Loss: 0.0417 | G Loss: 11.5808 | CNN Loss: 0.0189
Epoch [14/100] | D Loss: 0.0090 | G Loss: 10.8836 | CNN Loss: 0.0274
Epoch [15/100] | D Loss: 0.0003 | G Loss: 11.

In [None]:
# Save the trained models
# Only the generator and discriminator are written here; file names carry the
# notebook's `version` tag and live under `model_dir`.
generator_path = f'{model_dir}generator_{version}.pth'
discriminator_path = f'{model_dir}discriminator_{version}.pth'
parent.save_model(generator, generator_path)
parent.save_model(discriminator, discriminator_path)

In [None]:
# Load the models
# Uses the same `model_dir` / `version` naming scheme as the save cell.
# NOTE(review): presumably load_model restores weights into the existing
# `generator` / `discriminator` objects passed in — confirm in CTGAN_1_1.
generator_path = f'{model_dir}generator_{version}.pth'
discriminator_path = f'{model_dir}discriminator_{version}.pth'
parent.load_model(generator, generator_path)
parent.load_model(discriminator, discriminator_path)

In [None]:
# Values to evaluate the Generator
# NOTE(review): `sequences` is only printed below; it is not passed to
# parent.evaluate_generator, so the masked input does not appear to reach the
# generator from this cell — confirm this is intended.
sequences = ['TTTTCTATCTACGTACTTGACACTATTTC______________ATT__________ACCTTAGTTTGTACGTT']
expressions = [0.5]

# Evaluate the Generator
generated_sequences = parent.evaluate_generator(
    generator,
    expressions,
    latent_dim=latent_dim,
    device=device,
)
print("Original Sequences: ", sequences[0])
# Decode the generator output and show the first decoded entry next to the
# first input sequence.
decoded_sequences = parent.decode_one_hot_sequences(generated_sequences)
print(" Decoded Sequences: ", decoded_sequences[0])