### Description

This notebook implements the first GAN (not CTGAN). It takes a DNA sequence with masked sections and a desired expression, and then generates a full sequence.

In [1]:
import CTGAN_1_1 as parent

In [2]:
# Pieces used to build the saved-model file names further down.
model_dir = "../Models/"
version = "1_1"

In [3]:
# Hyperparameters
# -- sizes handed to the model initialiser
sequence_length, latent_dim, expression_dim = 150, 100, 1
# -- data batching and training settings
batch_size, num_epochs = 64, 100
lr = 2e-4

In [4]:
# Build the dataloader from the CSV dataset, batched by `batch_size`
file_path = '../Data/combined/LaFleur_supp.csv'
dataloader = parent.load_and_preprocess_data(
    file_path,
    batch_size,
)

# Obtain the device, then create the generator/discriminator pair with it
device = parent.initialize_device()
generator, discriminator = parent.initialize_models(
    sequence_length,
    latent_dim,
    expression_dim,
    device,
)

In [None]:
# Train the generator/discriminator pair via parent.train_ctgan,
# using the hyperparameters set earlier in the notebook
parent.train_ctgan(
    generator,
    discriminator,
    dataloader,
    num_epochs=num_epochs,
    latent_dim=latent_dim,
    expression_dim=expression_dim,
    lr=lr,
    device=device,
)

In [None]:
# Save the trained models.
# Both file names follow '<model_dir><name>_<version>.pth' so that they match
# the paths the model-loading cell reads back (the discriminator path was
# previously missing the underscore before the version).
parent.save_model(generator, f'{model_dir}generator_{version}.pth')
parent.save_model(discriminator, f'{model_dir}discriminator_{version}.pth')

In [None]:
# Load the saved generator and discriminator files from `model_dir`
# (file names are '<name>_<version>.pth')
parent.load_model(
    generator,
    f'{model_dir}generator_{version}.pth',
)
parent.load_model(
    discriminator,
    f'{model_dir}discriminator_{version}.pth',
)

In [None]:
# Evaluate the Generator on a single input sequence.
# NOTE(review): the '_' runs are presumably the masked sections the generator
# is expected to fill in — confirm against parent.evaluate_generator.
sequence = 'TTTTCTATCTACGTACTTGACACTATTTC______________ATT__________ACCTTAGTTTGTACGTT'
# Pass the variable (not a duplicated literal) so editing `sequence` above
# actually changes what is evaluated.
generated_sequences = parent.evaluate_generator(generator, [sequence], latent_dim=latent_dim, device=device)
print("Generated Sequences: ", generated_sequences)
print("Decoded Sequences: ", parent.decode_one_hot_sequences(generated_sequences))