## Import Libraries

### Python Libraries

In [None]:
from datetime import datetime

### External Libraries

In [None]:
import numpy as np
import torch
import torch.nn as nn

### Custom Libraries

In [None]:
from utils_random import set_random_seed
from utils_io import read_bitarrays
from utils_torch.data import stratified_random_split
from utils_attacker_lstm.data import DatasetAttackerLSTMPool, DataLoaderAttackerLSTM
from utils_attacker_lstm.models import ModelAttackerConvLSTMLinear, TrainerAttackerLSTM, TesterAttackerLSTM, ManagerAttackerLSTM

## Set Parameters

### Model Id and Random Seed

In [None]:
# Identifier for this run, built from the local wall-clock time as MMDDHHMM
# (month, day, hour, minute). It has no year or seconds component.
# NOTE(review): two runs started within the same minute get the same id —
# confirm this is acceptable for the model manager.
model_id = f'{datetime.now():%m%d%H%M}'

In [None]:
# Seed value handed to `set_random_seed` before any data is loaded.
random_seed: int = 42

### Data Params

In [None]:
# Number of leading SNP columns kept from each loaded genome matrix.
num_snps: int = 40_000
# Fractions passed to `stratified_random_split`; the three resulting subsets
# are unpacked as train, eval and test, in that order.
train_eval_test_split: list = [0.70, 0.15, 0.15]

### Loader Params

In [None]:
# Batch sizes passed positionally to every `DataLoaderAttackerLSTM`.
# NOTE(review): presumably genomes per batch and SNPs per batch respectively —
# confirm against the loader implementation.
genome_batch_size: int = 32
snp_batch_size: int = 20_000

### Model Params

#### Conv1d Params

In [None]:
# Convolution stack configuration: one layer, with per-layer settings as lists.
conv_num_layers: int = 1
# NOTE(review): presumably [in_channels, out_channels] for the single layer —
# confirm against ModelAttackerConvLSTMLinear.
conv_channel_size: list = [3, 8]
conv_kernel_size: list = [20]
conv_stride: list = [2]
conv_dilation: list = [1]
conv_groups: list = [1]

# Activation settings between conv layers are left empty.
# NOTE(review): presumably these lists only describe transitions *between*
# conv layers, hence empty for a single layer — confirm.
conv_activation: list = []
conv_activation_kwargs: list = []

# Dropout settings, likewise empty.
conv_dropout_p: list = []
conv_dropout_first: list = []

# Batch-norm settings, likewise empty.
conv_batch_norm: list = []
conv_batch_norm_momentum: list = []

#### Conv1d to LSTM Params

In [None]:
# Settings for the transition from the conv stack to the LSTM stack.
# The activation is given as a class (not an instance) together with the
# keyword arguments to build it with.
conv_lstm_activation: type = nn.ReLU
conv_lstm_activation_kwargs: dict = {}
conv_lstm_dropout_p: float = 0.5
# NOTE(review): presumably controls whether dropout is applied before the
# other transition ops — confirm against the model implementation.
conv_lstm_dropout_first: bool = True
conv_lstm_layer_norm: bool = True

#### LSTM Params

In [None]:
# LSTM stack configuration: one layer, with per-layer settings as lists.
lstm_num_layers: int = 1
# Equals the last entry of `conv_channel_size` (8).
lstm_input_size: int = 8
lstm_hidden_size: list = [16]
lstm_proj_size: list = [0]
lstm_bidirectional: list = [False]

# Dropout settings between LSTM layers are left empty.
lstm_dropout_p: list = []
lstm_dropout_first: list = []

# Layer-norm settings between LSTM layers are left empty.
lstm_layer_norm: list = []

#### LSTM to Linear Params

In [None]:
# Settings for the transition from the LSTM stack to the linear head.
lstm_linear_dropout_p: float = 0.25
lstm_linear_dropout_first: bool = True

# Batch normalisation on the same transition.
lstm_linear_batch_norm: bool = True
lstm_linear_batch_norm_momentum: float = 0.1

#### Linear Params

In [None]:
# Linear head configuration: one layer.
linear_num_layers: int = 1
# The first entry (16) equals the LSTM hidden size; the last entry is 1.
# NOTE(review): presumably a single output logit, consistent with the
# BCEWithLogitsLoss criterion used for training — confirm.
linear_num_features: list = [16, 1]

# Activation settings between linear layers are left empty.
linear_activation: list = []
linear_activation_kwargs: list = []

# Dropout settings, likewise empty.
linear_dropout_p: list = []
linear_dropout_first: list = []

# Batch-norm settings, likewise empty.
linear_batch_norm: list = []
linear_batch_norm_momentum: list = []

### Trainer Params

In [None]:
# Number of epochs passed to `trainer.train`.
num_epochs: int = 256
# Learning rate passed to the Adam optimizer.
learning_rate: float = 1e-3

## Set Torch Device

In [None]:
# Pick the compute device, probing back ends in a fixed order:
# Apple MPS first, then CUDA, and finally the CPU as fallback.
device = torch.device(
    'mps' if torch.backends.mps.is_available()
    else 'cuda' if torch.cuda.is_available()
    else 'cpu'
)

## Set Random Seed

In [None]:
# Seed the run through the project helper before any data is loaded or split.
# NOTE(review): presumably seeds the Python, NumPy and torch generators —
# confirm in utils_random.
set_random_seed(random_seed)

## Load Data

### Load Genomes

In [None]:
# Read both genome groups and keep only the first `num_snps` columns of each.
# NOTE(review): assumes rows are genomes and columns are SNPs — confirm
# against `read_bitarrays`.
genomes_pool = read_bitarrays('../data/test/In_Pop.pkl')[:, :num_snps]
genomes_reference = read_bitarrays('../data/test/Not_In_Pop.pkl')[:, :num_snps]

# Stack along the first axis: pool genomes first, reference genomes after.
genomes = np.concatenate([genomes_pool, genomes_reference], axis=0)

### Load Labels

In [None]:
# Membership labels: True for each row of `genomes_pool`, False for each row
# of `genomes_reference`, joined in the same order used to build `genomes`.
labels_beacon = np.full(genomes_pool.shape[0], True, dtype=bool)
labels_reference = np.full(genomes_reference.shape[0], False, dtype=bool)
# Both parts are already boolean; the trailing cast only guarantees the dtype.
labels = np.concatenate([labels_beacon, labels_reference], axis=0).astype(bool)

### Compute Frequencies

In [None]:
# Mean over the first axis of each genome group, giving one value per column.
# NOTE(review): presumably the per-SNP allele frequency within each group —
# confirm the encoding of the genome matrices.
frequencies_pool, frequencies_reference = (
    np.mean(group, axis=0) for group in (genomes_pool, genomes_reference)
)

## Create Dataset

In [None]:
# Wrap all genomes (pool + reference), both frequency vectors and the
# membership labels in a single dataset.
# NOTE(review): `frequencies_pool` is computed over every pool genome before
# the split, so it also reflects genomes that land in the eval/test subsets —
# confirm this is the intended attack setting.
dataset = DatasetAttackerLSTMPool(
    target_genomes=genomes,
    pool_frequencies=frequencies_pool,
    reference_frequencies=frequencies_reference,
    targets=labels,
)

# Split into train / eval / test subsets using the configured fractions.
subset_train, subset_eval, subset_test = stratified_random_split(
    dataset,
    train_eval_test_split,
)

## Create DataLoaders

In [None]:
# One loader per subset; only the training loader shuffles.
loader_train = DataLoaderAttackerLSTM(
    subset_train, genome_batch_size, snp_batch_size, shuffle=True,
)
loader_eval = DataLoaderAttackerLSTM(
    subset_eval, genome_batch_size, snp_batch_size, shuffle=False,
)
loader_test = DataLoaderAttackerLSTM(
    subset_test, genome_batch_size, snp_batch_size, shuffle=False,
)

## Create Model

In [None]:
# Build the Conv -> LSTM -> Linear attacker model from the parameters set in
# the "Model Params" cells; every keyword forwards the same-named variable.
model = ModelAttackerConvLSTMLinear(
    # Conv1d stack
    conv_num_layers=conv_num_layers,
    conv_channel_size=conv_channel_size,
    conv_kernel_size=conv_kernel_size,
    conv_stride=conv_stride,
    conv_dilation=conv_dilation,
    conv_groups=conv_groups,
    conv_activation=conv_activation,
    conv_activation_kwargs=conv_activation_kwargs,
    conv_dropout_p=conv_dropout_p,
    conv_dropout_first=conv_dropout_first,
    conv_batch_norm=conv_batch_norm,
    conv_batch_norm_momentum=conv_batch_norm_momentum,
    # Conv1d -> LSTM transition
    conv_lstm_activation=conv_lstm_activation,
    conv_lstm_activation_kwargs=conv_lstm_activation_kwargs,
    conv_lstm_dropout_p=conv_lstm_dropout_p,
    conv_lstm_dropout_first=conv_lstm_dropout_first,
    conv_lstm_layer_norm=conv_lstm_layer_norm,
    # LSTM stack
    lstm_num_layers=lstm_num_layers,
    lstm_input_size=lstm_input_size,
    lstm_hidden_size=lstm_hidden_size,
    lstm_proj_size=lstm_proj_size,
    lstm_bidirectional=lstm_bidirectional,
    lstm_dropout_p=lstm_dropout_p,
    lstm_dropout_first=lstm_dropout_first,
    lstm_layer_norm=lstm_layer_norm,
    # LSTM -> Linear transition
    lstm_linear_dropout_p=lstm_linear_dropout_p,
    lstm_linear_dropout_first=lstm_linear_dropout_first,
    lstm_linear_batch_norm=lstm_linear_batch_norm,
    lstm_linear_batch_norm_momentum=lstm_linear_batch_norm_momentum,
    # Linear head
    linear_num_layers=linear_num_layers,
    linear_num_features=linear_num_features,
    linear_activation=linear_activation,
    linear_activation_kwargs=linear_activation_kwargs,
    linear_dropout_p=linear_dropout_p,
    linear_dropout_first=linear_dropout_first,
    linear_batch_norm=linear_batch_norm,
    linear_batch_norm_momentum=linear_batch_norm_momentum
)
# Move the model to the selected device. Left as a bare expression so the
# notebook cell displays the returned value.
# NOTE(review): presumably the model is an nn.Module, so `.to` returns the
# model itself and the display is its layer summary — confirm.
model.to(device)

## Create Trainer

### Create Criterion and Optimizer

In [None]:
# Binary cross-entropy on raw logits (the sigmoid is applied inside the loss).
criterion = nn.BCEWithLogitsLoss()
# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
)

### Create Trainer

In [None]:
# Trainer that optimises `model` on the training loader and evaluates it on
# the eval loader.
# NOTE(review): `max_grad_norm=1.0` with `norm_type=2` presumably enables
# gradient clipping to an L2 norm of 1.0 — confirm in TrainerAttackerLSTM.
trainer = TrainerAttackerLSTM(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=loader_train,
    eval_loader=loader_eval,
    device=device,
    max_grad_norm=1.0,
    norm_type=2
)

## Create Tester

In [None]:
# Tester that scores the same `model` object on the held-out test loader,
# using the same criterion as training.
tester = TesterAttackerLSTM(
    model=model,
    criterion=criterion,
    test_loader=loader_test,
    device=device
)

## Create Manager

In [None]:
# Manager used at the end of the notebook to register the trained model.
# NOTE(review): `models_file=None` presumably selects a default file inside
# `models_dir` — confirm in ManagerAttackerLSTM.
manager = ManagerAttackerLSTM(
    models_dir='../models/',
    models_file=None
)

## Train Model

### Train Model

In [None]:
# Run training for the configured number of epochs with verbose output.
trainer.train(num_epochs=num_epochs, verbose=True)

### Print Metrics

In [None]:
# Record the completion time and read the best-evaluation statistics
# from the trainer.
finish_time = datetime.now()
best_eval_loss_epoch = trainer.best_eval_loss_epoch
best_eval_loss = trainer.best_eval_loss
# NOTE(review): assumes `best_eval_loss_epoch` uses the same numbering as the
# positions in `trainer.eval_accuracies` (no off-by-one) — confirm.
best_eval_accuracy = trainer.eval_accuracies[best_eval_loss_epoch]

# Emit the summary, one line per metric.
print(
    f'Finished training at {finish_time}',
    f'Best evaluation loss epoch found at: {best_eval_loss_epoch}',
    f'Best evaluation loss found: {best_eval_loss:.4f}',
    f'Best evaluation accuracy found: {best_eval_accuracy:.4f}',
    sep='\n',
)

## Test Model

### Test Model

In [None]:
# Evaluate the model on the test loader; results are read from `tester` below.
# NOTE(review): this uses whatever weights `model` holds after training —
# confirm whether the trainer restores the best-eval-loss weights first.
tester.test()

### Print Metrics

In [None]:
# Report the test metrics: loss with four decimals, the rest with two.
print(
    f'Test loss: {tester.test_loss:.4f}',
    f'Test accuracy: {tester.test_accuracy:.2f}',
    f'Test precision: {tester.test_precision:.2f}',
    f'Test recall: {tester.test_recall:.2f}',
    f'Test f1: {tester.test_f1:.2f}',
    f'Test AUC: {tester.test_auc:.2f}',
    sep='\n',
)

## Save Model


In [None]:
# Register the run with the manager under the timestamp-based `model_id`.
# Only the training loader is passed, along with the full (unsplit) dataset.
# NOTE(review): presumably this persists the model and its metadata under
# `models_dir` — confirm in ManagerAttackerLSTM.add_model.
manager.add_model(
    model_id=model_id,
    data=dataset,
    loader=loader_train,
    model=model,
    trainer=trainer,
    tester=tester
)