In [34]:
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR

from utils_io import read_bitarrays
from utils_attacker_lstm import DatasetAttackerLSTMBeacon, DataLoaderAttackerLSTM, ModelAttackerLSTMLinear, LSTMAttackerTrainer, LSTMAttackerTester
from utils_torch import stratified_random_split

In [35]:
num_snps = 40000
genomes_beacon = read_bitarrays('../data/test/In_Pop.pkl')[:, :num_snps]
genomes_reference = read_bitarrays('../data/test/Not_In_Pop.pkl')[:, :num_snps]
genomes = np.concatenate((genomes_beacon, genomes_reference), axis=0)

In [36]:
labels_beacon = np.ones(genomes_beacon.shape[0], dtype=bool)
labels_reference = np.zeros(genomes_reference.shape[0], dtype=bool)
labels = np.concatenate((labels_beacon, labels_reference), axis=0).astype(bool)

In [37]:
presences_beacon = np.any(genomes_beacon, axis=0).astype(bool)
frequencies_reference = np.mean(genomes, axis=0)

In [38]:
dataset = DatasetAttackerLSTMBeacon(
    target_genomes=genomes,
    beacon_presences=presences_beacon,
    reference_frequencies=frequencies_reference,
    labels=labels)
subset_train, subset_eval, subset_test = stratified_random_split(dataset, [0.7, 0.15, 0.15])

In [39]:
genomes_batch_size, snps_batch_size = 32, 10000
loader_train = DataLoaderAttackerLSTM(subset_train, genomes_batch_size, snps_batch_size, shuffle=True)
loader_eval = DataLoaderAttackerLSTM(subset_eval, genomes_batch_size, snps_batch_size, shuffle=False)
loader_test = DataLoaderAttackerLSTM(subset_test, genomes_batch_size, snps_batch_size, shuffle=False)

In [40]:
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [41]:
model = ModelAttackerLSTMLinear(lstm_input_size=3, lstm_hidden_size=24, lstm_num_layers=1, lstm_bidirectional=False, lstm_dropout=0.5)
model.to(device)

ModelAttackerLSTMLinear(
  (lstm): LSTM(3, 24, batch_first=True)
  (linear): Linear(in_features=24, out_features=1, bias=True)
)

In [42]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# scheduler = StepLR(optimizer, step_size=1, gamma=0.9) 

In [43]:
trainer = LSTMAttackerTrainer(model, criterion, optimizer, loader_train, loader_eval, device)

In [44]:
losses_train, accuracies_train, losses_eval, accuracies_eval = trainer.train(num_epochs=256, verbose=True)
min_loss = min(losses_eval)

Epoch 1/256
Train Loss: 0.6981, Train Accuracy: 0.50
Evaluation Loss: 0.6943, Evaluation Accuracy: 0.50
Evaluation Loss Decreased: inf -> 0.6943. Saving Model...
Epoch 2/256
Train Loss: 0.6947, Train Accuracy: 0.50
Evaluation Loss: 0.6930, Evaluation Accuracy: 0.50
Evaluation Loss Decreased: 0.6943 -> 0.6930. Saving Model...
Epoch 3/256
Train Loss: 0.6935, Train Accuracy: 0.50
Evaluation Loss: 0.6929, Evaluation Accuracy: 0.50
Evaluation Loss Decreased: 0.6930 -> 0.6929. Saving Model...
Epoch 4/256
Train Loss: 0.6933, Train Accuracy: 0.50
Evaluation Loss: 0.6931, Evaluation Accuracy: 0.50
Epoch 5/256
Train Loss: 0.6933, Train Accuracy: 0.49
Evaluation Loss: 0.6931, Evaluation Accuracy: 0.51
Epoch 6/256
Train Loss: 0.6933, Train Accuracy: 0.49
Evaluation Loss: 0.6931, Evaluation Accuracy: 0.52
Epoch 7/256
Train Loss: 0.6933, Train Accuracy: 0.48
Evaluation Loss: 0.6930, Evaluation Accuracy: 0.52
Epoch 8/256
Train Loss: 0.6933, Train Accuracy: 0.48
Evaluation Loss: 0.6930, Evaluation Acc

In [45]:
model.save("../models", f"Attacker_LSTM_Beacon_SNP{num_snps}_LSS{int(min_loss * 10000)}_DTT{datetime.now().strftime('%m%d%H%M')}")
# model.load("../models", "beacon_lstm_attacker_20210919123456")

In [46]:
tester = LSTMAttackerTester(model, criterion, loader_test, device)

In [47]:
loss, accuracy, precision, recall, f1, auroc, cm = tester.test()
print(f"Loss: {loss:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1: {f1:.4f}")
print(f"AUROC: {auroc:.4f}")

Loss: 0.6968
Accuracy: 0.5167
Precision: 0.5147
Recall: 0.5833
F1: 0.5469
AUROC: 0.5167


In [48]:
chance = np.mean(subset_test.targets)


TypeError: mean() received an invalid combination of arguments - got (axis=NoneType, dtype=NoneType, out=NoneType, ), but expected one of:
 * (*, torch.dtype dtype)
 * (tuple of ints dim, bool keepdim, *, torch.dtype dtype)
 * (tuple of names dim, bool keepdim, *, torch.dtype dtype)
