<H1 align="center">Likelihood Ratio Test for Membership Inference Attack on AAF Data</H1>

<br>
<strong>This notebook performs a likelihood-ratio-test-based membership inference attack on the AAF data for a single sequence length.</strong>

## 1. Importing Required Libraries

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve

from src.utils_attacker_lrt import likelihood_ratio_statistic_pool_haploid_approximate, likelihood_ratio_test_threshold, \
    likelihood_ratio_test
from src.utils_io import read_bitarrays
from src.utils_plot import plot_likelihood_ratio_statistics, plot_receiver_operating_characteristics_curve

## 2. Setting Parameters

In [None]:
# Target false-positive rate handed to the threshold computation further down.
false_positive_rate = 0.05
# Number of leading columns kept from each loaded genome matrix.
num_snps = 1_338_843

## 3. Reading Data

In [None]:
# Load the pool file first, then the reference file, keeping only the first
# `num_snps` columns of each matrix.
genomes_pool, genomes_reference = (
    read_bitarrays(path)[:, :num_snps]
    for path in ("../data/test/In_Pop.pkl", "../data/test/Not_In_Pop.pkl")
)
# Row-wise stack (axis 0 is the default): pool rows first, reference rows after.
genomes = np.concatenate([genomes_pool, genomes_reference])

## 4. Labeling Data

In [None]:
# One boolean label per row: True for pool rows, False for reference rows.
labels_pool = np.ones(len(genomes_pool), dtype=bool)
labels_reference = np.zeros(len(genomes_reference), dtype=bool)
# Same ordering as `genomes` (pool first); both inputs are already bool,
# so the concatenated array is bool as well.
labels = np.concatenate([labels_pool, labels_reference])

## 5. Computing AAF Pool Responses and Auxiliary Variables

In [None]:
# Column-wise mean over the rows of each genome matrix
# (presumably the per-SNP allele frequency of each group -- confirm against the data format).
minor_allele_frequencies_pool = genomes_pool.mean(axis=0)
minor_allele_frequencies_reference = genomes_reference.mean(axis=0)

## 6. Computing Likelihood Ratio Statistics

In [None]:
# Score both groups against the same pool/reference frequencies.
# The generator is consumed left to right, so the pool genomes are scored
# before the reference genomes.
likelihood_ratio_statistics_pool, likelihood_ratio_statistics_reference = (
    likelihood_ratio_statistic_pool_haploid_approximate(
        target_genomes=target_genomes,
        pool_frequencies=minor_allele_frequencies_pool,
        reference_frequencies=minor_allele_frequencies_reference,
    )
    for target_genomes in (genomes_pool, genomes_reference)
)
# Pool statistics first, reference statistics after -- the same order as `labels`.
likelihood_ratio_statistics = np.concatenate(
    [likelihood_ratio_statistics_pool, likelihood_ratio_statistics_reference]
)

## 7. Computing Likelihood Ratio Test Threshold

In [None]:
# Decision threshold derived from the labelled statistics and the target
# false-positive rate configured above.
threshold = likelihood_ratio_test_threshold(
    false_positive_rate=false_positive_rate,
    labels=labels,
    likelihood_ratio_statistics=likelihood_ratio_statistics,
)

## 8. Plotting Likelihood Ratio Statistics

In [None]:
# Plot the statistics together with their membership labels and the computed threshold.
plot_likelihood_ratio_statistics(
    threshold=threshold,
    membership_labels=labels,
    likelihood_ratio_statistics=likelihood_ratio_statistics,
)

## 9. Performing Likelihood Ratio Test

In [None]:
# The test is flagged as inverted when the reference statistics have the larger
# mean, i.e. pool members score lower on average.
inverted = np.mean(likelihood_ratio_statistics_reference) > np.mean(likelihood_ratio_statistics_pool)
predicted = likelihood_ratio_test(
    inverted=inverted,
    threshold=threshold,
    likelihood_ratio_statistics=likelihood_ratio_statistics,
)

## 10. Computing Metrics

In [None]:
# Metrics of the thresholded predictions against the true membership labels.
accuracy = accuracy_score(labels, predicted)
precision = precision_score(labels, predicted)
recall = recall_score(labels, predicted)
f1 = f1_score(labels, predicted)

# roc_auc_score treats larger scores as evidence for the positive (member) class.
# When the test is inverted (pool members have the lower mean statistic), the raw
# statistics would yield 1 - AUC, so negate them to orient the scores the same
# way as `predicted`.
membership_scores = -likelihood_ratio_statistics if inverted else likelihood_ratio_statistics
roc_auc = roc_auc_score(labels, membership_scores)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1: {f1:.2f}")
print(f"ROC AUC: {roc_auc:.2f}")

## 11. Plotting Receiver Operating Characteristics Curve

In [None]:
# roc_curve assumes that larger scores indicate the positive (member) class.
# When the test is inverted (pool members have the lower mean statistic), negate
# the statistics so the curve is not mirrored below the diagonal.
membership_scores = -likelihood_ratio_statistics if inverted else likelihood_ratio_statistics
fpr, tpr, _ = roc_curve(labels, membership_scores)
plot_receiver_operating_characteristics_curve(fpr, tpr)