# LAI-Net and Neural ADMIXTURE Training and Evaluation

## Training Step


### (0) Download the datasets from:

### (1) Install LAI-Net and Neural ADMIXTURE
+ Neural ADMIXTURE: https://github.com/AI-sandbox/neural-admixture
+ LAI-Net: https://github.com/AI-sandbox/LAI-Net

### (2) Run the command to train LAI-Net from: https://github.com/AI-sandbox/LAI-Net

### (3) Run the command to train Neural ADMIXTURE from: https://github.com/AI-sandbox/neural-admixture

### (4) Copy the network paths into the following cell

In [1]:
# Device on which both networks are loaded and evaluated.
device = 'cpu'

# Checkpoints of the trained networks (see training steps (2) and (3) above).
LAINET_PATH, NEURALADM_PATH = 'lainet_trained.pth', 'adv.pt'

# Founders dataset: the VCF file and its accompanying .map file.
founders_vcf_file_path = 'founders_train.vcf'
founders_map_file_path = 'founders_train.map'

### Importing dependencies, loading dataset and networks

In [4]:
# Importing libraries
# Imports every dependency used by the notebook and reports the PyTorch version in use.
print('Start importing required libraries...')
import os, sys, time, allel, yaml, math, gzip, torch
# Extend the module search path so that the `lainet` package imported below can be
# resolved from '../LAI-Net/' (presumably a local checkout of the LAI-Net repository).
# This must run before the `from lainet...` import further down.
sys.path.append('../LAI-Net/')
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from collections import Counter
from scipy.interpolate import interp1d
# Reader used by the dataset-loading cell to parse the founders' VCF and .map files.
from lainet.utils.reader import load_founders_from_vcf_and_map
print('Done importing, using pytorch version {}...'.format(torch.__version__))

Start importing required libraries...
Done importing, using pytorch version 1.12.1+cu102...


In [5]:
# Loading dataset
# Read SNPs, labels and ancestry names of the founders; the remaining return
# values of the reader are not needed here and are discarded.
(snps_all, labels_all, _, _, ancestry_names, _) = load_founders_from_vcf_and_map(
    founders_vcf_file_path,
    founders_map_file_path,
    make_haploid=True,
    random_split=False,
    verbose=True,
)

# Keep every `subset`-th row (and the matching label) to make evaluation cheaper.
subset = 4
snps = snps_all[::subset, :]
labels = labels_all[::subset]
print(snps.shape, labels.shape)

Loading vcf and .map files...
Done loading vcf and .map files...
A total of 3200 diploid individuals where found in the vcf and .map
A total of 8 ancestries where found: ['AFR' 'AHG' 'AMR' 'EAS' 'EUR' 'OCE' 'SAS' 'WAS']
(1440, 516800) (1440,)


In [6]:
# Loading LAI-Net
# NOTE(review): torch.load unpickles the whole object stored in the file, so only
# load checkpoints from a trusted source. `lainet` is imported here presumably so
# the pickled model classes can be resolved — TODO confirm.
import lainet
print(f'Loading LAI-Net from {LAINET_PATH} ...')
LN = torch.load(LAINET_PATH, map_location=device)
# Record the target device on the model object before moving it there.
LN.device = device
# Move to the target device and switch to evaluation mode.
LN = LN.to(device).eval()
print('Done loading LAI-Net')

Loading LAI-Net from /local-scratch/mrivas/dmasmont/misc/lnpfoo/lainet_trained.pth ...
Done loading LAI-Net


In [7]:
# Loading Neural ADMIXTURE
from neural_admixture.model.neural_admixture import NeuralAdmixture
print(f'Loading Neural ADMIXTURE from {NEURALADM_PATH} ...')
# Derive the model dimensions from the loaded dataset instead of hard-coding
# them (previously [8] and 516800), so the cell stays consistent with the data.
# strict=True below still raises if the checkpoint does not match these sizes.
num_ancestries = len(ancestry_names)
num_snps = snps.shape[1]
NADM = NeuralAdmixture([num_ancestries], num_features=num_snps)
NADM.load_state_dict(torch.load(NEURALADM_PATH, map_location=device), strict=True)
# Record the target device on the model object, then move it and switch to eval mode.
NADM.device = device
NADM = NADM.to(device)
NADM = NADM.eval()
print('Done loading Neural ADMIXTURE')

Loading Neural ADMIXTURE from /local-scratch/mrivas/dmasmont/misc/lnpfoo/adv.pt ...
Done loading Neural ADMIXTURE


### Dataset Evaluation

In [8]:
def forward_haploid(net, x, do_mean=True):
    """Run a LAI-Net style network on haploid input and return scores and predictions.

    The base output is stacked with itself along a new dimension 3 before being
    passed to the smoother, and only index 0 of that dimension is kept from the
    smoother output (presumably the smoother expects a pair of haplotypes —
    TODO confirm).

    Args:
        net: object exposing `forward_base` and `forward_smoother`.
        x: input tensor forwarded to `net.forward_base`.
        do_mean: when True, average the smoothed scores over dimension 2.

    Returns:
        Tuple `(out, pred)`: the (optionally averaged) scores and their argmax
        over dimension 1.
    """
    base_scores = net.forward_base(x)
    paired = torch.stack((base_scores, base_scores), dim=3)
    smoothed = net.forward_smoother(paired)
    out = smoothed[:, :, :, 0]
    if do_mean:
        out = out.mean(dim=2)
    return out, out.argmax(dim=1)


def forward_nadm(nadm, x):
    """Run a Neural ADMIXTURE style model and return scores and predictions.

    The model call is expected to return a pair; the first element is ignored
    and the first entry of the second element is used as the score tensor.

    Args:
        nadm: callable model returning a 2-tuple when called with `x`.
        x: input tensor forwarded to the model.

    Returns:
        Tuple `(prob, pred)`: the selected score tensor and its argmax over
        dimension 1.
    """
    _unused, outputs = nadm(x)
    prob = outputs[0]
    return prob, prob.argmax(dim=1)



# Evaluate each network on every ancestry class separately and print the
# per-class accuracy (fraction of predictions equal to the class index).
evaluators = [
    (LN, forward_haploid, 'LAI-Net'),
    (NADM, forward_nadm, 'Neural ADMIXTURE'),
]

for net, forward_func, name in evaluators:
    print(f'Evaluating {name}')

    # Iterate over all ancestries present in the dataset rather than a fixed count.
    for i, anc_name in enumerate(ancestry_names):
        mask = labels == i
        # An empty class would give a NaN accuracy (or fail inside the network).
        if not mask.any():
            print(anc_name, i, 'has no samples, skipping')
            continue

        x = torch.tensor(snps[mask, :]).float().to(device)
        y = torch.tensor(labels[mask]).long().to(device)

        # Pure inference: disable autograd so no graph is built over the inputs.
        with torch.no_grad():
            out, pred = forward_func(net, x)

        # If predictions carry a second dimension, repeat the labels along it so
        # the element-wise comparison below lines up.
        if pred.dim() == 2:
            y = y.unsqueeze(1).repeat(1, pred.shape[1])

        acc = torch.mean((pred.flatten() == y.flatten()).float())
        print(anc_name, i,'Accuracy (%) is : ', acc.item()*100)

Evaluating LAI-Net
AFR 0 Accuracy (%) is :  100.0
AHG 1 Accuracy (%) is :  100.0
AMR 2 Accuracy (%) is :  100.0
EAS 3 Accuracy (%) is :  100.0
EUR 4 Accuracy (%) is :  98.33333492279053
OCE 5 Accuracy (%) is :  100.0
SAS 6 Accuracy (%) is :  96.66666388511658
WAS 7 Accuracy (%) is :  97.22222089767456
Evaluating Neural ADMIXTURE
AFR 0 Accuracy (%) is :  100.0
AHG 1 Accuracy (%) is :  100.0
AMR 2 Accuracy (%) is :  100.0
EAS 3 Accuracy (%) is :  100.0
EUR 4 Accuracy (%) is :  100.0
OCE 5 Accuracy (%) is :  100.0
SAS 6 Accuracy (%) is :  100.0
WAS 7 Accuracy (%) is :  100.0
