# Aggregate predicted GWAS variant effects from Borzoi and Decima

In [None]:
import anndata
import pandas as pd
import numpy as np
import os
import sys

## Paths

In [None]:
# AnnData matrix (.h5ad) that this notebook loads with anndata.read_h5ad.
matrix_file = '/gstore/data/resbioai/grelu/decima/20240823/data.h5ad'

# Directory holding the variant table and the prediction array; the
# aggregated output of this notebook is written here as well.
out_dir = '/gstore/data/resbioai/grelu/decima/20240823/gwas_44traits/positive_variants'

# Variant table (CSV) and Decima prediction array (.npy), both inside out_dir.
pos_file, decima_preds_file = (
    os.path.join(out_dir, fname)
    for fname in ('positive_variants_processed.csv', 'decima_preds.npy')
)

## Load data

In [None]:
# Read the AnnData object whose obs, obs_names, var_names and X are used below.
ad = anndata.read_h5ad(filename=matrix_file)

## Load variants

In [None]:
# Variant table; the cells below read its 'variant' and 'gene' columns and
# address its rows by position (read_csv assigns a default 0..n-1 index).
pos = pd.read_csv(filepath_or_buffer=pos_file)

## Load predictions

In [None]:
# Raw Decima predictions as a NumPy array.
# NOTE(review): later cells index rows with row positions of `pos` and mask
# columns with ad.obs_names, so the layout is presumably
# (rows of pos) x (rows of ad.obs) -- confirm against the file that was saved.
decima_preds = np.load(file=decima_preds_file)
decima_preds.shape

## For Decima: average VEP per cell type

In [None]:
# One row per cell type: the 'index' column holds a tuple with the obs names
# of every observation of that cell type. dropna() discards any row that
# contains a missing value.
idx_map = (
    ad.obs.reset_index()
    .groupby(['cell_type'])
    .agg({'index': tuple})
    .reset_index()
    .dropna()
)
idx_map.head()

In [None]:
# For every cell type, average the predictions over the columns whose obs
# name belongs to that cell type; after the transpose each column is one
# cell type, in the row order of idx_map.
# Note: this rebinds decima_preds, so the cell cannot be re-run on its own output.
decima_preds = np.stack([
    decima_preds[:, ad.obs_names.isin(members)].mean(1)
    for members in idx_map['index']
]).T
decima_preds.shape

In [None]:
# Same aggregation applied to ad.X: average over the observations of each
# cell type (axis 0), then transpose so that rows follow ad.var_names and
# columns follow the cell types in idx_map.
gene_exp = np.stack([
    ad.X[ad.obs_names.isin(members), :].mean(0)
    for members in idx_map['index']
]).T
gene_exp.shape

In [None]:
# Column-less frame indexed by cell-type name (cast to str); it becomes the
# `var` of the AnnData assembled further down.
var = pd.DataFrame(index=idx_map['cell_type'].astype(str))

## Choose the best gene (highest mean absolute VEP across cell types) per variant

In [None]:
# idx_map is rebound here: one row per variant, with the 'index' column
# listing the row positions in `pos` that carry that variant.
idx_map = (
    pos.reset_index()
    .groupby(['variant'])['index']
    .apply(list)
    .reset_index()
)
idx_map.head()

In [None]:
# For each variant, pick the row of `pos` whose absolute prediction, averaged
# over the cell-type columns (axis 1), is the largest.
# NOTE(review): the score is the mean of the absolute values, not their
# maximum -- confirm this is the intended ranking criterion.
idx_map['best_decima'] = [
    rows[np.abs(decima_preds[rows]).mean(1).argmax()]
    for rows in idx_map['index']
]

## Subset predictions to the matched genes

In [None]:
# Keep only the selected row of each variant and wrap the result in an
# AnnData: X holds the per-cell-type predictions, obs the matching rows of
# `pos` (renumbered from 0), var the cell types.
decima_preds = anndata.AnnData(
    X=decima_preds[idx_map['best_decima']],
    obs=pos.iloc[idx_map['best_decima']].reset_index(drop=True),
    var=var,
)
decima_preds.shape

In [None]:
# Select, for every retained variant, the gene_exp row of its gene (rows of
# gene_exp follow ad.var_names).
# The name -> row table is built once rather than scanning ad.var_names for
# each variant; setdefault keeps the first occurrence of a name, which matches
# the previous np.where(...)[0][0] lookup.
gene_to_row = {}
for row, gene_name in enumerate(ad.var_names):
    gene_to_row.setdefault(gene_name, row)

# A gene that is absent from ad.var_names raises a KeyError naming that gene.
gene_exp = gene_exp[[gene_to_row[gene_name] for gene_name in decima_preds.obs['gene']]]
gene_exp.shape

In [None]:
# Attach the matched per-cell-type expression to the predictions as the
# 'gene_exp' layer (one row per retained variant, one column per cell type).
decima_preds.layers['gene_exp'] = gene_exp

## Save

In [None]:
# Destination of the aggregated predictions, placed next to the inputs in out_dir.
decima_out_file = os.path.join(out_dir, 'decima_preds_agg.h5ad')

In [None]:
# Persist the aggregated AnnData (predictions in X, expression in the
# 'gene_exp' layer) to decima_out_file.
decima_preds.write_h5ad(filename=decima_out_file)