# This notebook prints all available QTL and allelic-imbalance results for a specific SNP

In [1]:
import pandas as pd
import numpy as np
from multiprocessing import Pool
from functools import partial
import glob
import os
import plotly.express as px
import math
import matplotlib.pyplot as plt
from matplotlib import colors
import pybedtools as pbed

from scipy import stats, special
from statsmodels.stats import multitest
import statsmodels.api as sm
import statsmodels.formula.api as smf
import plotly.io as pio
import seaborn as sns

from functools import reduce

# Notebook-wide display settings.
MAX_DISPLAY_COLUMNS = 500
pd.set_option('display.max_columns', MAX_DISPLAY_COLUMNS)
plt.rcParams['svg.fonttype'] = 'none'

# Root URL that the remote metadata and QTL tables are read from.
# NOTE(review): the value ends in "/" and the loaders below join with
# f"{base_dir}/...", so every requested URL contains "//" — confirm the server
# tolerates that before normalising either side.
base_dir = "http://bartzabel.ls.manchester.ac.uk/orozcolab/SNP2Mechanism/"


In [2]:
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling, and
# base_dir is a plain-http URL, so this trusts both the server and the network
# path. Consider shipping the annotation as parquet/TSV or serving it over https.
gtf_annotation_df = pd.read_pickle(f"{base_dir}/metadata/gencode_gtf.pickle")

# Keep only protein-coding transcript records, then drop the columns that are
# entirely empty for that subset.
is_protein_coding_transcript = (
    (gtf_annotation_df["feature"] == "transcript")
    & (gtf_annotation_df["transcript_type"] == "protein_coding")
)
gtf_transcripts = gtf_annotation_df[is_protein_coding_transcript].dropna(axis=1, how='all')

# Keep only the part of each ID before the first "." (presumably dropping the
# GENCODE version suffix — TODO confirm).
gtf_transcripts["gene_id"] = gtf_transcripts["gene_id"].str.split(".").str[0]
gtf_transcripts["transcript_id"] = gtf_transcripts["transcript_id"].str.split(".").str[0]

# TSS position: "start" for transcripts on the "+" strand, "end" for every other
# strand value. Vectorised instead of a row-wise apply(axis=1), which runs a
# Python-level lambda once per transcript.
gtf_transcripts["TSS_start"] = (
    gtf_transcripts["start"]
    .where(gtf_transcripts["strand"] == "+", gtf_transcripts["end"])
    .astype("int64")
)

# gene_id -> gene_name lookup; when a gene_id occurs on several transcripts the
# last occurrence wins (dict construction order).
gene_mapper = dict(zip(gtf_transcripts['gene_id'], gtf_transcripts['gene_name']))

In [3]:
def _read_space_delimited(path):
    """Read one single-space-separated text table into a DataFrame."""
    return pd.read_csv(path, sep = " ")


# *_nominal_* tables, one per assay and cell type, read from base_dir.
RNA_nominal_CD4 = _read_space_delimited(f"{base_dir}/QTLs/RNA/RNA_nominal_CD4_merged.txt")
RNA_nominal_CD8 = _read_space_delimited(f"{base_dir}/QTLs/RNA/RNA_nominal_CD8_merged.txt")
ATAC_nominal_CD4 = _read_space_delimited(f"{base_dir}/QTLs/ATAC/ATAC_nominal_CD4_merged.txt")
ATAC_nominal_CD8 = _read_space_delimited(f"{base_dir}/QTLs/ATAC/ATAC_nominal_CD8_merged.txt")
ins_nominal_CD4 = _read_space_delimited(f"{base_dir}/QTLs/HiC/ins_nominal_CD4_merged.txt")
ins_nominal_CD8 = _read_space_delimited(f"{base_dir}/QTLs/HiC/ins_nominal_CD8_merged.txt")
loop_nominal_CD4 = _read_space_delimited(f"{base_dir}/QTLs/HiC/loop_nominal_CD4_merged.txt")
loop_nominal_CD8 = _read_space_delimited(f"{base_dir}/QTLs/HiC/loop_nominal_CD8_merged.txt")

# *_permuted_* tables (files carry an "_FDR" suffix), same assays and cell types.
RNA_permuted_CD4 = _read_space_delimited(f"{base_dir}/QTLs/RNA/RNA_permuted_CD4_FDR.txt")
RNA_permuted_CD8 = _read_space_delimited(f"{base_dir}/QTLs/RNA/RNA_permuted_CD8_FDR.txt")
ATAC_permuted_CD4 = _read_space_delimited(f"{base_dir}/QTLs/ATAC/ATAC_permuted_CD4_FDR.txt")
ATAC_permuted_CD8 = _read_space_delimited(f"{base_dir}/QTLs/ATAC/ATAC_permuted_CD8_FDR.txt")
ins_permuted_CD4 = _read_space_delimited(f"{base_dir}/QTLs/HiC/ins_permuted_CD4_FDR.txt")
ins_permuted_CD8 = _read_space_delimited(f"{base_dir}/QTLs/HiC/ins_permuted_CD8_FDR.txt")
loop_permuted_CD4 = _read_space_delimited(f"{base_dir}/QTLs/HiC/loop_permuted_CD4_FDR.txt")
loop_permuted_CD8 = _read_space_delimited(f"{base_dir}/QTLs/HiC/loop_permuted_CD8_FDR.txt")

# Allelic-imbalance results are comma-separated files read from sibling
# directories, i.e. relative to the notebook's working directory rather than
# base_dir.
loop_allelic_imbalance_ALL = pd.read_csv("../HiC_allelic_imbalance/.local/results/allelic_imbalance_ALL_apeglm_results.csv")
loop_allelic_imbalance_CD8 = pd.read_csv("../HiC_allelic_imbalance/.local/results/allelic_imbalance_CD8_apeglm_results.csv")
loop_allelic_imbalance_CD4 = pd.read_csv("../HiC_allelic_imbalance/.local/results/allelic_imbalance_CD4_apeglm_results.csv")

# The ATAC tables use their first column as the row index.
atac_allelic_imbalance_all = pd.read_csv("../ATAC_allelic_imbalance/.local/results/ATAC_ALL_allelic_imbalance_with_betabinom.csv.gz", index_col = 0)
atac_allelic_imbalance_CD8 = pd.read_csv("../ATAC_allelic_imbalance/.local/results/ATAC_CD8_allelic_imbalance_with_betabinom.csv.gz", index_col = 0)
atac_allelic_imbalance_CD4 = pd.read_csv("../ATAC_allelic_imbalance/.local/results/ATAC_CD4_allelic_imbalance_with_betabinom.csv.gz", index_col = 0)


In [4]:
# Annotate both RNA nominal tables with a gene_name column looked up from
# phe_id; phe_id values missing from gene_mapper become NaN.
for rna_nominal_table in (RNA_nominal_CD4, RNA_nominal_CD8):
    rna_nominal_table["gene_name"] = rna_nominal_table['phe_id'].map(gene_mapper)

In [5]:
def identify_QTL_permuted(var, nominal, permuted):
    """Display the nominal rows for the given variant(s) and the matching permuted rows.

    Two tables are rendered with ``display`` (the IPython notebook built-in):

    1. the rows of ``nominal`` whose ``var_id`` is one of the requested variants;
    2. the rows of ``permuted`` whose ``phe_id`` occurs in table 1, i.e. the
       permuted entry of every phenotype the variant(s) have a nominal row for.

    Parameters
    ----------
    var : str or list-like of str
        Variant ID(s) matched against ``nominal["var_id"]``. A single ID may be
        passed as a bare string.
    nominal : pandas.DataFrame
        Must contain ``var_id`` and ``phe_id`` columns.
    permuted : pandas.DataFrame
        Must contain a ``phe_id`` column. The columns ``dof1``, ``dof2``,
        ``bml1`` and ``bml2`` are hidden from the displayed table when present.

    Returns
    -------
    None
    """
    # Series.isin rejects a bare string, so wrap a single ID in a list.
    variant_ids = [var] if isinstance(var, str) else var

    # Filter once and reuse the result for both the display and the phenotype list.
    nominal_hits = nominal[nominal["var_id"].isin(variant_ids)]
    display(nominal_hits)

    tested_phenotypes = nominal_hits["phe_id"].to_list()
    permuted_hits = permuted[permuted["phe_id"].isin(tested_phenotypes)]
    # errors="ignore": a permuted table lacking some of these columns should
    # still be shown rather than raising KeyError.
    display(permuted_hits.drop(columns = ["dof1","dof2","bml1","bml2"], errors = "ignore"))

In [6]:
def get_all_tables(vars):
    """Print every QTL and allelic-imbalance table for one or more variants.

    For each QTL type / cell type pair, a label is printed and
    ``identify_QTL_permuted`` displays the nominal and permuted rows. Then, for
    each allelic-imbalance table, a label is printed and the rows whose ID
    column matches the requested variants are displayed.

    Parameters
    ----------
    vars : str or list-like of str
        A single variant ID, or any list-like (list, tuple, set, Series, ...)
        of variant IDs. The name shadows the ``vars`` builtin but is kept so
        existing keyword callers continue to work.

    Returns
    -------
    None

    Notes
    -----
    Reads the module-level tables loaded earlier in the notebook
    (``*_nominal_*``, ``*_permuted_*``, ``atac_allelic_imbalance_*``,
    ``loop_allelic_imbalance_*``).
    """
    # Wrap scalars only. The previous `type(vars) == list` test also wrapped
    # tuples, sets and Series, which made every isin() lookup silently match
    # nothing.
    variant_ids = list(vars) if pd.api.types.is_list_like(vars) else [vars]

    # (label, nominal table, permuted table), in display order.
    qtl_tables = [
        ("eQTL CD4", RNA_nominal_CD4, RNA_permuted_CD4),
        ("eQTL CD8", RNA_nominal_CD8, RNA_permuted_CD8),
        ("caQTL CD4", ATAC_nominal_CD4, ATAC_permuted_CD4),
        ("caQTL CD8", ATAC_nominal_CD8, ATAC_permuted_CD8),
        ("loopQTL CD4", loop_nominal_CD4, loop_permuted_CD4),
        ("loopQTL CD8", loop_nominal_CD8, loop_permuted_CD8),
        ("insQTL CD4", ins_nominal_CD4, ins_permuted_CD4),
        ("insQTL CD8", ins_nominal_CD8, ins_permuted_CD8),
    ]
    for label, nominal, permuted in qtl_tables:
        print(label)
        identify_QTL_permuted(variant_ids, nominal, permuted)

    # (label, table, column holding the variant ID), in display order.
    imbalance_tables = [
        ("allelic imbalance ALL", atac_allelic_imbalance_all, "ID"),
        ("allelic imbalance CD4", atac_allelic_imbalance_CD4, "ID"),
        ("allelic imbalance CD8", atac_allelic_imbalance_CD8, "ID"),
        ("loops with allelic imbalance ALL", loop_allelic_imbalance_ALL, "rsID"),
        ("loops with allelic imbalance CD4", loop_allelic_imbalance_CD4, "rsID"),
        ("loops with allelic imbalance CD8", loop_allelic_imbalance_CD8, "rsID"),
    ]
    for label, table, id_column in imbalance_tables:
        print(label)
        display(table[table[id_column].isin(variant_ids)])


In [26]:
# Variant whose QTL and allelic-imbalance tables are rendered below.
snp_of_interest = "rs13401811"
get_all_tables(snp_of_interest)

eQTL CD4


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit,gene_name


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR


eQTL CD8


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit,gene_name


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR


caQTL CD4


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
659007,55016,chr2,110858287,110858786,+,2469,0,rs13401811,chr2,110858527,110858527,0.000451,0.232495,-0.813462,0


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
10459,55016,chr2,110858287,110858786,+,2469,-8463,rs7576541,chr2,110849824,110849824,4.06096e-08,0.47656,-1.03144,0.000999,6e-05,0.002367


caQTL CD8


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
694837,55016,chr2,110858287,110858786,+,2469,0,rs13401811,chr2,110858527,110858527,1e-06,0.310338,-0.88706,0


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
10459,55016,chr2,110858287,110858786,+,2469,-8188,rs72832868,chr2,110850099,110850099,1.95993e-07,0.351451,-0.935442,0.000999,0.00027,0.006267


loopQTL CD4


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
593331,14298,chr2,110855001,111465000,+,3555,0,rs13401811,chr2,110858527,110858527,0.00248,0.197913,-0.717554,1
593575,14307,chr2,111012501,111730000,+,4123,-153974,rs13401811,chr2,110858527,110858527,0.007107,0.160169,-0.604286,0
596047,14363,chr2,111747501,111852500,+,4052,-888974,rs13401811,chr2,110858527,110858527,0.00561,0.168733,-0.66255,0


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
9631,14298,chr2,110855001,111465000,+,3555,0,rs13401811,chr2,110858527,110858527,0.00248,0.197913,-0.717554,0.801199,0.791922,0.992202
9640,14307,chr2,111012501,111730000,+,4123,0,rs116461064,chr2,111280632,111280632,0.000242,0.277179,-0.786285,0.231768,0.221891,0.919648
9696,14363,chr2,111747501,111852500,+,4052,-262792,rs28653258,chr2,111484709,111484709,0.000201,0.28321,-0.822725,0.174825,0.170115,0.90341


loopQTL CD8


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR


insQTL CD4


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
892144,14397,chr2,110925001,110950000,+,2753,-66474,rs13401811,chr2,110858527,110858527,0.009536,0.143105,-0.618911,0
892492,14400,chr2,111000001,111025000,+,2966,-141474,rs13401811,chr2,110858527,110858527,0.007402,0.15198,-0.637813,0


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
9509,14397,chr2,110925001,110950000,+,2753,-75177,rs7576541,chr2,110849824,110849824,7.89986e-10,0.580143,-1.16424,0.000999,2e-06,0.000211
9512,14400,chr2,111000001,111025000,+,2966,-80434,rs12151764,chr2,110919567,110919567,5.19813e-07,0.439371,-1.00411,0.000999,0.000468,0.014956


insQTL CD8


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
924063,14371,chr2,110275001,110300000,+,2430,558527,rs13401811,chr2,110858527,110858527,0.003834,0.152552,0.621246,0
925818,14424,chr2,111600001,111625000,+,3628,-741474,rs13401811,chr2,110858527,110858527,0.009671,0.124136,0.560406,0


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
9485,14371,chr2,110275001,110300000,+,2430,331098,rs140541720,chr2,110631098,110631098,0.00053,0.211552,-0.698239,0.222777,0.214412,0.564133
9531,14424,chr2,111600001,111625000,+,3628,-761641,rs2612696,chr2,110838360,110838360,0.001383,0.183317,0.547036,0.634366,0.619974,0.825522


allelic imbalance ALL


Unnamed: 0,CHROM,POS,ID,REF,ALT,combined_p_val_greater,combined_p_val_less,tot_REF,tot_ALT,ratio,n_pat,corrected_p_val_greater,corrected_p_val_less,TF_remap,TF_JASPAR,eQTLgen_gene,eQTLgen_symbol,eQTLgen_pval,ATAC_hic_corr_score,snp,hsc_genes,tcell_genes,all_genes,CD4_loop_svalue,CD8_loop_svalue,ALL_loop_svalue,svalues_betabinom
25963,chr2,110858527,rs13401811,G,A,1.594457e-98,1.0,3724.0,2207.0,0.592642,29.0,1.856879e-96,1.0,"{'MEF2B', 'LMO1', 'KDM1A', 'CREM', 'ETS1', 'TE...","{'Ptf1A', 'FOS::JUN', 'FOSB::JUN', 'JDP2', 'SN...",,,,0.1,rs13401811,,,,,,,0.078133


allelic imbalance CD4


Unnamed: 0,CHROM,POS,ID,REF,ALT,combined_p_val_greater,combined_p_val_less,tot_REF,tot_ALT,ratio,n_pat,corrected_p_val_greater,corrected_p_val_less,TF_remap,TF_JASPAR,eQTLgen_gene,eQTLgen_symbol,eQTLgen_pval,ATAC_hic_corr_score,snp,hsc_genes,tcell_genes,all_genes,CD4_loop_svalue,CD8_loop_svalue,ALL_loop_svalue,svalues_betabinom
22304,chr2,110858527,rs13401811,G,A,7.019788e-45,0.999139,1249.0,653.0,0.522818,12.0,5.584316e-43,1.0,"{'MEF2B', 'LMO1', 'KDM1A', 'CREM', 'ETS1', 'TE...","{'Ptf1A', 'FOS::JUN', 'FOSB::JUN', 'JDP2', 'SN...",,,,0.1,rs13401811,,,,,,,0.02631


allelic imbalance CD8


Unnamed: 0,CHROM,POS,ID,REF,ALT,combined_p_val_greater,combined_p_val_less,tot_REF,tot_ALT,ratio,n_pat,corrected_p_val_greater,corrected_p_val_less,TF_remap,TF_JASPAR,eQTLgen_gene,eQTLgen_symbol,eQTLgen_pval,ATAC_hic_corr_score,snp,hsc_genes,tcell_genes,all_genes,CD4_loop_svalue,CD8_loop_svalue,ALL_loop_svalue,svalues_betabinom
75435,chr2,110858527,rs13401811,G,A,3.221446e-56,1.0,2475.0,1554.0,0.627879,17.0,3.276625e-54,1.0,"{'MEF2B', 'LMO1', 'KDM1A', 'CREM', 'ETS1', 'TE...","{'Ptf1A', 'FOS::JUN', 'FOSB::JUN', 'JDP2', 'SN...",,,,0.1,rs13401811,,,,,,,0.258585


loops with allelic imbalance ALL


Unnamed: 0.1,Unnamed: 0,chrA,startA,endA,chrB,startB,endB,loopID,loopScore,rsID,rsCoord,REF_counts,ALT_counts,svalue


loops with allelic imbalance CD4


Unnamed: 0.1,Unnamed: 0,chrA,startA,endA,chrB,startB,endB,loopID,loopScore,rsID,rsCoord,REF_counts,ALT_counts,svalue


loops with allelic imbalance CD8


Unnamed: 0.1,Unnamed: 0,chrA,startA,endA,chrB,startB,endB,loopID,loopScore,rsID,rsCoord,REF_counts,ALT_counts,svalue
