## Script that displays the information available in the publicly released data

### instructions
Execute all the cells, then go to the bottom of the notebook, insert a SNP or a list of SNPs in the last cell, and execute that cell. This calls the function that retrieves the rows matching that SNP across all the datasets that have been released.

For QTLs we can print nominal p-values for all SNPs, but FDR-corrected p-values are only available for the top SNP of each locus. We therefore print both tables to give an idea of what is happening at your locus (e.g. if your SNP is not the top SNP but is highly correlated with the top SNP, then it is probably still significant).

In [6]:
import pandas as pd
import numpy as np
import gc

# Root URL under which every table read by this notebook is located.
# NOTE(review): the value already ends with "/" and the paths appended to it
# below start with "/", so the requested URLs contain "//" after
# "SNP2Mechanism". The address also uses plain "http".
base_dir = "http://bartzabel.ls.manchester.ac.uk/orozcolab/SNP2Mechanism/"


In [8]:
# Build ``gene_mapper``: a dict from gene_id (with everything after the first
# "." removed - presumably the version suffix) to gene_name, restricted to
# protein-coding transcript records of the annotation.
#
# NOTE(review): ``pd.read_pickle`` unpickles whatever the server returns, and
# unpickling can execute arbitrary code. The file is fetched over plain HTTP,
# so only run this against a source you trust (or switch to a non-pickle
# format if one is published).
gtf_annotation_df = pd.read_pickle(f"{base_dir}/metadata/gencode_gtf.pickle")

protein_coding_transcripts = (
    (gtf_annotation_df["feature"] == "transcript")
    & (gtf_annotation_df["transcript_type"] == "protein_coding")
)

# Only these two columns feed the mapper. The previous per-row TSS computation
# and the transcript_id clean-up were discarded by the ``del`` below without
# ever being read, so they are not computed any more.
gtf_transcripts = gtf_annotation_df.loc[protein_coding_transcripts, ["gene_id", "gene_name"]]

gene_mapper = dict(
    zip(
        gtf_transcripts["gene_id"].str.split(".").str[0],
        gtf_transcripts["gene_name"],
    )
)

# The annotation is large; release it before the QTL tables are loaded.
del gtf_annotation_df
del gtf_transcripts
del protein_coding_transcripts
gc.collect()

50

In [9]:
def _read_qtl_table(assay_dir, file_name):
    """Read one space-delimited table from ``{base_dir}/QTLs/<assay_dir>/<file_name>``."""
    return pd.read_csv(f"{base_dir}/QTLs/{assay_dir}/{file_name}", sep=" ")


def _read_allelic_imbalance(assay_dir, file_name, **read_csv_options):
    """Read one CSV from ``{base_dir}/<assay_dir>/allelic_imbalance/<file_name>``."""
    return pd.read_csv(f"{base_dir}/{assay_dir}/allelic_imbalance/{file_name}", **read_csv_options)


# --- nominal QTL results, one table per assay and cell type -----------------
RNA_nominal_CD4 = _read_qtl_table("RNA", "RNA_nominal_CD4_merged.txt")
RNA_nominal_CD8 = _read_qtl_table("RNA", "RNA_nominal_CD8_merged.txt")
ATAC_nominal_CD4 = _read_qtl_table("ATAC", "ATAC_nominal_CD4_merged.txt")
ATAC_nominal_CD8 = _read_qtl_table("ATAC", "ATAC_nominal_CD8_merged.txt")
ins_nominal_CD4 = _read_qtl_table("HiC", "ins_nominal_CD4_merged.txt")
ins_nominal_CD8 = _read_qtl_table("HiC", "ins_nominal_CD8_merged.txt")
loop_nominal_CD4 = _read_qtl_table("HiC", "loop_nominal_CD4_merged.txt")
loop_nominal_CD8 = _read_qtl_table("HiC", "loop_nominal_CD8_merged.txt")

# --- permuted QTL results (the "*_FDR.txt" files) ---------------------------
RNA_permuted_CD4 = _read_qtl_table("RNA", "RNA_permuted_CD4_FDR.txt")
RNA_permuted_CD8 = _read_qtl_table("RNA", "RNA_permuted_CD8_FDR.txt")
ATAC_permuted_CD4 = _read_qtl_table("ATAC", "ATAC_permuted_CD4_FDR.txt")
ATAC_permuted_CD8 = _read_qtl_table("ATAC", "ATAC_permuted_CD8_FDR.txt")
ins_permuted_CD4 = _read_qtl_table("HiC", "ins_permuted_CD4_FDR.txt")
ins_permuted_CD8 = _read_qtl_table("HiC", "ins_permuted_CD8_FDR.txt")
loop_permuted_CD4 = _read_qtl_table("HiC", "loop_permuted_CD4_FDR.txt")
loop_permuted_CD8 = _read_qtl_table("HiC", "loop_permuted_CD8_FDR.txt")

# --- allelic imbalance of loops (plain CSV, default index) ------------------
loop_allelic_imbalance_ALL = _read_allelic_imbalance("hic", "allelic_imbalance_ALL_apeglm_results.csv")
loop_allelic_imbalance_CD4 = _read_allelic_imbalance("hic", "allelic_imbalance_CD4_apeglm_results.csv")
loop_allelic_imbalance_CD8 = _read_allelic_imbalance("hic", "allelic_imbalance_CD8_apeglm_results.csv")

# --- allelic imbalance of ATAC (gzipped CSV, first column is the index) -----
atac_allelic_imbalance_all = _read_allelic_imbalance(
    "atac", "ATAC_ALL_allelic_imbalance_with_betabinom.csv.gz", index_col=0
)
atac_allelic_imbalance_CD4 = _read_allelic_imbalance(
    "atac", "ATAC_CD4_allelic_imbalance_with_betabinom.csv.gz", index_col=0
)
atac_allelic_imbalance_CD8 = _read_allelic_imbalance(
    "atac", "ATAC_CD8_allelic_imbalance_with_betabinom.csv.gz", index_col=0
)

In [10]:
# Add a "gene_name" column to both nominal eQTL tables by looking each
# phenotype ID up in gene_mapper; IDs without an entry become NaN.
for rna_nominal_table in (RNA_nominal_CD4, RNA_nominal_CD8):
    rna_nominal_table["gene_name"] = rna_nominal_table["phe_id"].map(gene_mapper)

In [11]:
def identify_QTL_permuted(var, nominal, permuted):
    """Display the nominal and permuted QTL rows relevant to the given variant(s).

    Two tables are shown with ``display``:

    1. the rows of ``nominal`` whose ``var_id`` is one of the requested
       variants;
    2. the rows of ``permuted`` for every phenotype (``phe_id``) present in
       the first table, with the ``dof1``, ``dof2``, ``bml1`` and ``bml2``
       columns removed.

    Args:
        var: A single variant ID, or a list-like of variant IDs.
        nominal: DataFrame with at least ``var_id`` and ``phe_id`` columns.
        permuted: DataFrame with a ``phe_id`` column and the four columns
            that are dropped before display.

    Returns:
        None. The tables are only displayed.
    """
    # ``Series.isin`` raises TypeError on a bare string, so a single ID is
    # wrapped into a one-element list; list-likes are used as given.
    variant_ids = list(var) if pd.api.types.is_list_like(var) else [var]

    # Filter once and reuse the result for both the display and the
    # phenotype lookup.
    nominal_hits = nominal[nominal["var_id"].isin(variant_ids)]
    phenotypes_with_nominal_qtl = nominal_hits["phe_id"].to_list()

    display(nominal_hits)

    permuted_hits = permuted[permuted["phe_id"].isin(phenotypes_with_nominal_qtl)]
    display(permuted_hits.drop(columns=["dof1", "dof2", "bml1", "bml2"]))

In [12]:
def get_all_tables(vars):
    """Print every released table, filtered to the requested SNP(s).

    For each QTL type and cell type a heading is printed and
    ``identify_QTL_permuted`` shows the matching nominal and permuted rows.
    Afterwards the ATAC allelic-imbalance tables (matched on their ``ID``
    column) and the loop allelic-imbalance tables (matched on ``rsID``) are
    displayed, each under its own heading.

    Args:
        vars: A single SNP ID (e.g. ``"rs4409785"``) or a list-like of SNP
            IDs (list, tuple, set, Series, ...). The parameter name is kept
            as-is, although it shadows the ``vars`` builtin, so existing
            keyword calls keep working.

    Returns:
        None. Everything is printed or displayed.
    """
    # A bare ID becomes a one-element list. Every list-like, not only
    # ``list``, is used element-wise; previously a tuple or set was wrapped
    # whole into a single-item list and could not match any row.
    if pd.api.types.is_list_like(vars):
        vars = list(vars)
    else:
        vars = [vars]

    # (heading, nominal table, permuted table)
    qtl_tables = (
        ("eQTL CD4", RNA_nominal_CD4, RNA_permuted_CD4),
        ("eQTL CD8", RNA_nominal_CD8, RNA_permuted_CD8),
        ("caQTL CD4", ATAC_nominal_CD4, ATAC_permuted_CD4),
        ("caQTL CD8", ATAC_nominal_CD8, ATAC_permuted_CD8),
        ("loopQTL CD4", loop_nominal_CD4, loop_permuted_CD4),
        ("loopQTL CD8", loop_nominal_CD8, loop_permuted_CD8),
        ("insQTL CD4", ins_nominal_CD4, ins_permuted_CD4),
        ("insQTL CD8", ins_nominal_CD8, ins_permuted_CD8),
    )
    for heading, nominal, permuted in qtl_tables:
        print(heading)
        identify_QTL_permuted(vars, nominal, permuted)

    # (heading, table); the SNP identifier is stored in the "ID" column.
    atac_imbalance_tables = (
        ("allelic imbalance ALL", atac_allelic_imbalance_all),
        ("allelic imbalance CD4", atac_allelic_imbalance_CD4),
        ("allelic imbalance CD8", atac_allelic_imbalance_CD8),
    )
    for heading, table in atac_imbalance_tables:
        print(heading)
        display(table[table["ID"].isin(vars)])

    # (heading, table); the SNP identifier is stored in the "rsID" column.
    loop_imbalance_tables = (
        ("loops with allelic imbalance ALL", loop_allelic_imbalance_ALL),
        ("loops with allelic imbalance CD4", loop_allelic_imbalance_CD4),
        ("loops with allelic imbalance CD8", loop_allelic_imbalance_CD8),
    )
    for heading, table in loop_imbalance_tables:
        print(heading)
        display(table[table["rsID"].isin(vars)])


## Fill in the following cell with the SNP or the list of SNPs
Format for a single SNP: "rs4409785"
Format for a list of SNPs: ["rs4409785", "rs61897793"]

In [13]:
get_all_tables("rs4409785")

eQTL CD4


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit,gene_name


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR


eQTL CD8


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit,gene_name


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR


caQTL CD4


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
5241669,18941,chr11,94786466,94786965,+,3964,791293,rs4409785,chr11,95578258,95578258,0.00175586,0.189787,-0.704744,1
5242633,18965,chr11,95213587,95214086,+,3998,364172,rs4409785,chr11,95578258,95578258,0.00705785,0.144491,-0.61492,0
5243047,18972,chr11,95263313,95263812,+,4026,314446,rs4409785,chr11,95578258,95578258,0.00229941,0.18112,-0.688463,0
5243258,18977,chr11,95329634,95330133,+,4074,248125,rs4409785,chr11,95578258,95578258,0.00292194,0.173371,-0.673575,0
5243562,18985,chr11,95547920,95548419,+,3956,29839,rs4409785,chr11,95578258,95578258,0.00425772,0.161107,-0.649314,0
5243591,18986,chr11,95578037,95578536,+,3994,0,rs4409785,chr11,95578258,95578258,7.21404e-09,0.512963,1.15862,1
5243745,18989,chr11,95605192,95605691,+,4001,-26934,rs4409785,chr11,95578258,95578258,0.0072654,0.143534,0.61288,0
5243872,18993,chr11,95674583,95675082,+,3938,-96325,rs4409785,chr11,95578258,95578258,0.00125237,0.20056,0.72447,0
5243893,18994,chr11,95680358,95680857,+,3936,-102100,rs4409785,chr11,95578258,95578258,0.00743416,0.142775,0.611257,0
5248264,19073,chr11,96561061,96561560,+,4442,-982803,rs4409785,chr11,95578258,95578258,0.00888568,0.136871,-0.598484,0


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
79723,18941,chr11,94786466,94786965,+,3964,791293,rs4409785,chr11,95578258,95578258,0.00175586,0.189787,-0.704744,0.636364,0.64373,0.906433
79747,18965,chr11,95213587,95214086,+,3998,368442,rs10831378,chr11,95582528,95582528,0.000243388,0.251305,-0.707673,0.162837,0.160773,0.637785
79754,18972,chr11,95263313,95263812,+,4026,430897,rs1527448,chr11,95694709,95694709,0.000148963,0.266008,-0.727462,0.12987,0.135088,0.603813
79759,18977,chr11,95329634,95330133,+,4074,251118,rs12421416,chr11,95581251,95581251,2.75953e-06,0.376493,1.09249,0.004995,0.002908,0.058332
79767,18985,chr11,95547920,95548419,+,3956,509009,rs16922958,chr11,96057428,96057428,0.000248368,0.250693,1.04025,0.184815,0.201387,0.681261
79768,18986,chr11,95578037,95578536,+,3994,0,rs4409785,chr11,95578258,95578258,7.21404e-09,0.512963,1.15862,0.000999,1.6e-05,0.000791
79771,18989,chr11,95605192,95605691,+,4001,427691,rs12574769,chr11,96033382,96033382,0.00240711,0.179643,-0.697453,0.822178,0.814991,0.957509
79775,18993,chr11,95674583,95675082,+,3938,344591,rs74891904,chr11,96019673,96019673,0.000231872,0.252767,-1.0583,0.180819,0.188589,0.667774
79776,18994,chr11,95680358,95680857,+,3936,970394,rs11225596,chr11,96651251,96651251,0.000695125,0.219087,0.649778,0.435564,0.41351,0.816814
79855,19073,chr11,96561061,96561560,+,4442,694690,rs36058326,chr11,97256250,97256250,0.00115542,0.203114,0.900509,0.59041,0.591589,0.888267


caQTL CD8


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
5622405,18952,chr11,95089538,95090037,+,3908,488221,rs4409785,chr11,95578258,95578258,0.00343752,0.127971,-0.606787,0
5623909,18986,chr11,95578037,95578536,+,3994,0,rs4409785,chr11,95578258,95578258,2.58439e-10,0.472266,1.16566,1
5624122,18992,chr11,95660004,95660503,+,4002,-81746,rs4409785,chr11,95578258,95578258,0.00489258,0.118989,0.585104,0
5627965,19072,chr11,96555776,96556275,+,4450,-977518,rs4409785,chr11,95578258,95578258,0.00629423,0.112558,-0.569073,0
5628048,19074,chr11,96567112,96567611,+,4433,-988854,rs4409785,chr11,95578258,95578258,0.00797856,0.106492,-0.553528,0


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
79734,18952,chr11,95089538,95090037,+,3908,572228,rs78235793,chr11,95662265,95662265,0.000484929,0.176945,0.953703,0.301698,0.294409,0.727432
79768,18986,chr11,95578037,95578536,+,3994,0,rs4409785,chr11,95578258,95578258,2.58439e-10,0.472266,1.16566,0.000999,5.52969e-07,2.9e-05
79774,18992,chr11,95660004,95660503,+,4002,670671,rs1150311,chr11,96331174,96331174,0.000646455,0.169864,-0.623553,0.378621,0.381644,0.781117
79854,19072,chr11,96555776,96556275,+,4450,699975,rs36058326,chr11,97256250,97256250,0.000736096,0.166653,0.877837,0.466533,0.461434,0.822016
79856,19074,chr11,96567112,96567611,+,4433,147122,rs3019728,chr11,96714733,96714733,0.00215156,0.139833,0.549307,0.804196,0.808736,0.946258


loopQTL CD4


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
4660457,65112,chr11,95152501,95707500,+,5191,0,rs4409785,chr11,95578258,95578258,0.000367159,0.26341,-1.02131,0
4660702,65117,chr11,95182501,95710000,+,5116,0,rs4409785,chr11,95578258,95578258,5.82038e-05,0.322417,-1.12992,1
4660899,65121,chr11,95227501,96392500,+,6730,0,rs4409785,chr11,95578258,95578258,0.0018154,0.208894,0.909502,0
4660976,65122,chr11,95230001,95690000,+,4977,0,rs4409785,chr11,95578258,95578258,0.00899126,0.151604,-0.774812,0
4661873,65140,chr11,95580001,95705000,+,4259,-1743,rs4409785,chr11,95578258,95578258,1.19512e-10,0.631489,1.58133,1


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
76557,65112,chr11,95152501,95707500,+,5191,774254,rs16923596,chr11,96481754,96481754,0.000111725,0.302004,1.34288,0.114885,0.119788,0.887738
76562,65117,chr11,95182501,95710000,+,5116,0,rs4409785,chr11,95578258,95578258,5.82038e-05,0.322417,-1.12992,0.056943,0.0573836,0.822867
76566,65121,chr11,95227501,96392500,+,6730,0,rs61610534,chr11,95680842,95680842,0.000129389,0.297337,-0.931899,0.156843,0.160555,0.902375
76567,65122,chr11,95230001,95690000,+,4977,0,rs7114693,chr11,95676960,95676960,0.00020733,0.282165,0.902918,0.180819,0.180594,0.90564
76585,65140,chr11,95580001,95705000,+,4259,-1743,rs4409785,chr11,95578258,95578258,1.19512e-10,0.631489,1.58133,0.000999,4.02504e-07,0.000269


loopQTL CD8


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
4825771,65074,chr11,94927501,95792500,+,5922,0,rs4409785,chr11,95578258,95578258,0.00948784,0.124726,-0.719906,0
4829545,65137,chr11,95437501,95675000,+,4459,0,rs4409785,chr11,95578258,95578258,0.00114957,0.188811,-0.885749,1
4829646,65140,chr11,95580001,95705000,+,4259,-1743,rs4409785,chr11,95578258,95578258,2.26243e-12,0.622531,1.60834,1


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
76519,65074,chr11,94927501,95792500,+,5922,325971,rs77046666,chr11,96118471,96118471,0.000892248,0.196309,-1.12154,0.563437,0.550183,0.937622
76582,65137,chr11,95437501,95675000,+,4459,0,rs4409785,chr11,95578258,95578258,0.00114957,0.188811,-0.885749,0.636364,0.62193,0.949617
76585,65140,chr11,95580001,95705000,+,4259,-1743,rs4409785,chr11,95578258,95578258,2.26243e-12,0.622531,1.60834,0.000999,1.94488e-08,1.5e-05


insQTL CD4


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
8063817,76165,chr11,95325001,95350000,+,4104,228258,rs4409785,chr11,95578258,95578258,0.007617,0.150982,-0.77308,0
8064654,76177,chr11,95625001,95650000,+,4073,-46743,rs4409785,chr11,95578258,95578258,0.000181,0.275405,1.04411,1
8064881,76178,chr11,95650001,95675000,+,4034,-71743,rs4409785,chr11,95578258,95578258,0.000228,0.268107,1.03019,1
8065141,76179,chr11,95675001,95700000,+,3966,-96743,rs4409785,chr11,95578258,95578258,0.001551,0.205644,0.902235,0


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
83790,76165,chr11,95325001,95350000,+,4104,191323,rs34536806,chr11,95541323,95541323,0.000828,0.226519,-1.04253,0.493506,0.485523,0.77138
83802,76177,chr11,95625001,95650000,+,4073,-46743,rs4409785,chr11,95578258,95578258,0.000181,0.275405,1.04411,0.145854,0.141802,0.496396
83803,76178,chr11,95650001,95675000,+,4034,-71743,rs4409785,chr11,95578258,95578258,0.000228,0.268107,1.03019,0.164835,0.164718,0.524459
83804,76179,chr11,95675001,95700000,+,3966,-41122,rs114004873,chr11,95633879,95633879,6.2e-05,0.308346,-1.09952,0.058941,0.061455,0.35231


insQTL CD8


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,best_hit
8547177,76177,chr11,95625001,95650000,+,4073,-46743,rs4409785,chr11,95578258,95578258,0.000648,0.20568,0.92447,0
8547301,76178,chr11,95650001,95675000,+,4034,-71743,rs4409785,chr11,95578258,95578258,0.00035,0.223567,0.963829,1


Unnamed: 0,phe_id,phe_chr,phe_from,phe_to,phe_strd,n_var_in_cis,dist_phe_var,var_id,var_chr,var_from,var_to,nom_pval,r_squared,slope,adj_emp_pval,adj_beta_pval,FDR
83778,76177,chr11,95625001,95650000,+,4073,0,rs11021260,chr11,95635736,95635736,5.4e-05,0.275949,-0.923159,0.036963,0.046287,0.289503
83779,76178,chr11,95650001,95675000,+,4034,-71743,rs4409785,chr11,95578258,95578258,0.00035,0.223567,0.963829,0.267732,0.257868,0.6042


allelic imbalance ALL


Unnamed: 0,CHROM,POS,ID,REF,ALT,combined_p_val_greater,combined_p_val_less,tot_REF,tot_ALT,ratio,...,eQTLgen_pval,ATAC_hic_corr_score,snp,hsc_genes,tcell_genes,all_genes,CD4_loop_svalue,CD8_loop_svalue,ALL_loop_svalue,svalues_betabinom
264902,chr11,95578258,rs4409785,T,C,0.999995,4.4e-05,3.0,23.0,7.666667,...,"['SESN3: 61.3', 'RP11-712B9.2: 7.1']",0.06,rs4409785,,,,,,,


allelic imbalance CD4


Unnamed: 0,CHROM,POS,ID,REF,ALT,combined_p_val_greater,combined_p_val_less,tot_REF,tot_ALT,ratio,...,eQTLgen_pval,ATAC_hic_corr_score,snp,hsc_genes,tcell_genes,all_genes,CD4_loop_svalue,CD8_loop_svalue,ALL_loop_svalue,svalues_betabinom
184368,chr11,95578258,rs4409785,T,C,0.999995,4.4e-05,3.0,23.0,7.666667,...,"['SESN3: 61.3', 'RP11-712B9.2: 7.1']",0.06,rs4409785,,,,,,,


allelic imbalance CD8


Unnamed: 0,CHROM,POS,ID,REF,ALT,combined_p_val_greater,combined_p_val_less,tot_REF,tot_ALT,ratio,...,eQTLgen_pval,ATAC_hic_corr_score,snp,hsc_genes,tcell_genes,all_genes,CD4_loop_svalue,CD8_loop_svalue,ALL_loop_svalue,svalues_betabinom


loops with allelic imbalance ALL


Unnamed: 0.1,Unnamed: 0,chrA,startA,endA,chrB,startB,endB,loopID,loopScore,rsID,rsCoord,REF_counts,ALT_counts,svalue


loops with allelic imbalance CD4


Unnamed: 0.1,Unnamed: 0,chrA,startA,endA,chrB,startB,endB,loopID,loopScore,rsID,rsCoord,REF_counts,ALT_counts,svalue


loops with allelic imbalance CD8


Unnamed: 0.1,Unnamed: 0,chrA,startA,endA,chrB,startB,endB,loopID,loopScore,rsID,rsCoord,REF_counts,ALT_counts,svalue
