In [12]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.stats.multitest import multipletests


In [21]:
# Root directory shared by all three result tables; edit this single value to
# point the notebook at another copy of the IIH results.
IIH_ROOT = "/cs/labs/michall/roeizucker/IIH"

# Input tables (each is read with pd.read_csv).
GWAS_RESULTS_FILE = IIH_ROOT + "/GWAS/results/iih_mod_gwas_with_genes.csv"
PWAS_RESULTS_FILE = IIH_ROOT + "/PWAS/results/updated_Both.csv"
SKAT_RESULTS_FILE = IIH_ROOT + "/IIH_results_full.csv"

# Significance cut-offs; every comparison downstream is a strict "<".
GWAS_PVAL_THREASHOLD = 5e-7  # applied to the GWAS "P" column
SKAT_PVAL_THREASHOLD = 5e-5  # applied to SKAT "P.value" when SKAT_USE_FDR is False
SKAT_QVAL_THREASHOLD = 0.05  # applied to SKAT "q_values" when SKAT_USE_FDR is True
SKAT_USE_FDR = True  # True: filter SKAT by q-value; False: filter by raw p-value

In [22]:
# Load the three association-result tables.
gwas_df = pd.read_csv(GWAS_RESULTS_FILE)
pwas_df = pd.read_csv(PWAS_RESULTS_FILE)
skat_df = pd.read_csv(SKAT_RESULTS_FILE)

# Benjamini-Hochberg FDR correction of the SKAT "P.value" column.
# Only rows that actually carry a p-value are passed to multipletests: a NaN
# in its input can propagate into every adjusted value, which would silently
# empty any later `q_values < threshold` filter. Rows without a p-value keep a
# NaN q-value and therefore never pass such a filter.
skat_df["q_values"] = float("nan")
has_pvalue = skat_df["P.value"].notna()
if has_pvalue.any():
    _, q_values, _, _ = multipletests(
        skat_df.loc[has_pvalue, "P.value"].to_numpy(), method="fdr_bh"
    )
    skat_df.loc[has_pvalue, "q_values"] = q_values

In [23]:
# Build one set of gene symbols per method. Missing symbols are dropped in all
# three sets (previously only for PWAS) so that NaN can never be counted as a
# "gene" or show up in an intersection between methods.

# GWAS: rows whose "P" is below the p-value cut-off.
gwas_df_filtered = gwas_df[gwas_df["P"] < GWAS_PVAL_THREASHOLD]
gwas_genes = set(gwas_df_filtered["gene_symbol"].dropna().unique())

# PWAS: rows selected by the "fdr_significance" column used as a boolean mask.
pwas_genes = set(pwas_df[pwas_df["fdr_significance"]]["symbol"].dropna().unique())

# SKAT: filter on the FDR-adjusted q-value or on the raw p-value, as configured.
if SKAT_USE_FDR:
    skat_significant = skat_df["q_values"] < SKAT_QVAL_THREASHOLD
else:
    skat_significant = skat_df["P.value"] < SKAT_PVAL_THREASHOLD
skat_genes = set(skat_df[skat_significant]["symbol"].dropna().unique())

In [26]:
# Report which genes are shared between the methods: first the genes found by
# all three, then each pairwise overlap.
gwas_and_pwas = gwas_genes & pwas_genes
shared_genes_by_label = {
    "all intersection:\t": gwas_and_pwas & skat_genes,
    "gwas,pwas intersection:\t": gwas_and_pwas,
    "pwas,skat intersection:\t": pwas_genes & skat_genes,
    "gwas,skat intersection:\t": gwas_genes & skat_genes,
}
for label, shared_genes in shared_genes_by_label.items():
    print(label, shared_genes)

all intersection:	 {'FOXF1', 'RGCC'}
gwas,pwas intersection:	 {'FOXF1', 'RGCC'}
pwas,skat intersection:	 {'FOXF1', 'PMM2', 'RGCC', 'NME2', 'BARHL1', 'CLDN25', 'MFSD14C', 'BARX2', 'NOVA2', 'LYPD1', 'OASL'}
gwas,skat intersection:	 {'FOXF1', 'PEX11A', 'TRMT5', 'RGCC', 'FGFR1', 'IRGQ', 'EEF2K', 'NUSAP1', 'SLC28A3', 'TPO', 'MAPK15', 'LSM11'}
