# FinnGen Virus FDR Correction

In [None]:
# Imports here.
import numpy as np
import pandas as pd
import os
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.multitest import fdrcorrection
import warnings
#Silence every warning for the rest of the session.
#NOTE(review): this is a blanket filter, so pandas/statsmodels warnings are hidden too;
#comment this line out when debugging unexpected results.
warnings.filterwarnings("ignore")

#Directory in Biowulf
#The files read and written in this notebook use relative names, so they resolve against this directory.
#'/PATH/TO/FinnGen_Data' looks like a placeholder -- replace it with the real folder before running.
os.chdir('/PATH/TO/FinnGen_Data')

In [None]:
#FinnGen survival-analysis exports, one CSV per disease endpoint
#(files downloaded from FinnGen on 5-5-22), listed in the order they are unpacked below
disease_files = (
    'G6_ALZHEIMER_survival-analyses.csv',
    'G6_ALS_survival-analyses.csv',
    'F5_DEMENTIA_survival-analyses.csv',
    'G6_MS_survival-analyses.csv',
    'G6_PARKINSON_survival-analyses.csv',
    'F5_VASCDEM_survival-analyses.csv',
)

#Read each file into its own DataFrame
AD_finn, ALS_finn, dementia_finn, MS_finn, PD_finn, vascular_finn = (
    pd.read_csv(csv_name) for csv_name in disease_files
)

#Search terms, i.e. the list of viruses -- created in the previous step "FinnGen_Search_Terms"
search_terms = pd.read_csv('FinnGen_Search_Terms.csv')

In [None]:
#Choose the disease to analyse here. Change BOTH values together:
#  ndd -- short label, used as the prefix of the CSV written at the end
#  NDD -- the loaded FinnGen table for that same disease
ndd = "AD"
NDD = AD_finn

In [None]:
#Endpoint codes of the candidate viruses, taken from the search-terms table
virus_list = list(search_terms['Endpoint'])

#Keep only the rows of the chosen NDD table whose prior endpoint is one of those codes,
#i.e. the viral exposures recorded before the NDD diagnosis
is_viral_prior = NDD['prior_name'].isin(virus_list)
had_virus = NDD.loc[is_viral_prior]

In [None]:
#Setting lag time to 0 (anytime before NDD diagnosis): keep only the rows with hr_lag == 0
lag0 = had_virus.loc[had_virus['hr_lag'] == 0]

#Keep only the columns we report, then give them analysis-friendly names.
#Selecting an explicit column list already leaves out 'hr_lag' and 'outcome_longname',
#so no separate drop() is needed (and the cell no longer fails if 'outcome_longname' is absent).
report_columns = ['outcome_name', 'prior_name', 'prior_longname', 'hr', 'ci_min', 'ci_max', 'p', 'N']
had_virus = lag0[report_columns].rename(
    columns={
        'outcome_name': 'NDD',
        'prior_name': 'Viral_Code',
        'prior_longname': 'Virus_Description',
    }
)

In [None]:
#If you would like to look at individual lags before diagnosis omit above cell and run this one instead:

#Dropping columns we don't need
#had_virus = had_virus.drop(['outcome_longname'], axis=1)
#had_virus = had_virus[['outcome_name','prior_name', 'prior_longname', 'hr_lag','hr', 'ci_min', 'ci_max', 'p', 'N']]
#had_virus = had_virus.rename(columns = {'outcome_name':'NDD', 'prior_name':'Viral_Code', 'prior_longname':'Virus_Description'})

In [None]:
#Sanity check: how many rows are left in had_virus, then a preview of the first few
print(had_virus.shape[0])
had_virus.head()

In [None]:
#Adding FDR Correction
#Adds two columns to had_virus:
#  P_CORR   -- FDR-adjusted p-value returned by fdrcorrection (default alpha and method)
#  REJECTED -- True where fdrcorrection rejects the null hypothesis

#Sort P-values (ascending; sort_values places missing p-values last)
had_virus = had_virus.sort_values(by = "p")

#Only rows that actually have a p-value take part in the correction. A missing p-value
#passed to fdrcorrection would turn every adjusted p-value into NaN and would be
#counted as an extra test, so such rows are left as P_CORR = NaN / REJECTED = False.
has_p = had_virus['p'].notna().to_numpy()
p_corr = np.full(len(had_virus), np.nan)
rejected = np.zeros(len(had_virus), dtype=bool)

#FDR Correction
#is_sorted=True is valid because the non-missing p-values are already in ascending order
if has_p.any():
    rejected_tested, p_corr_tested = fdrcorrection(had_virus.loc[has_p, 'p'], is_sorted=True)
    rejected[has_p] = rejected_tested
    p_corr[has_p] = p_corr_tested

#The arrays are in the same (sorted) row order as had_virus, so they align positionally
had_virus['P_CORR'] = p_corr
had_virus['REJECTED'] = rejected

In [None]:
#Check results: preview the first rows of had_virus, which is sorted by p
#and now carries the P_CORR and REJECTED columns
had_virus.head()

In [None]:
#Save to csv: "<ndd>_virus_finn_gen.csv" in the working directory, without the index column
out_csv = f"{ndd}_virus_finn_gen.csv"
had_virus.to_csv(out_csv, index=False)