# FinnGen Virus FDR Correction with lags

In [1]:
# Imports here.
import numpy as np
import pandas as pd
import os
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.multitest import fdrcorrection
import warnings
#Suppress all warnings raised from this point on
warnings.filterwarnings(action="ignore")

#Directory in Biowulf; the relative file names used below are resolved against it
DATA_DIR = '/PATH/TO/FinnGen_Data'
os.chdir(DATA_DIR)

In [2]:
#Read one survival-analysis table per disease endpoint, in the order listed
#Files downloaded from FinnGen on 5-5-22
(AD_finn, ALS_finn, dementia_finn,
 MS_finn, PD_finn, vascular_finn) = [
    pd.read_csv(csv_name)
    for csv_name in (
        'G6_ALZHEIMER_survival-analyses.csv',
        'G6_ALS_survival-analyses.csv',
        'F5_DEMENTIA_survival-analyses.csv',
        'G6_MS_survival-analyses.csv',
        'G6_PARKINSON_survival-analyses.csv',
        'F5_VASCDEM_survival-analyses.csv',
    )
]

#Loading our search terms, i.e. the list of viruses
search_terms = pd.read_csv('FinnGen_Search_Terms.csv')

In [3]:
#Pick your NDD here: NDD is the loaded table to analyse, and ndd is the short
#tag that is later prepended to the output CSV file name
NDD, ndd = AD_finn, "AD"

In [4]:
#Select rows whose prior endpoint is in our list of possible viruses
virus_list = list(search_terms['Endpoint'])

#Columns to keep, in output order. Every other column (including
#'outcome_longname') is discarded by the selection itself, so no separate
#drop() call is needed and the cell no longer fails if that unused column
#is missing from the input file.
keep_columns = ['outcome_name','prior_name', 'prior_longname', 'hr_lag','hr', 'ci_min', 'ci_max', 'p', 'N']

#Single row+column selection, then rename to the names used in the report.
#The original row index of NDD is preserved.
had_virus = (
    NDD.loc[NDD['prior_name'].isin(virus_list), keep_columns]
    .rename(columns = {'outcome_name':'NDD', 'prior_name':'Viral_Code', 'prior_longname':'Virus_Description'})
)

#checking results: number of matching rows
print(len(had_virus))

46


In [5]:
#Adding FDR Correction
#All rows selected for this NDD are corrected together as one family of tests.

#Order rows by ascending p-value; fdrcorrection is told below that its input
#is already sorted, so this step must come first
had_virus = had_virus.sort_values(by = "p")

#FDR correction with fdrcorrection's default settings (per the statsmodels
#documentation: alpha=0.05, Benjamini-Hochberg). Returns the reject flags and
#the adjusted p-values, both in the same (sorted) order as the rows.
rejected, p_corr = fdrcorrection(had_virus['p'], is_sorted=True)

#Attach both result arrays positionally as new columns
had_virus = had_virus.assign(P_CORR=p_corr, REJECTED=rejected)

In [6]:
#Display the p-value-sorted table, including the P_CORR and REJECTED columns
had_virus

Unnamed: 0,NDD,Viral_Code,Virus_Description,hr_lag,hr,ci_min,ci_max,p,N,P_CORR,REJECTED
153,G6_ALZHEIMER,J10_INFLUPNEU,Influenza and pneumonia,1,22.99519,16.422046,32.199323,1.94818e-74,681,8.961626e-73,True
798,G6_ALZHEIMER,J10_INFLUPNEU,Influenza and pneumonia,5,6.755561,5.38453,8.475691,3.282922e-61,662,7.550720000000001e-60,True
100,G6_ALZHEIMER,J10_INFLUENZA,All influenza,1,30.294508,19.298979,47.554704,9.870331999999998e-50,80,1.513451e-48,True
1344,G6_ALZHEIMER,J10_INFLUPNEU,Influenza and pneumonia,0,4.107179,3.382205,4.987549,4.014982e-46,2141,4.617229e-45,True
362,G6_ALZHEIMER,J10_INFLUENZA,All influenza,5,12.61291,8.375495,18.994161,6.921081e-34,85,6.367394e-33,True
786,G6_ALZHEIMER,J10_INFLUENZA,All influenza,0,6.809482,4.680332,9.907213,1.150327e-23,214,7.628640000000001e-23,True
78,G6_ALZHEIMER,AB1_OTHER_INFECTIONS,Other infectious diseases,1,37.31,18.389691,75.696544,1.1608800000000002e-23,26,7.628640000000001e-23,True
107,G6_ALZHEIMER,J10_VIRALPNEUMO,Viral pneumonia,1,29.48053,14.839669,58.566106,4.381031e-22,26,2.519093e-21,True
228,G6_ALZHEIMER,AB1_OTHER_INFECTIONS,Other infectious diseases,5,17.820397,9.538701,33.292433,1.670984e-19,29,8.540585999999999e-19,True
156,G6_ALZHEIMER,AB1_VIRAL_NOS,"Other viral diseases, not elsewhere classified",1,22.620981,10.584606,48.344623,8.350117e-16,14,3.841054e-15,True


In [7]:
#Save to csv: file name is the NDD tag followed by a fixed suffix; the row
#index is not written
output_csv = ndd + "_virus_finn_gen_lag.csv"
had_virus.to_csv(output_csv, index=False)

In [8]:
#Load the saved CSV back in and display it to check the written results
results_csv = ndd + "_virus_finn_gen_lag.csv"
df2 = pd.read_csv(results_csv)
df2

Unnamed: 0,NDD,Viral_Code,Virus_Description,hr_lag,hr,ci_min,ci_max,p,N,P_CORR,REJECTED
0,G6_ALZHEIMER,J10_INFLUPNEU,Influenza and pneumonia,1,22.99519,16.422046,32.199323,1.94818e-74,681,8.961626e-73,True
1,G6_ALZHEIMER,J10_INFLUPNEU,Influenza and pneumonia,5,6.755561,5.38453,8.475691,3.282922e-61,662,7.550720000000001e-60,True
2,G6_ALZHEIMER,J10_INFLUENZA,All influenza,1,30.294508,19.298979,47.554704,9.870331999999998e-50,80,1.513451e-48,True
3,G6_ALZHEIMER,J10_INFLUPNEU,Influenza and pneumonia,0,4.107179,3.382205,4.987549,4.014982e-46,2141,4.617229e-45,True
4,G6_ALZHEIMER,J10_INFLUENZA,All influenza,5,12.61291,8.375495,18.994161,6.921081e-34,85,6.367394e-33,True
5,G6_ALZHEIMER,J10_INFLUENZA,All influenza,0,6.809482,4.680332,9.907213,1.150327e-23,214,7.628640000000001e-23,True
6,G6_ALZHEIMER,AB1_OTHER_INFECTIONS,Other infectious diseases,1,37.31,18.389691,75.696544,1.1608800000000002e-23,26,7.628640000000001e-23,True
7,G6_ALZHEIMER,J10_VIRALPNEUMO,Viral pneumonia,1,29.48053,14.839669,58.566106,4.381031e-22,26,2.519093e-21,True
8,G6_ALZHEIMER,AB1_OTHER_INFECTIONS,Other infectious diseases,5,17.820397,9.538701,33.292433,1.670984e-19,29,8.540585999999999e-19,True
9,G6_ALZHEIMER,AB1_VIRAL_NOS,"Other viral diseases, not elsewhere classified",1,22.620981,10.584606,48.344623,8.350117e-16,14,3.841054e-15,True
