# Selecting Viral Search Terms from FinnGen

In [1]:
# Imports here.
import numpy as np
import pandas as pd
import os
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# which includes pandas deprecation and chained-assignment warnings.
warnings.filterwarnings("ignore")

#Directory in Biowulf
# The loading cells below open their files by bare filename, so those reads
# resolve against the directory set here.
os.chdir('/PATH/TO/FinnGen_Files')

In [2]:
# Per-disease survival-analysis exports (files downloaded from FinnGen on 5-5-22).
# The filenames are listed in the same order as the names they are unpacked into.
AD_finn, ALS_finn, dementia_finn, MS_finn, PD_finn, vascular_finn = map(
    pd.read_csv,
    (
        'G6_ALZHEIMER_survival-analyses.csv',
        'G6_ALS_survival-analyses.csv',
        'F5_DEMENTIA_survival-analyses.csv',
        'G6_MS_survival-analyses.csv',
        'G6_PARKINSON_survival-analyses.csv',
        'F5_VASCDEM_survival-analyses.csv',
    ),
)

# Tab-separated table listing all the FinnGen endpoints
finngen = pd.read_csv("finngen_endpoints.tsv", sep="\t")
print(finngen.shape[0])

3095


In [3]:
# Stack the six per-disease tables into one frame with a fresh 0..n-1 index.
ndd_tables = [AD_finn, ALS_finn, dementia_finn, MS_finn, PD_finn, vascular_finn]
df = pd.concat(ndd_tables, ignore_index=True)
print(df.shape[0])

# Distinct values of 'prior_name' across all six tables.
prior_list = set(df['prior_name'])
print("Number of endpoints with NDD data:", len(prior_list))

15660
Number of endpoints with NDD data: 1385


In [4]:
# From all the endpoints, keep only those whose phenocode appears as a
# 'prior_name' in the NDD survival data.
# .copy() makes finngen_data an independent frame instead of a selection taken
# from `finngen`, so any later column assignment on it is unambiguous and never
# depends on whether pandas handed back a view or a copy.
finngen_data = finngen[finngen.phenocode.isin(prior_list)].copy()
print(len(finngen_data))

1350


In [5]:
#Create a lowercase Series of all the code descriptions
my_list = finngen_data["phenotype"].str.lower()

#List of all viral-related keywords; each is matched as a plain substring of
#the lowercased description.
# NOTE(review): "bell's_palsy" (with an underscore) matched 0 descriptions in
# the recorded run; the 's palsy' entry is the one that matched.
# NOTE(review): short keywords such as 'cold' also match inside longer words -
# confirm the selected endpoints by eye.
virus = ['viral', 'virus', 'mononucleosis', 'epstein', 'ebv', 'encephalitis', 'hepatitis', 'meningitis', 'warts',
         'influenza', "bell's_palsy", 's palsy', 'chicken', 'shingles', 'zoster', 'measles', 'varicella', 'herpes',
         'myocarditis', 'erythema multiforme', "subacute thyroiditis", 'cold']

# Collect the matching rows for each keyword, in keyword order.
matches_per_keyword = []
for sub in virus:
    # Literal substring test (regex=False). na=False keeps rows with a missing
    # description out of the selection; comparing str.find() to -1 would count
    # them as a hit for every keyword, because NaN != -1 is True.
    has_keyword = my_list.str.contains(sub, regex=False, na=False)
    # Select without writing a helper column into finngen_data, so re-running
    # this cell leaves finngen_data unchanged.
    df1 = finngen_data.loc[has_keyword]
    print(sub, 'codes:', len(df1))
    matches_per_keyword.append(df1)

# One concat at the end replaces the per-iteration DataFrame.append, which was
# removed in pandas 2.0. Rows keep their original index labels and the same
# keyword-then-table order, so a keep-first de-duplication picks the same rows.
df = pd.concat(matches_per_keyword)

viral codes: 16
virus codes: 0
mononucleosis codes: 1
epstein codes: 0
ebv codes: 0
encephalitis codes: 2
hepatitis codes: 2
meningitis codes: 4
warts codes: 1
influenza codes: 3
bell's_palsy codes: 0
s palsy codes: 1
chicken codes: 1
shingles codes: 0
zoster codes: 1
measles codes: 0
varicella codes: 1
herpes codes: 4
myocarditis codes: 1
erythema multiforme codes: 1
subacute thyroiditis codes: 1
cold codes: 1


In [6]:
# An endpoint matched by several keywords appears once per keyword;
# keep only its first occurrence.
total_codes = df.drop_duplicates(subset=['phenocode'], keep='first')
n_unique_codes = total_codes.shape[0]
print("Without duplicates:", n_unique_codes)

Without duplicates: 32


In [7]:
# Keep three columns for each selected endpoint: its code, its description
# and its 'number of cases' value.
report_columns = ['phenocode', 'phenotype', 'number of cases']
search_terms = total_codes[report_columns]

print(search_terms.shape[0])
search_terms

32


Unnamed: 0,phenocode,phenotype,number of cases
1013,AB1_VIRAL_OTHER_INTEST_INFECTIONS,Viral and other specified intestinal infections,6053
1014,AB1_VIRAL_HEMOR_FEVER_NOS,"Viral haemorrhagic fevers, not elsewhere class...",1695
1526,AB1_VIRAL_WARTS,Viral warts,2229
1527,AB1_VIRAL_SKIN_MUCOUS_MEMBRANE,Viral infections characterized by skin and muc...,10658
1542,AB1_ANOGENITAL_HERPES_SIMPLEX,Anogenital herpesviral [herpes simplex] infection,1344
2289,J10_VIRALPNEUMO,Viral pneumonia,2272
2442,H7_HERPESKERATITIS,Herpesviral keratitis and keratoconjunctivitis,878
3040,AB1_VIRAL_NOS,"Other viral diseases, not elsewhere classified",4604
3041,AB1_VIRAL_MENINGITIS,Viral meningitis,803
3042,AB1_VIRAL_INOTHER,Viral agents as the cause of diseases classifi...,401


In [8]:
# Write the search terms to csv for use in the analysis; the row index is not written.
search_terms.to_csv(path_or_buf="FinnGen_search_terms.csv", index=False)

In [9]:
# Read the saved file back in and display it as a check
df3 = pd.read_csv(filepath_or_buffer="FinnGen_search_terms.csv")
df3

Unnamed: 0,phenocode,phenotype,number of cases
0,AB1_VIRAL_OTHER_INTEST_INFECTIONS,Viral and other specified intestinal infections,6053
1,AB1_VIRAL_HEMOR_FEVER_NOS,"Viral haemorrhagic fevers, not elsewhere class...",1695
2,AB1_VIRAL_WARTS,Viral warts,2229
3,AB1_VIRAL_SKIN_MUCOUS_MEMBRANE,Viral infections characterized by skin and muc...,10658
4,AB1_ANOGENITAL_HERPES_SIMPLEX,Anogenital herpesviral [herpes simplex] infection,1344
5,J10_VIRALPNEUMO,Viral pneumonia,2272
6,H7_HERPESKERATITIS,Herpesviral keratitis and keratoconjunctivitis,878
7,AB1_VIRAL_NOS,"Other viral diseases, not elsewhere classified",4604
8,AB1_VIRAL_MENINGITIS,Viral meningitis,803
9,AB1_VIRAL_INOTHER,Viral agents as the cause of diseases classifi...,401
