### Uses a very broad search (`Parent term` contains the string 'ardiovascular', or `Disease trait` contains the string 'ongenital') to select a large number of trait IDs (these are pruned later)

In [1]:
# Suppress warnings for the rest of the session (warn = -1 silences them)
options(warn = -1)

In [2]:
# Current date as a "YYYY-MM-DD" string, used for time stamping
today <- format(Sys.Date(), "%Y-%m-%d")

In [24]:
# load packages
library('gwasrapidd')
library('tidyverse')
library('glue')                     

In [1]:
# Read the EBI codes table (the source of the EFO IDs selected below) and
# preview its first rows. Requires the tidyverse to be attached first.
ebi_codes_path <- "/home/jovyan/EBI_codes.csv"
ebi <- read_csv(ebi_codes_path, show_col_types = FALSE)
head(ebi)

ERROR: Error in read_csv("/home/jovyan/EBI_codes.csv", show_col_types = FALSE): could not find function "read_csv"


In [45]:
# Broad first-pass selection of cardiovascular traits: keep rows whose
# `Parent term` contains "ardiovascular" or whose `Disease trait` contains
# "ongenital" (the leading letter is left out of both patterns, so either
# capitalisation of the first letter matches).
cv_only <- ebi %>%
  filter(
    str_detect(`Parent term`, "ardiovascular") |
      str_detect(`Disease trait`, "ongenital")
  )

# De-duplicated EFO IDs of the selected rows, plus Osteoarthritis
# (MONDO_0005178) as a negative control.
efo_ids <- c(unique(cv_only$EFO_ids), "MONDO_0005178")

In [48]:
# Look up metadata for the selected trait terms (the efo_ids in particular).
# Only the `traits` slot of the returned object is kept.
efos <- get_traits(efo_id = efo_ids)@traits
print(dim(efos))
head(efos)

[1] 273   3


efo_id,trait,uri
<chr>,<chr>,<chr>
EFO_0004715,mri defined brain infarct,http://www.ebi.ac.uk/efo/EFO_0004715
EFO_0004718,vascular dementia,http://www.ebi.ac.uk/efo/EFO_0004718
EFO_0001666,aortic aneurysm,http://www.ebi.ac.uk/efo/EFO_0001666
Orphanet_136,cerebral autosomal dominant arteriopathy with subcortical infarcts and leukoencephalopathy,http://www.orpha.net/ORDO/Orphanet_136
EFO_0004745,nt-probnp measurement,http://www.ebi.ac.uk/efo/EFO_0004745
EFO_0011035,TP segment duration,http://www.ebi.ac.uk/efo/EFO_0011035


In [51]:
# Make a table of index SNPs for each trait.
#
# For every trait in `efos`, the variants are fetched and written to a
# per-trait CSV unless that CSV already exists. Tables fetched during this
# run are also stored in `list_of_tibbles` at the trait's index; traits that
# are skipped (file already present, or no usable variants) leave no entry.

list_of_tibbles <- list()

for (i in seq_along(efos$trait)) {
  # Output file for this trait, named "<efo_id>_<trait>_index_SNPs.csv"
  index_snps_path <- glue(
    "/nfs/team205/heart/EBI_GWAS/index_snps/{efos$efo_id[i]}_{efos$trait[i]}_index_SNPs.csv"
  )

  # Nothing to do when the CSV was already produced by an earlier run
  if (file.exists(index_snps_path)) {
    print(glue("{efos$trait[i]}: file already exists"))
    next
  }

  print(glue("{efos$trait[i]}: file needs to be made"))

  # Fetch the variants for the trait
  variants_table <- get_variants(efo_id = efos$efo_id[i])@variants

  # Drop variants whose SNP coordinates are not known, since these are
  # useless to us.
  # NOTE(review): complete.cases() removes rows with an NA in *any* column,
  # not only the coordinate columns -- confirm this is the intended filter.
  variants_table <- variants_table[complete.cases(variants_table), ]

  # Only keep traits for which there are SNPs
  if (nrow(variants_table) == 0) {
    next
  }

  # Tag every variant with the trait it belongs to
  variants_table$efo_id <- efos$efo_id[i]
  variants_table$efo_term <- efos$trait[i]
  n_SNPs <- nrow(variants_table) # not read again within this cell
  print(efos$efo_id[i])

  list_of_tibbles[[i]] <- variants_table
  write_csv(variants_table, index_snps_path)
}

mri defined brain infarct: file needs to be made
vascular dementia: file needs to be made
[1] "EFO_0004718"
aortic aneurysm: file needs to be made
[1] "EFO_0001666"
cerebral autosomal dominant arteriopathy with subcortical infarcts and leukoencephalopathy: file needs to be made
[1] "Orphanet_136"
nt-probnp measurement: file needs to be made
[1] "EFO_0004745"
TP segment duration: file needs to be made
ST segment duration: file needs to be made
T wave duration: file needs to be made
carotid-femoral pulse wave velocity: file needs to be made
[1] "EFO_0004724"
congestive heart failure: file needs to be made
[1] "EFO_0000373"
atrial natriuretic factor measurement: file needs to be made
[1] "EFO_0004789"
cardiotoxicity: file needs to be made
[1] "EFO_1001482"
non-obstructive coronary artery disease: file needs to be made
[1] "EFO_1001483"
tissue plasminogen activator measurement: file needs to be made
[1] "EFO_0004791"
vascular endothelial growth factor measurement: file needs to be made
[1]