In [1]:
# Suppress all warnings for the rest of the session (warn = -1 silences them)
options(warn = -1)

In [2]:
# Today's date as a "YYYY-MM-DD" string, for time-stamping
today <- format(Sys.Date(), format = "%Y-%m-%d")

In [24]:
# load packages
library('gwasrapidd')
library('tidyverse')
library('glue')                     

In [35]:
# Load the EBI trait table (disease traits with their EFO terms, parent terms
# and EFO ids) and preview the first rows
ebi <- read_csv("/home/jovyan/EBI_codes.csv")
head(ebi)

Rows: 45887 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Disease trait, EFO term, EFO URI, Parent term, Parent URI, EFO_ids

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


Disease trait,EFO term,EFO URI,Parent term,Parent URI,EFO_ids
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
Cervical cancer,cervical cancer,http://purl.obolibrary.org/obo/MONDO_0002974,Cancer,http://www.ebi.ac.uk/efo/EFO_0000616,MONDO_0002974
Clubfoot,familial clubfoot with or without associated lower limb anomalies,http://www.orpha.net/ORDO/Orphanet_199315,NR,NR,Orphanet_199315
X-12093 levels,X-12093 measurement,http://www.ebi.ac.uk/efo/EFO_0021285,Other measurement,http://www.ebi.ac.uk/efo/EFO_0001444,EFO_0021285
Alcohol dependence or heroin dependence or methamphetamine dependence,methamphetamine dependence,http://www.ebi.ac.uk/efo/EFO_0004701,Neurological disorder,http://www.ebi.ac.uk/efo/EFO_0000618,EFO_0004701
Methamphetamine dependence,methamphetamine dependence,http://www.ebi.ac.uk/efo/EFO_0004701,Neurological disorder,http://www.ebi.ac.uk/efo/EFO_0000618,EFO_0004701
X-12063 levels,X-12063 measurement,http://www.ebi.ac.uk/efo/EFO_0021283,Other measurement,http://www.ebi.ac.uk/efo/EFO_0001444,EFO_0021283


In [45]:
# Build a broad list of cardiovascular disease traits: keep rows whose parent
# term contains "ardiovascular" or whose disease trait contains "ongenital"
# (presumably the first letter is dropped so either capitalisation matches)
cv_only <- ebi %>%
    filter(
        str_detect(`Parent term`, "ardiovascular") |
            str_detect(`Disease trait`, "ongenital")
    )

# De-duplicated EFO ids of the selected traits, followed by two negative controls
efo_ids <- c(
    unique(cv_only$EFO_ids),
    "MONDO_0005090", # Schizophrenia (negative control)
    "MONDO_0005178"  # Osteoarthritis (negative control)
)

In [48]:
# Look up metadata for the selected EFO ids and keep the `traits` slot of the
# result (the only slot of that object); then show its dimensions and first rows
efos <- get_traits(efo_id = efo_ids)@traits
print(dim(efos))
head(efos)

[1] 273   3


efo_id,trait,uri
<chr>,<chr>,<chr>
EFO_0004715,mri defined brain infarct,http://www.ebi.ac.uk/efo/EFO_0004715
EFO_0004718,vascular dementia,http://www.ebi.ac.uk/efo/EFO_0004718
EFO_0001666,aortic aneurysm,http://www.ebi.ac.uk/efo/EFO_0001666
Orphanet_136,cerebral autosomal dominant arteriopathy with subcortical infarcts and leukoencephalopathy,http://www.orpha.net/ORDO/Orphanet_136
EFO_0004745,nt-probnp measurement,http://www.ebi.ac.uk/efo/EFO_0004745
EFO_0011035,TP segment duration,http://www.ebi.ac.uk/efo/EFO_0011035


In [None]:
# Make a table of index SNPs for each trait.
#
# For every row of `efos`, fetch the trait's variants and write them to
# "<efo_id>_<trait>_index_SNPs.csv" in `snp_dir`. Traits whose CSV already
# exists are skipped, so this cell can be re-run after an interruption without
# fetching them again. Tables written during this run are also kept in
# `list_of_tibbles` at the trait's index; skipped or empty traits leave no
# entry there.

list_of_tibbles <- list()
snp_dir <- "/nfs/team205/heart/EBI_GWAS/index_snps"

for (i in seq_along(efos$trait)) {
    trait_id <- efos$efo_id[i]
    trait_name <- efos$trait[i]

    # Trait labels are free text: a "/" in the label would be interpreted as a
    # directory separator and make write_csv() fail, so replace it in the file
    # name only. Labels without "/" produce exactly the same path as before.
    file_label <- gsub("/", "_", trait_name, fixed = TRUE)
    csv_path <- glue("{snp_dir}/{trait_id}_{file_label}_index_SNPs.csv")

    if (file.exists(csv_path)) {
        print(glue("{trait_name}: file already exists"))
        next
    }

    print(glue("{trait_name}: file needs to be made"))
    variants_table <- get_variants(efo_id = trait_id)@variants # gets variants for the trait

    # Intended to remove variants whose SNP coordinates are not known, since
    # these are useless to us. NOTE(review): complete.cases() drops rows with
    # an NA in ANY column, not only the coordinate columns - confirm that this
    # is the desired filter.
    variants_table <- variants_table[complete.cases(variants_table), ]

    # only keep traits for which there are SNPs
    if (nrow(variants_table) == 0) {
        next
    }

    variants_table$efo_id <- trait_id
    variants_table$efo_term <- trait_name
    print(trait_id)
    list_of_tibbles[[i]] <- variants_table
    write_csv(variants_table, csv_path)
}