# Test 1.

The following example shows how to access data from the GWAS Catalog through the REST API.

## Exercise 1

**Get a list of associations for the following variants:**

* rs142968358
* rs62402518
* rs12199222
* rs7329174

In [None]:
import requests
import pandas as pd

# Base address of the GWAS Catalog REST API:
apiUrl = 'https://www.ebi.ac.uk/gwas/rest/api'

# rsIDs that the later cells loop over:
variants = [
    'rs142968358',
    'rs62402518',
    'rs12199222',
    'rs7329174',
    'rs9879858765',
]

# Start with one variant: build the URL of its associations resource
# and fetch it.
variant = 'rs7329174'
requestUrl = '%s/singleNucleotidePolymorphisms/%s/associations?projection=associationBySnp' % (
    apiUrl,
    variant,
)
response = requests.get(
    requestUrl,
    headers={"Content-Type": "application/json"},
)

# requests.get() returns a "response" object; its body still has to be
# parsed before the information in it can be used:
decoded = response.json()

# The parsed body is a python dictionary. Leave the bare name as the last
# expression of the cell to take a look at the values:
decoded

## Get traits and p-values for a single variant

In [None]:
# Print one "Trait / p-value" line for every association of the variant
# fetched above. An association may list several EFO traits; their names
# are joined with commas into a single string.
for association in decoded['_embedded']['associations']:
    pvalue = association['pvalue']
    trait = ",".join(efoTrait['trait'] for efoTrait in association['efoTraits'])

    print("Trait: %s, p-value: %s" % (trait, pvalue))


## Generate a table of traits and p-values for all variants:

In [None]:

# Build one row (variant, trait, p-value) per association for every rsID in
# `variants`. Variants that cannot be fetched or parsed are reported with a
# warning and skipped, so one bad rsID does not abort the whole table.
extractedData = []
for variant in variants:

    # Accessing data for a single variant:
    requestUrl = '%s/singleNucleotidePolymorphisms/%s/associations?projection=associationBySnp' %(apiUrl, variant)
    response = requests.get(requestUrl, headers={ "Content-Type" : "application/json"})

    # Testing if rsID exists: any error status (4xx/5xx) is reported as
    # "not in the catalog" and the variant is skipped.
    if not response.ok:
        print("[Warning] %s is not in the GWAS Catalog!!" % variant)
        continue

    # Test if the returned data looks good. Response.json() signals an
    # unparsable body with a ValueError subclass; catching only that keeps
    # Ctrl-C (KeyboardInterrupt) and unrelated bugs visible.
    try:
        decoded = response.json()
    except ValueError:
        print("[Warning] Failed to decode data for %s" % variant)
        continue

    for association in decoded['_embedded']['associations']:
        # An association can carry several EFO traits; join their names.
        trait = ",".join(efoTrait['trait'] for efoTrait in association['efoTraits'])
        pvalue = association['pvalue']

        extractedData.append({'variant' : variant,
                              'trait' : trait,
                              'pvalue' : pvalue
                             })

# Naming the columns explicitly keeps their order fixed and makes sure they
# exist even when no association was collected.
table = pd.DataFrame(extractedData, columns=['variant', 'trait', 'pvalue'])
table

## Extend the previous table with the PubMed ID and study accession

In [None]:
def getStudy(studyLink):
    """Fetch a single study and return its accession ID and PubMed ID.

    Args:
        studyLink: URL of the study resource; it is requested as given.

    Returns:
        A tuple ``(accessionID, pubmedId)`` taken from the ``accessionId``
        and ``publicationInfo.pubmedId`` fields of the returned JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If the response body is not valid JSON.
        KeyError: If one of the expected fields is missing from the JSON.
    """
    # Accessing data for a single study:
    response = requests.get(studyLink, headers={ "Content-Type" : "application/json"})

    # Fail right here with the HTTP status instead of letting an error page
    # surface later as a confusing JSON or KeyError problem.
    response.raise_for_status()
    decoded = response.json()

    accessionID = decoded['accessionId']
    pubmedId = decoded['publicationInfo']['pubmedId']

    return (accessionID, pubmedId)

In [None]:
# Same table as before, extended with the accession ID and PubMed ID of the
# study behind each association. Variants that cannot be fetched or parsed
# are reported with a warning and skipped.
extractedData = []
for variant in variants:

    # Accessing data for a single variant:
    requestUrl = '%s/singleNucleotidePolymorphisms/%s/associations?projection=associationBySnp' %(apiUrl, variant)
    response = requests.get(requestUrl, headers={ "Content-Type" : "application/json"})

    # Testing if rsID exists: any error status (4xx/5xx) is reported as
    # "not in the catalog" and the variant is skipped.
    if not response.ok:
        print("[Warning] %s is not in the GWAS Catalog!!" % variant)
        continue

    # Test if the returned data looks good. Response.json() signals an
    # unparsable body with a ValueError subclass; catching only that keeps
    # Ctrl-C (KeyboardInterrupt) and unrelated bugs visible.
    try:
        decoded = response.json()
    except ValueError:
        print("[Warning] Failed to decode data for %s" % variant)
        continue

    for association in decoded['_embedded']['associations']:
        # extract study data (one extra request per association):
        (accessionID, pubmedId) = getStudy(association['_links']['study']['href'])

        # extract association data; several EFO traits are joined by commas:
        trait = ",".join(efoTrait['trait'] for efoTrait in association['efoTraits'])
        pvalue = association['pvalue']

        extractedData.append({'variant' : variant,
                              'trait' : trait,
                              'pvalue' : pvalue,
                              'accessionID' : accessionID,
                              'pubmedID' : pubmedId
                             })

# Naming the columns explicitly keeps their order fixed and makes sure they
# exist even when no association was collected.
table = pd.DataFrame(
    extractedData,
    columns=['variant', 'trait', 'pvalue', 'accessionID', 'pubmedID'],
)
# Optional export of the result (left disabled):
# table.to_excel('workshop.xlsx')
print(table)
        