In [None]:
import csv
try:
    from urllib import urlencode
except ImportError:
    from urllib.parse import urlencode
from pandas import DataFrame
import matplotlib.pyplot as plt
import numpy as np

In [None]:
from jsonapi_client import Session, Filter

# Base URL handed to jsonapi_client's Session below; it targets the `wwwdev`
# host and version v0.2 of the metagenomics API.
API_BASE = 'https://wwwdev.ebi.ac.uk/metagenomics/api/v0.2/'

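List all runs (analyses) annotated with GO term GO:0015979, restricted to metagenomic experiments processed with pipeline version 2 that have temperature metadata: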
https://wwwdev.ebi.ac.uk/metagenomics/api/v0.2/annotations/go-terms/GO:0015979/analysis?experiment_type=metagenomic&pipeline_version=2&metadata_key=temperature

In [None]:
def find_metadata(metadata):
    """
    Scan metadata entries for a temperature and a depth value.

    Every entry must expose ``var_name`` and ``var_value``. An entry whose
    lower-cased name contains 'temperature' supplies the temperature, one
    whose name contains 'depth' supplies the depth (a single entry may
    supply both). Later matches overwrite earlier ones, and scanning stops
    as soon as both values are non-None.

    Returns a ``(temperature, depth)`` tuple; an element is None when no
    matching entry provided a value for it.
    """
    keywords = ('temperature', 'depth')
    found = dict.fromkeys(keywords)
    for entry in metadata:
        name = entry.var_name.lower()
        for keyword in keywords:
            if keyword in name:
                found[keyword] = entry.var_value
        # Stop consuming the iterable once nothing is left to look for.
        if all(found[keyword] is not None for keyword in keywords):
            break
    return (found['temperature'], found['depth'])

def find_norm(metadata, key='Predicted CDS with InterProScan match'):
    """
    Return the value of the metadata entry whose name equals ``key``.

    The comparison is case-insensitive and requires the whole name to match
    (no substring matching).

    :param metadata: iterable of entries exposing ``var_name`` and
        ``var_value``.
    :param key: name of the entry to look up; defaults to
        'Predicted CDS with InterProScan match'.
    :return: ``var_value`` of the first matching entry, or None when no
        entry matches.
    """
    # Lower-case the key once instead of on every iteration.
    wanted = key.lower()
    for m in metadata:
        if m.var_name.lower() == wanted:
            return m.var_value
    return None



# Name of the analysis metadata entry that find_norm looks up. The identifier
# is misspelled but kept as-is so that anything else referencing it still works.
normilize_key = 'Predicted CDS with InterProScan match'

# map GO terms to the temperature
# (not filled by this cell)
result = {}

# GO term whose per-run counts are exported.
GO_TERM = 'GO:0015979'

# The csv module documents that files given to a writer should be opened with
# newline='' so that it controls line endings itself (otherwise an extra '\r'
# is written on platforms that use '\r\n').
with open("GO_0015979.csv", "w", newline='') as csvfile:
    fieldnames = [
        'run', 'pipeline', 'sample', 'sample_name', 'study', 'biome',
        'longitude', 'latitude', 'temperature', 'depth', 'go', 'count', 'pcds'
    ]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    with Session(API_BASE) as s:

        # temporary dict to store accession and metadata
        metadata_map = {}
        # accessions of runs skipped because the normalisation value
        # (find_norm) was absent or not an integer
        missing_meta = []

        print('Loading data from API.', end='', flush=True)

        # preparing url
        params = {
            'experiment_type': 'metagenomic',
            'pipeline_version': 2,
            'metadata_key': 'temperature',
            'page_size': 100
        }
        f = Filter(urlencode(params))
        # The go-slim page size is the same for every run: build the filter once.
        af = Filter(urlencode({'page_size': 100}))

        # list runs
        for anls in s.iterate('annotations/go-terms/%s/analysis' % GO_TERM, f):
            print('.', end='', flush=True)
            # find temperature and depth for each run
            _t, _d = find_metadata(anls.sample.metadata)

            # find_norm returns None when the entry is missing, which int()
            # rejects; record the run and move on instead of aborting the
            # whole export.
            try:
                _pcds = int(find_norm(anls.metadata))
            except (TypeError, ValueError):
                missing_meta.append(anls.accession)
                continue

            sample = anls.sample
            biome = sample.biome

            # list a summary of GO terms derived from InterPro matches
            rt = "runs/%s/pipelines/%s/go-slim" % (anls.accession, anls.pipeline_version)
            for ann in s.iterate(rt, af):
                if ann.accession != GO_TERM:
                    continue
                row = {
                    'run': anls.accession,
                    'pipeline': anls.pipeline_version,
                    'sample': sample.accession,
                    'sample_name': sample.sample_name,
                    'study': anls.study_accession,
                    'biome': biome.lineage,
                    'longitude': sample.longitude,
                    'latitude': sample.latitude,
                    'temperature': _t,
                    'depth': _d,
                    'go': ann.accession,
                    'count': ann.count,
                    'pcds': _pcds,
                }
                writer.writerow(row)
                # The term of interest has been written; stop iterating so no
                # further go-slim pages are requested for this run.
                break

        print("DONE")
        print("Missing: ", missing_meta)