# Download QC ERP009703 pipeline v4

List all metagenomic analysis runs of study ERP009703 processed with pipeline 4.0:
https://www.ebi.ac.uk/metagenomics/api/v0.2/pipelines/4.0/analysis?experiment_type=metagenomic&study_accession=ERP009703

In [1]:
import collections
try:
    from urllib import urlencode
except ImportError:
    from urllib.parse import urlencode

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame

In [2]:
from jsonapi_client import Session, Filter

# Root URL of the EBI Metagenomics REST API (v0.2). It is handed to Session,
# and the endpoint paths used when iterating are given relative to it.
API_BASE = 'https://www.ebi.ac.uk/metagenomics/api/v0.2/'

In [3]:
def find_metadata(metadata, key):
    """
    Look up a metadata value by name, ignoring case.

    Each item of ``metadata`` must expose ``var_name`` and ``var_value``
    attributes. The ``var_value`` of the first item whose ``var_name``
    matches ``key`` case-insensitively is returned; ``None`` is returned
    when nothing matches.
    """
    matches = (
        entry.var_value
        for entry in metadata
        if entry.var_name.lower() == key.lower()
    )
    return next(matches, None)


# QC metadata entries to extract for every analysis (matched case-insensitively
# by find_metadata); they also become CSV columns later on.
qc_keys = ['Predicted CDS', 'Predicted CDS with InterProScan match']
pipeline = '4.0'

# GO-slim annotation counts per analysis: {accession: {GO accession: count}}
result = {}
# every GO-slim accession seen across all analyses
header = set()
# QC metadata per analysis: {accession: {qc key: int value}}
qc_meta = dict()

with Session(API_BASE) as s:

    print('Loading data from API.', end='', flush=True)

    # preparing url
    params = {
        'experiment_type': 'metagenomic',
        'study_accession': 'ERP009703',
    }
    f = Filter(urlencode(params))
    # list runs
    for anls in s.iterate(('pipelines/%s/analysis' % pipeline), f):
        print('.', end='', flush=True)

        # create the per-analysis mapping on first sight, reuse it afterwards
        annotations = result.setdefault(anls.accession, dict())

        _qc_meta = anls.metadata
        for k in qc_keys:
            _pcds = find_metadata(_qc_meta, k)
            # Test for a missing entry BEFORE converting: int(None) raises
            # TypeError, so converting first made this guard unreachable.
            if _pcds is not None:
                qc_meta.setdefault(anls.accession, dict())[k] = int(_pcds)

        rt = "runs/%s/pipelines/%s/go-slim" % (anls.accession, anls.pipeline_version)
        af = Filter(urlencode({'page_size': 100}))
        for ann in s.iterate(rt, af):
            # keep the first count recorded for an annotation of this analysis
            if ann.accession not in annotations:
                annotations[ann.accession] = int(ann.count)
                header.add(ann.accession)

    print("DONE")


Loading data from API.......................................................................................................................................................DONE


In [4]:
import csv

# newline='' is what the csv module asks for on files it writes to; without it
# platforms that translate '\n' end up with '\r\r\n' line endings.
with open("ERP009703_v4.csv", "w", newline='') as csvfile:
    # column order: run id, the QC metadata keys, then the GO-slim accessions sorted
    fieldnames = ['run'] + qc_keys + sorted(header)
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for k, v in result.items():
        # A run may have no QC metadata entry; fall back to an empty mapping so
        # the row is still written (DictWriter leaves absent fields blank).
        row = {**qc_meta.get(k, {}), **v}
        row['run'] = k
        writer.writerow(row)

In [5]:
# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv with index_col=0 keeps the first column ('run') as the index.
# Missing GO-slim counts are shown as empty strings instead of NaN.
df = pd.read_csv('ERP009703_v4.csv', index_col=0).fillna("")
df

Unnamed: 0_level_0,Predicted CDS,Predicted CDS with InterProScan match,GO:0000015,GO:0000150,GO:0000156,GO:0000160,GO:0000166,GO:0000746,GO:0000902,GO:0000988,...,GO:0070469,GO:0071103,GO:0071554,GO:0071840,GO:0071941,GO:0071973,GO:0090484,GO:0098796,GO:1902494,GO:1990204
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERR770958,839184,379455,90,92,3,1138,36741,3,335,1109,...,46,2322,41,2553,57,897,229,563,165,219
ERR770959,1099540,371158,63,49,10,805,35284,6,367,913,...,18,2225,26,2352,67,497,173,984,287,160
ERR770960,931464,323610,,,,,,,,,...,,,,,,,,,,
ERR770961,3062065,656310,,,,,,,,,...,,,,,,,,,,
ERR770962,1055429,383647,72,76,18,1086,34964,1,284,1112,...,24,2084,42,2149,57,451,179,1770,265,220
ERR770963,1140541,446260,66,112,24,1531,39281,9,309,1144,...,24,2306,102,2452,61,812,291,367,196,145
ERR770964,984026,369767,,,,,,,,,...,,,,,,,,,,
ERR770965,1245976,628142,,,,,,,,,...,,,,,,,,,,
ERR770966,1164557,267358,,,,,,,,,...,,,,,,,,,,
ERR770967,1257641,537945,93,210,96,2361,46891,36,379,1738,...,33,2827,93,2957,60,898,241,191,186,200
