# Download QC ERP009703 pipeline v2

API endpoint listing all metagenomic analysis runs of study ERP009703 (pipeline 2.0):
https://www.ebi.ac.uk/metagenomics/api/v0.2/pipelines/2.0/analysis?experiment_type=metagenomic&study_accession=ERP009703

In [1]:
import collections
try:
    from urllib import urlencode
except ImportError:
    from urllib.parse import urlencode

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame

In [6]:
from jsonapi_client import Session, Filter

# Root URL that every relative API path used with Session is resolved against.
# NOTE(review): this targets the `wwwdev` host, whereas the link in the
# notebook introduction uses `www.ebi.ac.uk` — confirm which one is intended.
API_BASE = 'https://wwwdev.ebi.ac.uk/metagenomics/api/v0.2/'

In [7]:
def find_metadata(metadata, key):
    """
    Look up a metadata value by name, ignoring case.

    Walks ``metadata`` in order and compares each entry's ``var_name``
    with ``key`` case-insensitively. The ``var_value`` of the first
    matching entry is returned; ``None`` is returned when nothing matches
    (including when ``metadata`` is empty).
    """
    matching_values = (
        entry.var_value
        for entry in metadata
        if entry.var_name.lower() == key.lower()
    )
    # next() with a default yields the first hit or None when there is none
    return next(matching_values, None)


# QC metadata entries collected for every run (matched case-insensitively)
qc_keys = ['Predicted CDS', 'Predicted CDS with InterProScan match']
pipeline = '2.0'

# run accession -> {GO slim accession -> annotation count}
result = {}
# every GO slim accession seen in at least one run
header = set()
# run accession -> {QC key -> integer value}; only keys that were present
qc_meta = dict()

with Session(API_BASE) as s:

    print('Loading data from API.', end='', flush=True)

    # preparing url
    params = {
        'experiment_type': 'metagenomic',
        'study_accession': 'ERP009703',
    }
    f = Filter(urlencode(params))
    # list runs
    for anls in s.iterate(('pipelines/%s/analysis' % pipeline), f):
        print('.', end='', flush=True)

        # one GO-count mapping per run, created on first sight of the run
        run_counts = result.setdefault(anls.accession, dict())

        _qc_meta = anls.metadata
        for k in qc_keys:
            _pcds = find_metadata(_qc_meta, k)
            # find_metadata returns None when the key is absent, so the check
            # has to happen before int(); int(None) would raise TypeError
            if _pcds is not None:
                qc_meta.setdefault(anls.accession, dict())[k] = int(_pcds)

        rt = "runs/%s/pipelines/%s/go-slim" % (anls.accession, anls.pipeline_version)
        af = Filter(urlencode({'page_size': 100}))
        for ann in s.iterate(rt, af):
            # keep the first count reported for a GO term within a run
            if ann.accession not in run_counts:
                run_counts[ann.accession] = int(ann.count)
                header.add(ann.accession)

    print("DONE")


Loading data from API.......................................................................................................................................................DONE


In [8]:
import csv

# Write one CSV row per run: the run accession, the QC values, then one column
# per GO slim accession. newline='' is what the csv module expects for files it
# writes to; without it extra blank rows appear on platforms that translate '\n'.
with open("ERP009703_v2.csv", "w", newline="") as csvfile:
    fieldnames = ['run'] + qc_keys + sorted(header)
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for k, v in result.items():
        # a run without any QC metadata has no qc_meta entry; use an empty
        # mapping so its row is still written (DictWriter leaves missing
        # columns blank)
        row = {**qc_meta.get(k, {}), **v}
        row['run'] = k
        writer.writerow(row)

In [9]:
# DataFrame.from_csv was deprecated and later removed from pandas; read_csv with
# index_col=0 is the documented replacement. from_csv also tried to parse the
# index as dates, which is not wanted here because the index holds run
# accessions. Missing GO counts are shown as empty strings.
df = pd.read_csv('ERP009703_v2.csv', index_col=0).fillna("")
df

Unnamed: 0_level_0,Predicted CDS,Predicted CDS with InterProScan match,GO:0000015,GO:0000150,GO:0000156,GO:0000160,GO:0000166,GO:0000746,GO:0000902,GO:0000988,...,GO:0070469,GO:0071103,GO:0071554,GO:0071840,GO:0071941,GO:0071973,GO:0090484,GO:0098796,GO:1902494,GO:1990204
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERR770958,837108,372831,,,,,,,,,...,,,,,,,,,,
ERR770959,1096049,364688,,,,,,,,,...,,,,,,,,,,
ERR770960,928227,319163,48,51,4,750,32948,3,353,869,...,25,1956,21,2115,61,568,162,1043,192,127
ERR770961,3061298,615699,14,41,0,1020,17777,196,404,415,...,1,3428,2,930,0,29,65,21,22,56
ERR770962,1052073,373479,,,,,,,,,...,,,,,,,,,,
ERR770963,1137797,435899,,,,,,,,,...,,,,,,,,,,
ERR770964,979779,357742,65,141,60,2037,31789,30,197,973,...,25,1761,64,1921,52,498,257,468,260,131
ERR770965,1241956,614099,109,145,33,2213,61348,12,488,1433,...,55,3443,65,3539,123,690,468,378,242,265
ERR770966,1161835,254391,35,72,13,866,25241,16,158,619,...,23,1411,73,1333,6,143,121,813,401,94
ERR770967,1254970,521331,,,,,,,,,...,,,,,,,,,,
