In [1]:
import pandas as pd 

In [2]:
def load_api_data(url, context=None):
    """
    Download a JSON API response and parse it into a pandas DataFrame.

    Parameters
    ----------
    url : str
        Address of the endpoint. The response body must be JSON that
        ``pandas.read_json`` can parse.
    context : ssl.SSLContext, optional
        TLS settings used for the request. When omitted, the context from
        ``ssl.create_default_context()`` is used, which verifies the server
        certificate and hostname. Pass a custom context only if a server
        genuinely needs different settings.

    Returns
    -------
    pandas.DataFrame
        The parsed response.

    Raises
    ------
    urllib.error.URLError
        If the request fails (``HTTPError`` for non-success status codes).
    ValueError
        If the response body cannot be parsed as JSON.
    """
    # Imported locally so the function can be copied between notebooks as a unit.
    import io
    import ssl
    import urllib.request

    if context is None:
        # A bare ssl.SSLContext() performs no certificate verification and its
        # argument-less constructor is deprecated; use the verifying default.
        context = ssl.create_default_context()

    # Use a distinct name for the response so the `url` argument is not shadowed.
    with urllib.request.urlopen(url, context=context) as response:
        payload = response.read()

    # read_json expects a path or file-like object; handing it the raw payload
    # directly is deprecated, so wrap the bytes in an in-memory buffer.
    return pd.read_json(io.BytesIO(payload))

def load_subject_sample_map():
    """
    Fetch the subject and sample tables and join them on ``subject_id``.

    Both tables are downloaded with ``load_api_data`` (subject first, then
    sample) and combined with pandas' default merge.

    Returns
    -------
    pandas.DataFrame
        One row per matching subject/sample pair.
    """
    subject_table, sample_table = (
        load_api_data(endpoint)
        for endpoint in (
            'https://staging.cmi-pb.org:443/db/subject',
            'https://staging.cmi-pb.org:443/db/sample',
        )
    )
    return pd.merge(subject_table, sample_table, on='subject_id')

In [3]:
# Every table used in this notebook, keyed by a short name.
datasets = {}

# Download the two metadata tables that are joined into the master table.
for longname in ('subject', 'specimen'):
    url = 'https://www.cmi-pb.org/api/{}'.format(longname)
    df = datasets[longname] = load_api_data(url)

# Columns kept in the master metadata table, in display order.
meta_columns = [
    'subject_id',
    'specimen_id',
    'infancy_vac',
    'biological_sex',
    'year_of_birth',
    'date_of_boost',
    'actual_day_relative_to_boost',
    'planned_day_relative_to_boost',
    'ethnicity',
    'race',
    'study_name',
    'specimen_type',
    'visit',
]

subjects, specimen = datasets['subject'], datasets['specimen']

# Join every specimen to its subject, then keep only the columns listed above.
master_meta = pd.merge(subjects, specimen, on='subject_id')[meta_columns]
datasets['master_meta'] = master_meta

In [4]:
# Short assay names and the API endpoint each one is downloaded from
# (the two tuples are parallel: assays[k] is served by longnames[k]).
assays = ('cytof', 'olink', 'rnaseq')
longnames = ('live_cell_percentages', 'olink_prot_exp', 'rnaseq')

# Endpoints that are deliberately not downloaded in this notebook.
skipped_longnames = {'olink_prot_exp'}

# Per assay: (column whose values become the wide table's columns,
#             column holding the measured values).
pivot_spec = {
    'rnaseq': ('ensembl_gene_id', 'tpm'),
    'cytof': ('cell_type_name', 'percent_live_cell'),
    'olink': ('uniprot_id', 'protein_expression'),
}

# Build one wide table per assay (one row per specimen) with the specimen
# metadata attached, and store it in `datasets` under the assay name.
for assay, longname in zip(assays, longnames):
    if longname in skipped_longnames:
        continue

    print(longname)

    # NOTE(review): the recorded run of this cell got "HTTP Error 404" after
    # printing 'rnaseq' — confirm the rnaseq endpoint name/version.
    url = 'https://www.cmi-pb.org:443/api/v2/{}'.format(longname)
    df = load_api_data(url)

    if assay == 'rnaseq':
        # Drop the ".<version>" suffix from the gene ids. regex=True is
        # required: since pandas 2.0 str.replace treats the pattern as a
        # literal string by default, which would leave the ids unchanged.
        # NOTE(review): if two versioned ids collapse to the same unversioned
        # id within a specimen, the pivot below raises — verify on real data.
        df['ensembl_gene_id'] = df['versioned_ensembl_gene_id'].str.replace(
            r'\.[0-9]+', '', regex=True
        )

    column_field, value_field = pivot_spec[assay]
    df = df.pivot(index='specimen_id', columns=column_field, values=value_field)
    df = master_meta.merge(df, on='specimen_id')

    datasets[assay] = df

live_cell_percentages
rnaseq


HTTPError: HTTP Error 404: Not Found

In [None]:
# List the distinct sampling days (relative to boost) found in each dataset.
# The values are read from the dataset being iterated, so the output differs
# per table instead of repeating the master metadata values every time.
for assay, df in datasets.items():
    print(assay)

    # Not every table carries the column, so only report where it exists.
    if 'actual_day_relative_to_boost' in df.columns:
        print(sorted(df['actual_day_relative_to_boost'].unique()))

In [None]:
# NOTE(review): `df` is whichever frame an earlier cell last bound to that name.
df['planned_day_relative_to_boost'].unique()

In [None]:
# Distinct actual sampling days (relative to boost) in the master metadata.
actual_days = master_meta['actual_day_relative_to_boost'].unique()
print(sorted(actual_days))

In [None]:
# Distinct planned sampling days (relative to boost) in the master metadata.
planned_days = master_meta['planned_day_relative_to_boost'].unique()
print(sorted(planned_days))

In [None]:
# Distinct actual sampling days (relative to boost) present in the cytof table.
cytof_actual_days = datasets['cytof']['actual_day_relative_to_boost'].unique()
print(sorted(cytof_actual_days))

In [None]:
# Order the cytof table by subject, then by actual day relative to boost.
sort_keys = ['subject_id', 'actual_day_relative_to_boost']
test = datasets['cytof'].sort_values(by=sort_keys)

In [None]:
# Display the sorted cytof table built in the previous cell.
test

In [None]:
# Subject ids of the rows whose actual sampling day is before the boost (day < 0).
sampled_before_boost = test['actual_day_relative_to_boost'] < 0
lt0 = test.loc[sampled_before_boost, 'subject_id']

In [None]:
# Show every row of the first subject that has a sample taken before the boost.
# `lt0.iloc[0]` raises IndexError when no such subject exists.
first_early_subject = lt0.iloc[0]
test[test['subject_id'].isin([first_early_subject])]