In [110]:
import requests
import json
import pandas as pd
import seaborn as sns
import numpy as np

In [193]:
outputs_url = "http://0.0.0.0:8000/api/v1/outputs_elastic/?substance=caffeine&pktype=clearance&final=true"
units_factor = {'ml/min/kg':60 ,'ml/min':float(60/1000), 'ml/h/kg':1,'l/h':1}
units_mapping = {'ml/min/kg':'ml/h/kg','ml/min':'ml/h', 'ml/h/kg':'ml/h/kg','l/h':'l/h'}                

In [215]:
def flatten_json(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x

    flatten(y)
    return out

def norm_value(df_row):
    factor = units_factor[df_row.unit]
    value = None
    if df_row.value:
        value = df_row.value
    elif df_row.mean:
        value = df_row["mean"]
    elif df_row.median:
        value = df_row["median"]
    if value:
        return float(value) * factor
    

    

In [195]:
response = requests.get(outputs_url)

In [196]:
num_pages = response.json()["last_page"]

In [197]:
data = []
for page in range(2,num_pages +1):
    url = outputs_url + f"&page={page}"
    response = requests.get(url)
    data += response.json()["data"]["data"]

In [198]:
flatten_data = [flatten_json(d) for d in data]


In [199]:
df_outputs = pd.DataFrame(flatten_data)

In [224]:
df_outputs["unit"].unique()

array(['ml/min', 'ml/min/kg', 'l/h', 'ml/h/kg', 'l/h/kg',
       'ml/min/1.73m^2'], dtype=object)

In [200]:
individual_ids = set(df_outputs["individual_pk"].dropna())
individual_ids_joined = "__".join([str(int(id)) for id in individual_ids])
df_outputs["individual_pk"] = df_outputs["individual_pk"].fillna(0).astype(int)

In [201]:
individual_ids_joined


'614__615__616__617__618__619__620__621__622__623__624__625__626__627__628__629__630__631__650__651__652__653__654__655__656__657__658__659__660__661__662__663__664__665__666__667__668__669__756__757__758__759__760__761__762__763__764__765__766__767__768__769__770__771__772__773__774__775__776__777__778__779__780__781__782__783__784__785__786__787__803__804__805__806__807__808__809__810__811__812__813__818__819__820__821__822__823__824__825__826__827__828__829__830__831__832__833__834__835__836__837__838__839__840__841__842__843__844__845__846__847__848__849__891__892__893__894__895__896__897__898__899__900__901__902__903__904__905__906__907__908__909__910__911__912__913__914__915__916__917__918__919__920__921__922__923__924__925__926__927__928__929__930__931__932__933__934__935__936__937__938__939__940__941__942__943__944__945__946__947__948__949__950__951__952__953__954__955__956__957__958__959__960__961__962__963__964__965__966__967__968__969__977__978__979__980__981__982__983__984_

In [202]:
response  = requests.get(f"http://0.0.0.0:8000/api/v1/individuals_elastic/?ids={individual_ids_joined}")
num_pages = response.json()["last_page"]
indivdiuals_url  = f"http://0.0.0.0:8000/api/v1/individuals_elastic/?ids={individual_ids_joined}"
data = []
for page in range(2,num_pages +1):
    url = indivdiuals_url + f"&page={page}"
    response = requests.get(url)
    data += response.json()["data"]["data"]

In [203]:
flatten_data = [flatten_json(d) for d in data]
df_individuals = pd.DataFrame(flatten_data)

In [204]:
complete = pd.merge(df_individuals,df_outputs, left_on="pk", right_on="individual_pk")

In [205]:
list(complete.columns)

['characteristica_all_final_0_category',
 'characteristica_all_final_0_choice',
 'characteristica_all_final_0_count',
 'characteristica_all_final_0_ctype',
 'characteristica_all_final_0_cv',
 'characteristica_all_final_0_max',
 'characteristica_all_final_0_mean',
 'characteristica_all_final_0_median',
 'characteristica_all_final_0_min',
 'characteristica_all_final_0_pk',
 'characteristica_all_final_0_sd',
 'characteristica_all_final_0_se',
 'characteristica_all_final_0_unit',
 'characteristica_all_final_0_value',
 'characteristica_all_final_10_category',
 'characteristica_all_final_10_choice',
 'characteristica_all_final_10_count',
 'characteristica_all_final_10_ctype',
 'characteristica_all_final_10_cv',
 'characteristica_all_final_10_max',
 'characteristica_all_final_10_mean',
 'characteristica_all_final_10_median',
 'characteristica_all_final_10_min',
 'characteristica_all_final_10_pk',
 'characteristica_all_final_10_sd',
 'characteristica_all_final_10_se',
 'characteristica_all_fin

In [206]:
df_individuals

Unnamed: 0,characteristica_all_final_0_category,characteristica_all_final_0_choice,characteristica_all_final_0_count,characteristica_all_final_0_ctype,characteristica_all_final_0_cv,characteristica_all_final_0_max,characteristica_all_final_0_mean,characteristica_all_final_0_median,characteristica_all_final_0_min,characteristica_all_final_0_pk,...,characteristica_all_final_9_sd,characteristica_all_final_9_se,characteristica_all_final_9_unit,characteristica_all_final_9_value,group_name,group_pk,name,pk,study_name,study_pk
0,age,,18,group,,30.00,26.00,,23.00,4215,...,,,,,control,514,28,623,Abernethy1985,4029248
1,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,2,900,Joeres1988,3371873
2,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,3,901,Joeres1988,3371873
3,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,4,902,Joeres1988,3371873
4,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,5,903,Joeres1988,3371873
5,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,6,904,Joeres1988,3371873
6,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,7,905,Joeres1988,3371873
7,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,8,906,Joeres1988,3371873
8,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,9,907,Joeres1988,3371873
9,species,homo sapiens,71,group,,,,,,4872,...,,,,,healthy nonsmokers,587,10,908,Joeres1988,3371873


In [207]:
def get_bodyweight(x):
    row_categories = list(x[x == "weight"].index)
    fields = ["value","mean","median"]
    for field in fields:
        row_choices = [category[:-8]+field for category in row_categories]
        for bodyweight in list(x[row_choices]):
            try:
                if float(bodyweight) > 0:
                    return float(bodyweight)
            except:
                pass
    
    


def get_dosing(x):
    row_categories = list(x[x == "dosing"].index)
    row_choices = [category[:-8]+"value" for category in row_categories]
    
    # x[row_choices]
    return list(x[row_choices].dropna())    
    

In [216]:
df1 = pd.DataFrame()
df1["bodyweight"] = complete.apply(get_bodyweight, axis=1)
df1["norm_value"] = complete.apply(norm_value, axis=1)
df1["norm_unit"] = complete.apply(lambda x: units_mapping[x["unit"]], axis=1)
df1["study"] = complete["study_name"]


In [217]:
#df1["clearance_unit"] = complete["unit"]
#df1["clearance_value"] = complete["value"]
#df1["clearance_mean"] = complete["mean"]

In [220]:
df1[(df1["study"]=="Joeres1988") & (df1["norm_unit"]== "ml/min/kg")]
#df1[(df1["clearance_unit"] == "ml/min/kg")]

#complete.apply(get_bodyweight, axis=1)

Unnamed: 0,bodyweight,norm_value,norm_unit,study


In [222]:
df1[(df1["study"]=="Joeres1988")]["norm_unit"].unique()

array(['ml/h'], dtype=object)

In [85]:
df1[df1["bodyweight"].isnull()]

Unnamed: 0,bodyweight,study
1,,Joeres1988
2,,Joeres1988
3,,Joeres1988
4,,Joeres1988
5,,Joeres1988
6,,Joeres1988
7,,Joeres1988
8,,Joeres1988
9,,Joeres1988
10,,Joeres1988


In [151]:
complete[["value","mean","median"]][300:350]

Unnamed: 0,value,mean,median
300,120.77,,
301,100.83,,
302,180.15,,
303,130.51,,
304,180.4,,
305,280.62,,
306,221.01,,
307,90.14,,
308,150.94,,
309,99.4,,


In [227]:
complete[complete["pk_y"]== 7975]

Unnamed: 0,characteristica_all_final_0_category,characteristica_all_final_0_choice,characteristica_all_final_0_count,characteristica_all_final_0_ctype,characteristica_all_final_0_cv,characteristica_all_final_0_max,characteristica_all_final_0_mean,characteristica_all_final_0_median,characteristica_all_final_0_min,characteristica_all_final_0_pk,...,pk_y,pktype,sd,se,substance_name,time,time_unit,tissue,unit,value


In [229]:
df_outputs[df_outputs["pk"] ==7975]

Unnamed: 0,cv,final,group,group_name,group_pk,individual,individual_name,individual_pk,interventions_0_name,interventions_0_pk,...,pk,pktype,sd,se,substance_name,time,time_unit,tissue,unit,value
337,0.94,True,,cirrhosis smokers,588.0,,,0,Dcaf,443.0,...,7975,clearance,0.85,0.2,caffeine,,,plasma,ml/min/kg,


In [226]:
complete

Unnamed: 0,characteristica_all_final_0_category,characteristica_all_final_0_choice,characteristica_all_final_0_count,characteristica_all_final_0_ctype,characteristica_all_final_0_cv,characteristica_all_final_0_max,characteristica_all_final_0_mean,characteristica_all_final_0_median,characteristica_all_final_0_min,characteristica_all_final_0_pk,...,pk_y,pktype,sd,se,substance_name,time,time_unit,tissue,unit,value
0,age,,18,group,,30.00,26.00,,23.00,4215,...,5473,clearance,,,caffeine,,,plasma,ml/min/kg,0.81
1,species,homo sapiens,71,group,,,,,,4872,...,7888,clearance,,,caffeine,,,plasma,ml/min,97.14
2,species,homo sapiens,71,group,,,,,,4872,...,7889,clearance,,,caffeine,,,plasma,ml/min,91.81
3,species,homo sapiens,71,group,,,,,,4872,...,7890,clearance,,,caffeine,,,plasma,ml/min,68.19
4,species,homo sapiens,71,group,,,,,,4872,...,7891,clearance,,,caffeine,,,plasma,ml/min,67.43
5,species,homo sapiens,71,group,,,,,,4872,...,7892,clearance,,,caffeine,,,plasma,ml/min,65.52
6,species,homo sapiens,71,group,,,,,,4872,...,7893,clearance,,,caffeine,,,plasma,ml/min,65.14
7,species,homo sapiens,71,group,,,,,,4872,...,7894,clearance,,,caffeine,,,plasma,ml/min,59.43
8,species,homo sapiens,71,group,,,,,,4872,...,7895,clearance,,,caffeine,,,plasma,ml/min,57.52
9,species,homo sapiens,71,group,,,,,,4872,...,7896,clearance,,,caffeine,,,plasma,ml/min,52.57
