In [1]:
import importlib
import subprocess
import sys


def pip(args):
    """Run ``python -m pip <args>`` with the interpreter that backs this kernel.

    pip does not support being driven in-process through ``pip._internal``;
    that module is private and its layout has changed between pip releases.
    Running pip as a subprocess of ``sys.executable`` is the supported route
    and guarantees packages land in the environment this notebook runs in.

    Parameters
    ----------
    args : list of str
        Command-line arguments for pip, e.g. ``['install', 'requests']``.

    Returns
    -------
    int
        pip's exit status (0 on success).
    """
    status = subprocess.call([sys.executable, '-m', 'pip', *args])
    # Make freshly installed packages visible to the import system of the
    # already-running interpreter.
    importlib.invalidate_caches()
    return status


# pprint and re are part of the standard library, so they need no install
# fallback (the PyPI distribution named "pprint" is an unrelated package).
import pprint
import re

# Third-party dependencies: try to import, install on demand, then retry.
try:
    import requests
except ImportError:
    pip(['install', 'requests'])
    import requests

try:
    import pandas as pd
except ImportError:
    pip(['install', 'pandas'])
    import pandas as pd

try:
    import simplejson as json
except ImportError:
    pip(['install', 'simplejson'])
    import simplejson as json

# Show complete cell contents and (effectively) all rows/columns when a
# DataFrame is rendered.
# `None` is the supported way to disable column-width truncation; the legacy
# value -1 was deprecated in pandas 1.0 and is rejected by current releases.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 99999)
pd.set_option('display.max_columns', 99999)

## Select EdelweissData server and authenticate

In [2]:
# Import the EdelweissData client classes. If the package is missing, install
# it through the `pip` callable set up in the first cell and retry the import.
try:
    from edelweiss_data import API, QueryExpression as Q
except ImportError:
    pip(['install', 'edelweiss_data'])
    from edelweiss_data import API, QueryExpression as Q

# Base URL of the EdelweissData server to query (a staging host, per its name).
edelweiss_api_url = 'https://api.staging.kit.cloud.douglasconnect.com'
# Client object used by all later cells to talk to the server.
api = API(edelweiss_api_url)
# NOTE(review): presumably starts a login flow and stores the credentials on
# `api` -- confirm against the edelweiss_data documentation.
api.authenticate()

## List metadata of all TG-GATEs datasets on the server for specific compounds

In [3]:
# InChI keys of the compounds of interest. Each key is matched against the
# "InChI key" metadata column when querying the server. Compound names are
# given only where the query output in this notebook confirms them.
compounds = [
    'BJQHLKABXJIVAM-UHFFFAOYSA-N',
    'HEMJJKBWTPKOJG-UHFFFAOYSA-N',  # gemfibrozil
    'XJGBDJOMWKAZJS-UHFFFAOYSA-N',
    'IIBYAHWJQTYFKB-UHFFFAOYSA-N',
    'YMTINGFKWWXKFG-UHFFFAOYSA-N',  # fenofibrate
    'SZRPDCCEHVWOJX-UHFFFAOYSA-N',  # WY-14643
    'KPSRODZRAIWAKH-UHFFFAOYSA-N',
    'KNHUKKLJHYUCFP-UHFFFAOYSA-N',  # clofibrate
]

In [4]:
# Metadata fields to expose as DataFrame columns, as
# (column name, JSON path into the dataset metadata) pairs.
columns = [
#    ("Metadata", "$"),
    ("Compound", "$.Compound.Name"),
    ("InChI key", '$.Compound."InChI Key"'),
    ("Organism", "$.Assay.Organism"),
    ("Study type", '$.Assay."Study type"'),
    ("Organ", "$.Assay.Organ"),
    ("Dose", "$.Assay.Exposure.Dose"),
    ("Duration", "$.Assay.Exposure.Duration"),
]

# Filter shared by every compound: TG-GATES, rat liver, in vitro, high dose.
condition = (
    Q.search_anywhere("TG-GATES")
    & Q.fuzzy_search(Q.column('Organism'), 'Rat')
    & Q.fuzzy_search(Q.column('Organ'), 'Liver')
    & Q.fuzzy_search(Q.column('Study type'), 'in_vitro')
    & Q.fuzzy_search(Q.column('Dose'), 'high')
)

# Value passed as `limit` to each per-compound query.
MAX_DATASETS_PER_COMPOUND = 200

# Run one query per compound and collect the resulting frames. They are
# concatenated once at the end: growing a DataFrame with pd.concat inside the
# loop re-copies all previously collected rows on every iteration.
per_compound_frames = []
for compound in compounds:
    cquery = condition & Q.fuzzy_search(Q.column('InChI key'), compound)
    per_compound_frames.append(
        api.get_published_datasets(
            limit=MAX_DATASETS_PER_COMPOUND,
            columns=columns,
            condition=cquery,
        )
    )

# pd.concat raises on an empty list, so fall back to an empty frame when no
# compounds were requested.
TGGATEs = pd.concat(per_compound_frames) if per_compound_frames else pd.DataFrame()

TGGATEs

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset,Compound,InChI key,Organism,Study type,Organ,Dose,Duration
id,version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
d4242821-178f-4784-8dec-cbdbecdf013c,1,<PublishedDataset 'd4242821-178f-4784-8dec-cbdbecdf013c':1 - TG-GATES-gemfibrozil_Rat_Liver_in vitro_24_hr_high_FOLD_CHANGES>,gemfibrozil,InChIKey=HEMJJKBWTPKOJG-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,24
62b953bd-8bc1-4259-a460-6fe1bcfb3e36,1,<PublishedDataset '62b953bd-8bc1-4259-a460-6fe1bcfb3e36':1 - TG-GATES-gemfibrozil_Rat_Liver_in vitro_2_hr_high_FOLD_CHANGES>,gemfibrozil,InChIKey=HEMJJKBWTPKOJG-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,2
c809bda3-4902-4661-bca0-e814e1a0e6ef,1,<PublishedDataset 'c809bda3-4902-4661-bca0-e814e1a0e6ef':1 - TG-GATES-gemfibrozil_Rat_Liver_in vitro_8_hr_high_FOLD_CHANGES>,gemfibrozil,InChIKey=HEMJJKBWTPKOJG-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,8
0d136884-b67b-4210-b0a2-60ba72589081,1,<PublishedDataset '0d136884-b67b-4210-b0a2-60ba72589081':1 - TG-GATES-fenofibrate_Rat_Liver_in vitro_8_hr_high_FOLD_CHANGES>,fenofibrate,InChIKey=YMTINGFKWWXKFG-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,8
ef2ba55a-7a08-41f3-878f-62465cc8c33d,1,<PublishedDataset 'ef2ba55a-7a08-41f3-878f-62465cc8c33d':1 - TG-GATES-fenofibrate_Rat_Liver_in vitro_24_hr_high_FOLD_CHANGES>,fenofibrate,InChIKey=YMTINGFKWWXKFG-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,24
361abe50-9559-4033-b614-fc913a26b7ae,1,<PublishedDataset '361abe50-9559-4033-b614-fc913a26b7ae':1 - TG-GATES-fenofibrate_Rat_Liver_in vitro_2_hr_high_FOLD_CHANGES>,fenofibrate,InChIKey=YMTINGFKWWXKFG-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,2
28401831-6376-4c20-a7d4-11beb10e97ed,1,<PublishedDataset '28401831-6376-4c20-a7d4-11beb10e97ed':1 - TG-GATES-WY-14643_Rat_Liver_in vitro_2_hr_high_FOLD_CHANGES>,WY-14643,InChIKey=SZRPDCCEHVWOJX-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,2
1fa1df59-f941-464e-befd-c45f9e890592,1,<PublishedDataset '1fa1df59-f941-464e-befd-c45f9e890592':1 - TG-GATES-WY-14643_Rat_Liver_in vitro_24_hr_high_FOLD_CHANGES>,WY-14643,InChIKey=SZRPDCCEHVWOJX-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,24
7b3816f4-30ae-4aa6-8703-7b42b33c316b,1,<PublishedDataset '7b3816f4-30ae-4aa6-8703-7b42b33c316b':1 - TG-GATES-WY-14643_Rat_Liver_in vitro_8_hr_high_FOLD_CHANGES>,WY-14643,InChIKey=SZRPDCCEHVWOJX-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,8
e3df03ef-755e-4f26-ac87-036e518db7dc,1,<PublishedDataset 'e3df03ef-755e-4f26-ac87-036e518db7dc':1 - TG-GATES-clofibrate_Rat_Liver_in vitro_24_hr_high_FOLD_CHANGES>,clofibrate,InChIKey=KNHUKKLJHYUCFP-UHFFFAOYSA-N,Rat,in_vitro,Liver,high,24


## Access first dataset
(Only the first 100 rows are returned, which is the EdelweissData default; more can be retrieved by passing `limit=XXX`.)

In [None]:
# Take the dataset object stored in the first row of the query result and
# fetch its tabular content from the server.
first_dataset = TGGATEs['dataset'].iloc[0]
data = first_dataset.get_data()
data

Unnamed: 0,PROBEID,SYMBOL,ENSEMBL,ENTREZID,logFC,AveExpr,t,P.Value,adj.P.Val,B
1,1394844_s_at,Cyp4a2,ENSRNOG00000030154,24306,1.328554,1.222763,40.470601,8.270322e-68,2.5719869999999996e-63,125.419909
2,1370310_at,Hmgcs2,ENSRNOG00000019120,24450,0.873744,1.593723,26.67613,8.588093999999998e-50,1.335406e-45,94.749065
3,1370397_at,Cyp4a3,ENSRNOG00000009741,298423,0.850318,0.941899,26.048491,8.233981e-49,8.535619e-45,92.914356
4,1387783_a_at,Acaa1a,ENSRNOG00000032908,24157,0.617884,1.621441,18.722486,5.8471519999999996e-36,4.546014e-32,67.709313
5,1367659_s_at,Eci1,ENSRNOG00000008843,29740,0.524094,2.132206,16.039,1.524989e-30,9.485128e-27,56.544719
6,1391483_at,Creb3l3,ENSRNOG00000032202,314638,0.457627,1.666656,14.084901,2.336732e-26,1.2111670000000001e-22,47.750298
7,1386901_at,Cd36,ENSRNOG00000040108,29184,0.446998,1.677112,13.776982,1.109376e-25,4.92864e-22,46.317038
8,1388924_at,Angptl4,ENSRNOG00000007545,362850,0.435761,1.68433,13.413998,7.045362e-25,2.738797e-21,44.612157
9,1389253_at,Vnn1,ENSRNOG00000016219,29142,0.432929,0.925512,13.342054,1.017866e-24,3.517178e-21,44.27234
10,1368283_at,Ehhadh,ENSRNOG00000001770,171142,0.426761,0.346334,13.15124,2.70715e-24,8.418965e-21,43.368099
