In [1]:
# Dependency bootstrap for this notebook: import each third-party package and,
# if it is missing, install it into the environment of the running interpreter
# before importing again.
import importlib
import subprocess
import sys


def pip(args):
    """Run pip with the given argument list, e.g. ``pip(['install', 'pandas'])``.

    pip is started as a child process via ``sys.executable -m pip`` so that it
    targets the interpreter running this notebook. pip's Python internals
    (``pip._internal``) are not a supported API and may change between
    releases, so they are deliberately not imported here.

    Parameters
    ----------
    args : list of str
        Command-line arguments passed to pip.

    Returns
    -------
    int
        pip's exit status (0 on success).
    """
    return subprocess.call([sys.executable, '-m', 'pip'] + list(args))


def _import_or_install(module_name):
    """Return the imported module, installing the same-named package if needed.

    Raises ImportError if the module still cannot be imported after the
    installation attempt.
    """
    try:
        return importlib.import_module(module_name)
    except ImportError:
        pip(['install', module_name])
        # The import system caches directory listings; refresh them so files
        # written by the installation above are noticed by the retry.
        importlib.invalidate_caches()
        return importlib.import_module(module_name)


# pprint is part of the standard library, so there is nothing to install.
import pprint

requests = _import_or_install('requests')
pd = _import_or_install('pandas')
json = _import_or_install('simplejson')

import re
    
# Show cell contents, rows and columns without truncation.
# `None` is the supported way to disable the column-width limit; the former
# `-1` sentinel was deprecated in pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 99999)
pd.set_option('display.max_columns', 99999)

## Select EdelweissData server and authenticate

In [2]:
# Import the EdelweissData client, installing it on first use if it is missing.
try:
    from edelweiss_data import API, QueryExpression as Q
except ImportError:
    import importlib

    # `pip` comes from the first cell of this notebook.
    pip(['install', 'edelweiss_data'])
    # Refresh the import system's cached directory listings so the package
    # that was just written to disk is found by the retry below.
    importlib.invalidate_caches()
    from edelweiss_data import API, QueryExpression as Q

# Base URL of the EdelweissData server to talk to (a staging host, per the URL).
edelweiss_api_url = 'https://api.staging.kit.cloud.douglasconnect.com'
api = API(edelweiss_api_url)
api.authenticate()

## List metadata of the ToxCast summary datasets on the server (up to 20 results)

In [3]:
# Extra metadata columns to request: display title -> JSON path into each
# dataset's metadata. Converted to a list of (title, path) pairs below.
# Currently disabled entry, kept for reference:
#     "Endpoint": "$.assay.component.endpoint"
columns = list({
    "Endpoint name":
        "$.assay.component.endpoint.assay_component_endpoint_name.value",
    "Biological target":
        "$.assay.component.endpoint.target.biological_process_target.value",
    "Entrez gene ID for the molecular target":
        "$.assay.component.endpoint.target.intended.intended_target_gene.intended_target_entrez_gene_id.value",
    "Symbol":
        "$.assay.component.endpoint.target.intended.intended_target_gene.intended_target_official_symbol.value",
    "Gene name":
        "$.assay.component.endpoint.target.intended.intended_target_gene.intended_target_gene_name.value",
}.items())

# Both search terms have to match for a dataset to be selected.
condition = (
    Q.search_anywhere("EPA-ToxCastV3.1")
    & Q.search_anywhere("summary")
)

# Query the published datasets, capped at 20 results.
ToxCast = api.get_published_datasets(
    condition=condition,
    columns=columns,
    limit=20,
)
ToxCast

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset,Endpoint name,Biological target,Entrez gene ID for the molecular target,Symbol,Gene name
id,version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
61dcaffb-bb5b-4dca-8a4f-bd0bbe892546,1,<PublishedDataset '61dcaffb-bb5b-4dca-8a4f-bd0bbe892546':1 - EPA-ToxCastV3.1-ATG_DR5_CIS_up_summary_tcpl>,ATG_DR5_CIS_up,regulation of transcription factor activity,5914.0,RARA,"retinoic acid receptor, alpha"
a819148f-0b33-4915-ac24-971212bbb28f,1,<PublishedDataset 'a819148f-0b33-4915-ac24-971212bbb28f':1 - EPA-ToxCastV3.1-BSK_SAg_CD69_up_summary_tcpl>,BSK_SAg_CD69_up,regulation of gene expression,969.0,CD69,CD69 molecule
8598fd10-169c-48cd-ae21-670128a5e1a7,1,<PublishedDataset '8598fd10-169c-48cd-ae21-670128a5e1a7':1 - EPA-ToxCastV3.1-ATG_MR_TRANS2_dn_summary_tcpl>,ATG_MR_TRANS2_dn,regulation of transcription factor activity,4306.0,NR3C2,"nuclear receptor subfamily 3, group C, member 2"
7389b40f-30aa-4c4c-9544-76b4437d30d0,1,<PublishedDataset '7389b40f-30aa-4c4c-9544-76b4437d30d0':1 - EPA-ToxCastV3.1-BSK_LPS_MCP1_down_summary_tcpl>,BSK_LPS_MCP1_down,regulation of gene expression,6347.0,CCL2,chemokine (C-C motif) ligand 2
e0606378-15b5-4a55-8c24-7e817ba234ab,1,<PublishedDataset 'e0606378-15b5-4a55-8c24-7e817ba234ab':1 - EPA-ToxCastV3.1-BSK_hDFCGF_EGFR_up_summary_tcpl>,BSK_hDFCGF_EGFR_up,regulation of gene expression,1956.0,EGFR,epidermal growth factor receptor
a8ba9344-dccb-4154-9518-30e16737147e,1,<PublishedDataset 'a8ba9344-dccb-4154-9518-30e16737147e':1 - EPA-ToxCastV3.1-TOX21_CAR_Agonist_viabillity_summary_tcpl>,TOX21_CAR_Agonist_viabillity,cell proliferation,,,
89d03ff3-4d93-4f17-9e9e-3a769f014e23,1,<PublishedDataset '89d03ff3-4d93-4f17-9e9e-3a769f014e23':1 - EPA-ToxCastV3.1-ATG_M_19_TRANS_up_summary_tcpl>,ATG_M_19_TRANS_up,regulation of transcription factor activity,,,
9202d9ed-44da-4e6d-a985-ffa32ff1f2b3,1,<PublishedDataset '9202d9ed-44da-4e6d-a985-ffa32ff1f2b3':1 - EPA-ToxCastV3.1-NVS_ENZ_hLck_Activator_summary_tcpl>,NVS_ENZ_hLck_Activator,regulation of catalytic activity,3932.0,LCK,"LCK proto-oncogene, Src family tyrosine kinase"
07a9795a-f1b4-41dd-91c3-cee2e4da00b7,1,<PublishedDataset '07a9795a-f1b4-41dd-91c3-cee2e4da00b7':1 - EPA-ToxCastV3.1-NVS_NR_hPXR_summary_tcpl>,NVS_NR_hPXR,receptor binding,8856.0,NR1I2,"nuclear receptor subfamily 1, group I, member 2"
4b569cbe-4f7f-4420-9445-ae196657b3ac,1,<PublishedDataset '4b569cbe-4f7f-4420-9445-ae196657b3ac':1 - EPA-ToxCastV3.1-TOX21_Aromatase_Inhibition_viability_summary_tcpl>,TOX21_Aromatase_Inhibition_viability,cell proliferation,,,


# Access a specific dataset
(Only the first 100 rows are returned, which is the default in EdelweissData; more can be retrieved by passing limit=XXX.)

In [4]:
# Pick the dataset object in the first row of the listing and download its rows.
data = ToxCast['dataset'].iloc[0].get_data()
data

Unnamed: 0,DTXSID,DTXCID,Substance name,Substance type,Substance note,Quality control level,SMILES,CAS,InChI key,InChI,Hit call,Winning model,Quality check,Cut-off,Scaling,Upper limit,AC50 gain,Hill coefficient gain,AC50 loss,Hill coefficient loss,RMSE,IC10,IC25,IC50,bmad,resp_max,resp_min,max_mean,max_mean_conc,max_med,max_med_conc,logc_max,logc_min,cnst,hill,hcov,gnls,gcov,cnst_er,cnst_aic,cnst_rmse,cnst_prob,hill_tp,hill_tp_sd,hill_ga,hill_ga_sd,hill_gw,hill_gw_sd,hill_er,hill_er_sd,hill_aic,hill_rmse,hill_prob,gnls_tp,gnls_tp_sd,gnls_ga,gnls_ga_sd,gnls_gw,gnls_gw_sd,gnls_la,gnls_la_sd,gnls_lw,gnls_lw_sd,gnls_er,gnls_er_sd,gnls_aic,gnls_rmse,gnls_prob,nconc,npts,nrep,nmed_gtbl,fitc,actp,modl_prob,modl_acc,modl_acb,modl_ac10
1,,,4-Hydroxynonenal,,,,,,,,inactive,cnst,,0.592285,-3.95719,,,,,,0.023692,,,,0.118457,0.058297,-0.031944,0.026192,1.845098,0.037968,1.845098,1.845098,-0.09691,1,,,,,-3.95719,-67.434744,0.023692,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,5,15,3,0,4,0.0,1.0,,,
2,DTXSID2032390,DTXCID0012390,Fenarimol,Single Compound,,database.QcLevel@2ca3970a,OC(C1=CC=C(Cl)C=C1)(C1=CN=CN=C1)C1=CC=CC=C1Cl,60168-88-9,NHOWDZOIZKMVAI-UHFFFAOYSA-N,"InChI=1/C17H12Cl2N2O/c18-14-7-5-12(6-8-14)17(22,13-9-20-11-21-10-13)15-3-1-2-4-16(15)19/h1-11,22H",inactive,cnst,,0.592285,-2.571139,,,,,,0.091574,,,,0.118457,0.10944,-0.173539,0.10944,0.30103,0.10944,-0.09691,2.30103,-0.09691,1,,,,,-2.571139,-9.260122,0.091574,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,6,6,1,0,4,0.0,1.0,,,
3,,,MEDWater003_9,,,,,,,,inactive,hill,,0.592285,-3.800753,0.386981,0.307043,2.161546,,,0.033176,,,,0.118457,0.373991,-0.071087,0.373991,1.0,0.373991,1.0,1.0,-1.39794,1,1.0,1.0,1.0,1.0,-1.913121,-0.680165,0.193965,0.0003121952,0.3869812,0.02703262,0.307043,0.055777,2.161546,0.510227,-3.800753,0.439969,-16.54302,0.033176,0.868963,0.4487889,,0.381423,,1.934698,,1.174957,,5.099,,-3.826411,,-12.754602,0.032915,0.130725,6,6,1,1,13,0.999688,0.868963,,0.793204,-0.13442
4,DTXSID1020699,DTXCID10699,4-Hexylresorcinol,Single Compound,,database.QcLevel@2ca3970a,CCCCCCC1=C(O)C=C(O)C=C1,136-77-6,WFJIVOKAWHGMBH-UHFFFAOYSA-N,"InChI=1S/C12H18O2/c1-2-3-4-5-6-10-7-8-11(13)9-12(10)14/h7-9,13-14H,2-6H2,1H3",inactive,cnst,,0.592285,-2.947118,,,,,,0.055983,,,,0.118457,0.075386,-0.087032,0.075386,-1.522879,0.075386,1.30103,1.845098,-1.522879,1,,,,,-2.947118,-20.002778,0.055983,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
5,DTXSID7041623,DTXCID5021623,Benodanil,Single Compound,,database.QcLevel@2ca3970a,IC1=C(C=CC=C1)C(=O)NC1=CC=CC=C1,15310-01-7,LJOZMWRYMKECFF-UHFFFAOYSA-N,"InChI=1S/C13H10INO/c14-12-9-5-4-8-11(12)13(16)15-10-6-2-1-3-7-10/h1-9H,(H,15,16)",inactive,cnst,,0.592285,-1.823092,,,,,,0.175241,,,,0.118457,0.024417,-0.284756,0.024417,-0.522879,0.024417,2.30103,2.30103,-1.045757,1,,,,,-1.823092,-1.866042,0.175241,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
6,DTXSID1021455,DTXCID401455,FD&C Yellow 5,Single Compound,,database.QcLevel@2ca3970a,[Na+].[Na+].[Na+].[O-]C(=O)C1=C(\N=N\C2=CC=C(C=C2)S([O-])(=O)=O)C(=O)N(N1)C1=CC=C(C=C1)S([O-])(=O)=O,1934-21-0,ZLWLTDZLUVBSRJ-GLCFPVLVSA-K,"InChI=1S/C16H12N4O9S2.3Na/c21-15-13(18-17-9-1-5-11(6-2-9)30(24,25)26)14(16(22)23)19-20(15)10-3-7-12(8-4-10)31(27,28)29;;;/h1-8,19H,(H,22,23)(H,24,25,26)(H,27,28,29);;;/q;3*+1/p-3/b18-17+;;;",inactive,cnst,,0.592285,-2.600587,,,,,,0.114931,,,,0.118457,0.037714,-0.281163,0.037714,-1.045757,0.037714,2.30103,2.30103,-1.045757,1,,,,,-2.600587,-11.390602,0.114931,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
7,DTXSID6020692,DTXCID00692,Methenamine,Single Compound,,database.QcLevel@2ca3970a,C1N2CN3CN1CN(C2)C3,100-97-0,VKYKSIONXSXAKP-UHFFFAOYSA-N,InChI=1S/C6H12N4/c1-7-2-9-4-8(1)5-10(3-7)6-9/h1-6H2,inactive,cnst,,0.592285,-2.233116,,,,,,0.135647,,,,0.118457,0.309444,-0.114039,0.309444,0.477121,0.309444,0.477121,2.30103,-1.045757,1,,,,,-2.233116,-7.330246,0.135647,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
8,DTXSID5020154,DTXCID30154,Clorophene,Single Compound,,database.QcLevel@2ca39709,OC1=C(CC2=CC=CC=C2)C=C(Cl)C=C1,120-32-1,NCKMMSIFQUPKCK-UHFFFAOYSA-N,"InChI=1S/C13H11ClO/c14-12-6-7-13(15)11(9-12)8-10-4-2-1-3-5-10/h1-7,9,15H,8H2",inactive,cnst,,0.592285,-2.197889,,,,,,0.121534,,,,0.118457,0.192645,-0.155749,0.192645,1.30103,0.192645,1.30103,1.30103,-2.0,1,,,,,-2.197889,-7.776697,0.121534,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
9,DTXSID0020654,DTXCID80654,Geranyl acetate,Single Compound,,database.QcLevel@2ca39709,CC(C)=CCC\C(C)=C\COC(C)=O,105-87-3,HIGQPQRQIQDZMP-DHZHZOJOSA-N,"InChI=1S/C12H20O2/c1-10(2)6-5-7-11(3)8-9-14-12(4)13/h6,8H,5,7,9H2,1-4H3/b11-8+",inactive,cnst,,0.592285,-1.809585,,,,,,0.204966,,,,0.118457,0.061546,-0.420427,0.061546,0.845098,0.061546,2.30103,2.30103,-1.045757,1,1.0,1.0,1.0,1.0,-1.809585,-0.453239,0.204966,0.9464991,1.455332e-13,,1.757929,,1.164154,,-1.809584,,5.546761,0.204966,0.047123,2.115252e-12,,1.854206,,1.264229,,2.543745,,5.101293,,-1.809585,,9.546761,0.204966,0.006377,8,8,1,0,7,0.053501,0.946499,,,
10,DTXSID4021137,DTXCID001137,"1,3-Benzenediamine",Single Compound,,database.QcLevel@2ca3970a,NC1=CC(N)=CC=C1,108-45-2,WZCQRUWWHSTZEM-UHFFFAOYSA-N,"InChI=1S/C6H8N2/c7-5-2-1-3-6(8)4-5/h1-4H,7-8H2",inactive,cnst,,0.592285,-2.775363,,,,,,0.067564,,,,0.118457,0.086437,-0.119032,0.086437,-0.522879,0.086437,1.845098,2.30103,-1.045757,1,,,,,-2.775363,-17.148119,0.067564,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
