In [1]:
import importlib
import pprint  # standard library: always available, must never be pip-installed
import re
import subprocess
import sys


def pip(args):
    """Run ``python -m pip <args>`` with the interpreter that backs this kernel.

    pip does not offer a supported in-process API, so it is invoked as a
    subprocess; using ``sys.executable`` guarantees packages land in the
    environment this notebook is actually running in.

    Args:
        args: list of pip command-line arguments, e.g. ``['install', 'requests']``.

    Returns:
        The exit code of the pip process (0 on success).
    """
    return subprocess.call([sys.executable, '-m', 'pip'] + list(args))


def _import_or_install(module_name):
    """Import ``module_name``, installing the same-named package on first failure.

    Raises:
        ImportError: if the module is still missing after the install attempt.
    """
    try:
        return importlib.import_module(module_name)
    except ImportError:
        pip(['install', module_name])
        # Make the import system notice the freshly written site-packages entry.
        importlib.invalidate_caches()
        return importlib.import_module(module_name)


requests = _import_or_install('requests')
pd = _import_or_install('pandas')
json = _import_or_install('simplejson')  # simplejson deliberately bound to the name `json`

# Show full cell contents and (practically) all rows/columns of displayed frames.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    # Older pandas (< 1.0) only accepts integers here; -1 was its "no limit" value.
    pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', 99999)
pd.set_option('display.max_columns', 99999)

## Select EdelweissData server and authenticate

In [2]:
# Import the EdelweissData client; if it is missing, install it into the
# kernel's own environment and retry (the retry raises ImportError if the
# install did not succeed).
try:
    from edelweiss_data import API, QueryExpression as Q
except ImportError:
    import subprocess
    import sys

    subprocess.call([sys.executable, '-m', 'pip', 'install', 'edelweiss_data'])
    from edelweiss_data import API, QueryExpression as Q

# Server that every later API call in this notebook talks to.
edelweiss_api_url = 'https://api.develop.edelweiss.douglasconnect.com'
api = API(edelweiss_api_url)
# NOTE(review): presumably starts an interactive login flow — confirm in the
# edelweiss_data documentation before running this notebook unattended.
api.authenticate()

## List metadata of all ToxCast datasets on the server

In [3]:
# Name pattern for ToxCast datasets; defined here but not used by the query below.
pattern = "^EPA-ToxCast"

# (column title, path) pairs passed to the server as extra result columns.
# NOTE(review): the paths look like JSON paths into each dataset's metadata — confirm.
columns = [
    (
        "Endpoint name",
        "$.assay.component.endpoint.assay_component_endpoint_name.value",
    ),
    (
        "Biological target",
        "$.assay.component.endpoint.target.biological_process_target.value",
    ),
    (
        "Entrez gene ID for the molecular target",
        "$.assay.component.endpoint.target.intended.intended_target_gene.intended_target_entrez_gene_id.value",
    ),
    (
        "Symbol",
        "$.assay.component.endpoint.target.intended.intended_target_gene.intended_target_official_symbol.value",
    ),
    (
        "Gene name",
        "$.assay.component.endpoint.target.intended.intended_target_gene.intended_target_gene_name.value",
    ),
]

# Restrict the listing to datasets matching the search term "EPA-ToxCast".
condition = Q.search_anywhere("EPA-ToxCast")

ToxCast = api.get_published_datasets(
    condition=condition,
    columns=columns,
    limit=200,
)
ToxCast

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset,Endpoint name,Biological target,Entrez gene ID for the molecular target,Symbol,Gene name
id,version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
7ab126dd-3a66-4cec-938e-9121d1dd270a,1,<PublishedDataset '7ab126dd-3a66-4cec-938e-9121d1dd270a':1 - EPA-ToxCastV3.1-ATG_PPARa_TRANS_dn summary data>,"""ATG_PPARa_TRANS_dn""","""regulation of transcription factor activity""",5465,"""PPARA""","""peroxisome proliferator-activated receptor alpha"""
093c8220-a5cd-4fe0-8793-4ed032f09420,1,<PublishedDataset '093c8220-a5cd-4fe0-8793-4ed032f09420':1 - EPA-ToxCastV3.1-ATG_PPARa_TRANS_up summary data>,"""ATG_PPARa_TRANS_up""","""regulation of transcription factor activity""",5465,"""PPARA""","""peroxisome proliferator-activated receptor alpha"""
35341552-11f2-4db7-b76d-e6cf990bdc1e,1,<PublishedDataset '35341552-11f2-4db7-b76d-e6cf990bdc1e':1 - EPA-ToxCastV3.1-NVS_NR_hPPARa summary data>,"""NVS_NR_hPPARa""","""receptor binding""",5465,"""PPARA""","""peroxisome proliferator-activated receptor alpha"""


In [4]:
# Take the dataset object from the first row of the listing and fetch its data.
first_dataset = ToxCast['dataset'].iloc[0]
data = first_dataset.get_data()
data

Unnamed: 0,DTXSID,DTXCID,Substance name,Substance type,Substance note,Quality control level,SMILES,CAS,InChI key,InChI,Hit call,Winning model,Quality check,Cut-off,Scaling,Upper limit,AC50 gain,Hill coefficient gain,AC50 loss,Hill coefficient loss,RMSE,IC10,IC25,IC50,bmad,resp_max,resp_min,max_mean,max_mean_conc,max_med,max_med_conc,logc_max,logc_min,cnst,hill,hcov,gnls,gcov,cnst_er,cnst_aic,cnst_rmse,cnst_prob,hill_tp,hill_tp_sd,hill_ga,hill_ga_sd,hill_gw,hill_gw_sd,hill_er,hill_er_sd,hill_aic,hill_rmse,hill_prob,gnls_tp,gnls_tp_sd,gnls_ga,gnls_ga_sd,gnls_gw,gnls_gw_sd,gnls_la,gnls_la_sd,gnls_lw,gnls_lw_sd,gnls_er,gnls_er_sd,gnls_aic,gnls_rmse,gnls_prob,nconc,npts,nrep,nmed_gtbl,fitc,actp,modl_prob,modl_acc,modl_acb,modl_ac10
1,,,4-Hydroxynonenal,,,,,,,,inactive,cnst,,1.16987,-0.648683,,,,,,0.720678,,,,0.233974,0.737162,-2.087119,0.178781,1.845098,0.203273,1.30103,1.845098,-0.09691,1,1.0,1.0,1.0,1.0,-0.648683,33.018185,0.720678,0.945281,3.808369e-11,1.7e-05,0.64601,204120.548299,1.393007,237030.856883,-0.648683,0.239563,39.018185,0.720678,0.04706277,0.8670794,,0.378933,,7.999959,,0.804851,,17.988567,,-0.666698,,42.650135,0.716089,0.007656,5,15,3,0,7,0.054719,0.945281,,,
2,DTXSID6057908,,MED_ChemMix_7EnvC,,,,-,NOCAS_57908,-,-,inactive,cnst,,1.16987,-1.051085,,,,,,0.397289,,,,0.233974,0.146758,-0.674022,0.146758,1.30103,0.146758,0.845098,2.30103,-0.09691,1,,,,,-1.051085,8.663187,0.397289,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,6,6,1,0,4,0.0,1.0,,,
3,,,MEDWater004_1,,,,,,,,inactive,cnst,,1.16987,-1.552958,,,,,,0.237146,,,,0.233974,0.412231,-0.026663,0.412231,-1.39794,0.412231,-1.39794,1.0,-1.39794,1,,,,,-1.552958,2.539439,0.237146,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,6,6,1,0,4,0.0,1.0,,,
4,DTXSID1021455,DTXCID401455,FD&C Yellow 5,Single Compound,,database.QcLevel@2ca3970a,[Na+].[Na+].[Na+].[O-]C(=O)C1=C(\N=N\C2=CC=C(C=C2)S([O-])(=O)=O)C(=O)N(N1)C1=CC=C(C=C1)S([O-])(=O)=O,1934-21-0,ZLWLTDZLUVBSRJ-GLCFPVLVSA-K,"InChI=1S/C16H12N4O9S2.3Na/c21-15-13(18-17-9-1-5-11(6-2-9)30(24,25)26)14(16(22)23)19-20(15)10-3-7-12(8-4-10)31(27,28)29;;;/h1-8,19H,(H,22,23)(H,24,25,26)(H,27,28,29);;;/q;3*+1/p-3/b18-17+;;;",inactive,cnst,,1.16987,-1.657969,,,,,,0.249088,,,,0.233974,0.439829,-0.485247,0.439829,2.30103,0.439829,1.778151,2.30103,-1.045757,1,,,,,-1.657969,2.398382,0.249088,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
5,DTXSID6020692,DTXCID00692,Methenamine,Single Compound,,database.QcLevel@2ca3970a,C1N2CN3CN1CN(C2)C3,100-97-0,VKYKSIONXSXAKP-UHFFFAOYSA-N,InChI=1S/C6H12N4/c1-7-2-9-4-8(1)5-10(3-7)6-9/h1-6H2,inactive,cnst,,1.16987,-1.539122,,,,,,0.236986,,,,0.233974,0.341149,-0.399632,0.341149,1.30103,0.341149,-1.045757,2.30103,-1.045757,1,,,,,-1.539122,2.821729,0.236986,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
6,DTXSID5020154,DTXCID30154,Clorophene,Single Compound,,database.QcLevel@2ca39709,OC1=C(CC2=CC=CC=C2)C=C(Cl)C=C1,120-32-1,NCKMMSIFQUPKCK-UHFFFAOYSA-N,"InChI=1S/C13H11ClO/c14-12-6-7-13(15)11(9-12)8-10-4-2-1-3-5-10/h1-7,9,15H,8H2",inactive,cnst,,1.16987,-1.262019,,,,,,0.300197,,,,0.233974,0.279603,-0.429911,0.279603,-1.522879,0.279603,-1.045757,1.30103,-2.0,1,,,,,-1.262019,6.915164,0.300197,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
7,DTXSID0020654,DTXCID80654,Geranyl acetate,Single Compound,,database.QcLevel@2ca39709,CC(C)=CCC\C(C)=C\COC(C)=O,105-87-3,HIGQPQRQIQDZMP-DHZHZOJOSA-N,"InChI=1S/C12H20O2/c1-10(2)6-5-7-11(3)8-9-14-12(4)13/h6,8H,5,7,9H2,1-4H3/b11-8+",inactive,cnst,,1.16987,-1.75021,,,,,,0.218674,,,,0.233974,0.20258,-0.48368,0.20258,-0.09691,0.20258,1.30103,2.30103,-1.045757,1,,,,,-1.75021,0.438739,0.218674,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
8,DTXSID4021137,DTXCID001137,"1,3-Benzenediamine",Single Compound,,database.QcLevel@2ca3970a,NC1=CC(N)=CC=C1,108-45-2,WZCQRUWWHSTZEM-UHFFFAOYSA-N,"InChI=1S/C6H8N2/c7-5-2-1-3-6(8)4-5/h1-4H,7-8H2",inactive,cnst,,1.16987,-1.347166,,,,,,0.316379,,,,0.233974,0.650373,-0.426724,0.650373,1.845098,0.650373,1.845098,2.30103,-1.045757,1,,,,,-1.347166,6.673677,0.316379,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,8,8,1,0,4,0.0,1.0,,,
9,DTXSID0042400,DTXCID8022400,Sodium hexyldecyl sulfate,Single Compound,,database.QcLevel@2ca3970a,[Na+].CCCCCCCCCCCCCCCCOS([O-])(=O)=O,1120-01-0,GGHPAKFFUZUEKL-UHFFFAOYSA-M,"InChI=1S/C16H34O4S.Na/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-20-21(17,18)19;/h2-16H2,1H3,(H,17,18,19);/q;+1/p-1",inactive,cnst,Noisy data,1.16987,0.073123,,,,,,1.426842,,,,0.233974,0.014252,-2.871778,0.014252,-0.39794,0.014252,2.0,2.0,-1.69897,1,1.0,1.0,1.0,1.0,0.073123,33.818748,1.426842,0.946499,1.778919e-10,,1.137718,,0.960422,,0.073129,,39.818748,1.426842,0.04712342,7.566622e-11,,0.964129,,1.189234,,2.111657,,5.0099,,0.073124,,43.818748,1.426842,0.006377,9,9,1,0,7,0.053501,0.946499,,,
10,DTXSID9026926,DTXCID406926,1-Tetradecanol,Single Compound,,database.QcLevel@2ca39709,CCCCCCCCCCCCCCO,112-72-1,HLZKNKRTKFSKGZ-UHFFFAOYSA-N,"InChI=1S/C14H30O/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15/h15H,2-14H2,1H3",inactive,cnst,,1.16987,-1.137317,,,,,,0.465437,,,,0.233974,7.3e-05,-1.066868,7.3e-05,0.845098,7.3e-05,2.30103,2.30103,-1.045757,1,1.0,1.0,1.0,1.0,-1.137317,11.582643,0.465437,0.946499,2.128766e-11,2e-06,2.007562,29591.085829,0.766225,104656.253613,-1.137324,0.362116,17.582643,0.465437,0.04712342,6.209278e-11,,1.739068,,1.017753,,2.054142,,4.999236,,-1.137323,,21.582643,0.465437,0.006377,8,8,1,0,7,0.053501,0.946499,,,
