# EdelweissData Python client API demonstration

## Setup

In [1]:
from pathlib import Path

import pandas
import requests
from edelweiss_data import API, QueryExpression as Q

In [2]:
from IPython.core.display import HTML

# Base URL of the EdelweissData web UI that the generated links point to.
edelweiss_ui_url = 'https://ui.develop.edelweiss.douglasconnect.com'

def display_link_to_published_dataset(id, version):
    """Return an IPython HTML object holding a link to a dataset in the web UI.

    The link targets the data explorer page of ``edelweiss_ui_url`` and
    identifies the dataset as ``<id>:<version>``.

    Args:
        id: Identifier of the published dataset; also shown in the link text.
        version: Version of the dataset to open.

    Returns:
        An ``HTML`` object wrapping a single ``<a>`` element.
    """
    link_template = '<a href="{}">View the dataset {} in the browser</a>'
    explorer_url = '{}/dataexplorer?dataset={}:{}'.format(edelweiss_ui_url, id, version)
    anchor_markup = link_template.format(explorer_url, id)
    return HTML(anchor_markup)

#### Set up connection

In [3]:
# Alternative: the hosted development API (uncomment to use it instead of localhost).
#edelweiss_api_url = 'https://api.develop.edelweiss.douglasconnect.com'
# Active setting: an API server running on this machine, port 8000.
edelweiss_api_url = 'http://localhost:8000'

In [4]:
# Client object used by all later cells, pointed at the API URL selected above.
api = API(edelweiss_api_url)

#### Authenticate yourself to edelweiss.

The default authentication uses Auth0Jwt. You will be asked to visit a URL in a web browser to confirm your identity. You can skip this step if you only want to view publicly visible datasets.

You can also use development mode, which skips the proper authentication.

In [5]:
# Default authentication (disabled here); uncomment to log in via the browser.
#api.authenticate()
# Development mode: skips the proper authentication.
api.authenticate(development=True)

### Get a dataframe of all published datasets

In [6]:
# Fetch the published datasets as a table; the bare name on the last line displays it.
datasets = api.get_published_datasets()
datasets

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset
id,version,Unnamed: 2_level_1
b45dc6d8-518a-4d18-8ac2-86a5ed6d4245,1,<PublishedDataset 'b45dc6d8-518a-4d18-8ac2-86a...
51c2e954-74cf-49c1-809b-7be86c4fcbb1,1,<PublishedDataset '51c2e954-74cf-49c1-809b-7be...
ce0643c3-9a66-4de0-a4b4-0fd3c006ec27,1,<PublishedDataset 'ce0643c3-9a66-4de0-a4b4-0fd...
fc1b9d13-0526-48e1-b2bf-bfa904ee070b,1,<PublishedDataset 'fc1b9d13-0526-48e1-b2bf-bfa...
726f770e-d721-4336-9278-34ccf9fdeb01,1,<PublishedDataset '726f770e-d721-4336-9278-34c...
...,...,...
50f6021e-d794-44c1-9211-f08abab199f8,1,<PublishedDataset '50f6021e-d794-44c1-9211-f08...
7777f6f6-b192-4585-97b3-e7b4408b38aa,1,<PublishedDataset '7777f6f6-b192-4585-97b3-e7b...
67a9fb5f-443a-43c8-8078-030312f93edb,1,<PublishedDataset '67a9fb5f-443a-43c8-8078-030...
42d368e7-f4be-41db-81e7-0ad892617730,1,<PublishedDataset '42d368e7-f4be-41db-81e7-0ad...


In [7]:
# Pick the object stored in the `dataset` column of the first row of the listing.
first = datasets.iloc[0].dataset

In [8]:
# Render a clickable link that opens the first dataset in the web UI.
display_link_to_published_dataset(first.id, first.version)

### Create a new dataset from file (high level operation)

Here everything is done automatically – you pass in an open file, a name and an optional arbitrary metadata dict and the dataset is created, uploaded and published. The returned value is an instance of the `PublishedDataset` class (see below for how to get a `pandas.DataFrame` from the dataset).

In [9]:
# Input file and the example metadata dict attached to the new dataset.
csv_path = '../../tests/Serialization/data/small1.csv'
dummy_metadata = {
    "metadata-dummy-string": "string value",
    "metadata-dummy-number": 42.0,
}

# One call creates the dataset from the open file under the name "python test";
# the file is closed again as soon as the call returns.
with open(csv_path) as csv_file:
    dataset = api.create_published_dataset_from_csv_file("python test", csv_file, dummy_metadata)
dataset

<PublishedDataset '9e9b729e-e4cd-428a-be93-5efffb5b7d18':1 - python test>

### Get data from a published dataset

In [10]:
# Retrieve the data of the dataset created above and display it.
dataframe = dataset.get_data()
dataframe

Unnamed: 0,Sample ID,Method name,Toxicity domain,Information domain,Date,Experiment ID,Organization abbreviation,Organization full name,Name of PI,Person entering data,...,Endpoint 1 measure,Endpoint 1 definition,Endpoint 1 type,Endpoint 1 readout method,Endpoint 1 data,Endpoint 1 data unit,Endpoint 1 mean,Endpoint 1 standard deviation,Endpoint 1 standard deviation unit,Comments / problems with compound
1,Part1_1_1,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,469644,relative light units,465007.333,1.558,%,
2,Part1_1_2,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,468717,relative light units,465007.333,1.558,%,
3,Part1_1_3,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,456661,relative light units,465007.333,1.558,%,
4,Part1_2_1,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,464629,relative light units,514442.667,8.438,%,
5,Part1_2_2,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,544186,relative light units,514442.667,8.438,%,
6,Part1_2_3,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,534513,relative light units,514442.667,8.438,%,
7,Part1_3_1,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,517964,relative light units,477431.667,10.248,%,
8,Part1_3_2,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,491249,relative light units,477431.667,10.248,%,
9,Part1_3_3,BDS22a_Tox_RGA_cytotox_act_24h,RDT and DART,cytotoxicity,2017-03-01,PH11410 010317 HAI-YEN CYTOTOX-CALUX-01,BDS,BioDetection Systems bv,Bart van der Burg,Barbara van Vugt-Lussenburg,...,luciferase,cytotoxicity,functional readout,luminescence,423082,relative light units,477431.667,10.248,%,


In [11]:
# Retrieve the per-(bucket, term) counts for the dataset.
aggregations = dataset.get_data_aggregations()
# Keep only the entries whose count differs from the total number of rows,
# i.e. drop terms that apply to every row of `dataframe`.
aggregations[aggregations != len(dataframe)]

bucket         term                  
Compound       2-Methyl butanoic acid    6
               DMSO                      3
Group ID       Part1_1                   3
               Part1_2                   3
               Part1_3                   3
Sample type    blank                     3
               test                      6
Pairing blank                            3
               GPart1_1                  6
dtype: int64

### Delete a dataset and all its versions

In [15]:
# Fetch the listing again; this overwrites the `datasets` variable from the earlier cell.
datasets = api.get_published_datasets()
datasets

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset
id,version,Unnamed: 2_level_1
71599d67-cd32-445b-a428-4e16b64e4c40,1,<PublishedDataset '71599d67-cd32-445b-a428-4e1...
5224ad4c-2f2f-4490-973a-539b21a361d9,1,<PublishedDataset '5224ad4c-2f2f-4490-973a-539...
b45dc6d8-518a-4d18-8ac2-86a5ed6d4245,1,<PublishedDataset 'b45dc6d8-518a-4d18-8ac2-86a...
51c2e954-74cf-49c1-809b-7be86c4fcbb1,1,<PublishedDataset '51c2e954-74cf-49c1-809b-7be...
ce0643c3-9a66-4de0-a4b4-0fd3c006ec27,1,<PublishedDataset 'ce0643c3-9a66-4de0-a4b4-0fd...
...,...,...
50f6021e-d794-44c1-9211-f08abab199f8,1,<PublishedDataset '50f6021e-d794-44c1-9211-f08...
7777f6f6-b192-4585-97b3-e7b4408b38aa,1,<PublishedDataset '7777f6f6-b192-4585-97b3-e7b...
67a9fb5f-443a-43c8-8078-030312f93edb,1,<PublishedDataset '67a9fb5f-443a-43c8-8078-030...
42d368e7-f4be-41db-81e7-0ad892617730,1,<PublishedDataset '42d368e7-f4be-41db-81e7-0ad...


In [13]:
# Delete the "python test" dataset that was created from the CSV file earlier in
# this notebook, together with all of its versions. `dataset` is the only
# published dataset object this notebook defines before this point and it is
# not used again afterwards, so the cell also works on a fresh kernel.
dataset.delete_all_versions()

### Create new dataset from file – the manual way

Here we have to:
* Create a new in-progress dataset
* Upload the data
* Infer a schema (or, alternatively, upload one)
* Optionally upload metadata (a Python dict object that will be serialized as JSON)
* Finally publish the dataset


In [21]:
# --- Configuration: everything a reader may want to change -------------------
# The input folder is resolved from the current user's home directory instead of
# a hard-coded, machine-specific absolute path.
data_dir = Path.home() / 'Downloads' / 'epa'
# The file name itself contains dots, so the extension is appended as plain text
# (Path.with_suffix would cut the name at its last dot).
basename = 'EPA-InVitroDBV3.2-TOX21_AR_LUC_MDAKB2_Antagonist_10nM_R1881_viability_summary_tcpl'
datafile = data_dir / (basename + '.csv')
name = 'My dataset'
schemafile = data_dir / (basename + '.json') # if None, schema will be inferred below
metadata = None # dict object that will be serialized to json or None
metadatafile = None # path to the metadata file or None
description = "This is a *markdown* description that can use [hyperlinks](https://edelweissconnect.com)"

# --- Step 1: create a new in-progress dataset --------------------------------
dataset1 = api.create_in_progress_dataset(name)
print('DATASET:', dataset1)
try:
    # --- Step 2: upload the data ---------------------------------------------
    with open(datafile) as f:
        dataset1.upload_data(f)

    # --- Step 3: upload a schema file if one is configured, else infer it -----
    if schemafile is not None:
        print('uploading schema from file ...')
        with open(schemafile) as f:
            dataset1.upload_schema_file(f)
    else:
        print('inferring schema from file ...')
        dataset1.infer_schema()

    # --- Step 4 (optional): metadata; the in-memory dict wins over the file ---
    if metadata is not None:
        print('uploading metadata ...')
        dataset1.upload_metadata(metadata)
    elif metadatafile is not None:
        print('uploading metadata from file ...')
        with open(metadatafile) as f:
            dataset1.upload_metadata_file(f)

    dataset1.set_description(description)

    # --- Step 5: publish, passing a message for this first version -----------
    published_dataset = dataset1.publish('My first commit')
    print('DATASET published:',published_dataset)
except requests.HTTPError as err:
    # Show the server's response body for a failed HTTP call instead of a traceback.
    print('not published: ', err.response.text)

DATASET: <InProgressDataset '2dc7647a-5075-4667-b05b-45600fbc9b60' - My dataset>
uploading schema from file ...
DATASET published: <PublishedDataset '2dc7647a-5075-4667-b05b-45600fbc9b60':1 - My dataset>


In [15]:
# Show the dataset's metadata; the cell above uploaded none, because both
# `metadata` and `metadatafile` were None.
dataset1.metadata

{}