## Using the Transfer Service to upload data with metadata

In [38]:
import requests 
import json

# schema.org description of the raw heart-rate dataset; it is serialized with
# json.dumps and sent alongside the CSV in the upload request.
dataset_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "Dataset",
    "name": "Raw Data",
    "description": "Heart Rate Measures from patient from admission to discharge.",
    # Authors are a list so more than one person can be credited.
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

The API call below posts the dataset to MinIO and its metadata to MongoDB.

In [39]:
# Upload the CSV and its metadata in one multipart request. The file is opened
# in a `with` block so the handle is closed as soon as the request completes
# instead of being left open for the lifetime of the kernel.
with open('UVA_7129_HR2.csv', 'rb') as data_file:
    upload_data = requests.post(
        'https://clarklab.uvarc.io/transfer/data/',
        files={
            'files': data_file,
            # The metadata dict is sent as a JSON string in its own multipart part.
            'metadata': json.dumps(dataset_meta)
        }
    )

# Stop here on a 4xx/5xx response rather than failing later with a confusing
# JSON or KeyError when the error body is parsed.
upload_data.raise_for_status()

# The response lists the identifiers minted for the upload; keep the first one
# for the cells that follow.
data_id = upload_data.json()['Minted Identifiers'][0]
upload_data.json()

{'All files uploaded': True,
 'Failed to mint Id for': [],
 'Minted Identifiers': ['ark:99999/068afbeb-3f86-4c2c-a9ce-f587db573039'],
 'failed to upload': []}

## Check MDS to make sure metadata was uploaded correctly

In [40]:
# Fetch the record stored under the identifier minted for the dataset and
# display it so it can be compared with what was submitted.
metadata_request = requests.get(
    'https://clarklab.uvarc.io/mds/{}'.format(data_id)
)
metadata_request.json()

{'@context': {'@vocab': 'http://schema.org/'},
 '@id': 'ark:99999/068afbeb-3f86-4c2c-a9ce-f587db573039',
 '@type': 'Dataset',
 'author': [{'@id': 'https://orcid.org/0000-0002-1103-3882',
   'affiliation': 'University of Virginia',
   'name': 'Justin Niestroy'}],
 'description': 'Heart Rate Measures from patient from admission to discharge.',
 'distribution': [{'@id': 'ark:99999/3886b6cf-5f72-4ade-b61c-24396732ee32',
   '@type': 'DataDownload',
   'contentSize': 201072,
   'contentUrl': 'minionas.uvadcos.io/breakfast/UVA_7129_HR2.csv',
   'fileFormat': 'csv',
   'name': 'UVA_7129_HR2.csv'}],
 'name': 'Raw Data',
 'sdPublicationDate': '2020-07-29T00:34:51.288908765Z',
 'url': 'http://ors.uvadcos.io/ark:99999/068afbeb-3f86-4c2c-a9ce-f587db573039'}

## Upload Spark Script to run on newly uploaded data

In [41]:
# schema.org description of the Spark processing script; it is serialized with
# json.dumps and sent alongside the script file in the upload request.
spark_code_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "SoftwareSourceCode",
    "name": "Processing  Script",
    "description": "Sample Source Code for HCTSA test on spark",
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

In [42]:
# Upload the Spark script and its metadata in one multipart request. The
# `with` block closes the script's file handle once the request has been sent.
with open('./run_algos.py', 'rb') as script_file:
    upload_script = requests.post(
        'https://clarklab.uvarc.io/transfer/data/',
        files={
            'files': script_file,
            # The metadata dict is sent as a JSON string in its own multipart part.
            'metadata': json.dumps(spark_code_meta)
        }
    )

# Surface HTTP errors immediately instead of failing while parsing an error body.
upload_script.raise_for_status()

# Keep the first minted identifier; it is the script ID used to submit the job.
software_id = upload_script.json()['Minted Identifiers'][0]

## Submit Spark Job Using Compute Service

In [43]:
# A job pairs the identifier of the uploaded dataset with the identifier of
# the uploaded script.
job = {
    "datasetID": data_id,
    "scriptID": software_id
}

create_job = requests.post(
    "https://clarklab.uvarc.io/compute/spark",
    json=job
)

# Without this check an error response body would be decoded below and then
# silently used as the job identifier in later requests.
create_job.raise_for_status()

# The response body itself is the job identifier, returned as text.
job_id = create_job.content.decode()

In [44]:
# Show the job identifier decoded from the compute service response.
job_id

'ark:99999/a69f31fd-fa20-4c28-bfb9-1f38110204e5'

## Watch Running Job

In [47]:
# Ask the compute service which Spark jobs are currently running. HTTPS is used
# here to match the POST made to this same endpoint when the job was submitted,
# so the request is not sent in plaintext.
requests.get("https://clarklab.uvarc.io/compute/spark").json()

{'runningJobIds': ['2ee431b3-5fea-4005-bf6d-dc12609ff059']}

## Get Job outputs from job metadata

In [48]:
# Retrieve the metadata record stored under the job's identifier.
job_metadata_request = requests.get('https://clarklab.uvarc.io/mds/{}'.format(job_id))
job_metadata = job_metadata_request.json()

# 'evi:supports' holds references to the job's outputs; .get() yields None
# when the key is not present in the record.
output_id = job_metadata.get('evi:supports')

In [49]:
# Show the output references read from the job metadata ('evi:supports').
output_id

[{'@id': 'ark:99999/0a32bd4e-fe82-4756-8cc7-8ae73525f531'},
 {'@id': 'ark:99999/9947a4d9-2680-4fa6-9f19-4fd1845091a6'}]

In [50]:
# Resolve every output reference to its full metadata record, one GET per
# identifier, preserving the order of output_id.
output_metadata = [
    requests.get('https://clarklab.uvarc.io/mds/{}'.format(entry['@id'])).json()
    for entry in output_id
]

In [51]:
# Show the metadata records fetched for each job output.
output_metadata

[{'@context': {'@vocab': 'http://schema.org/'},
  '@id': 'ark:99999/0a32bd4e-fe82-4756-8cc7-8ae73525f531',
  '@type': 'Dataset',
  'distribution': [{'@id': 'ark:99999/2b69f800-f614-4698-be14-67c949f87b2e',
    '@type': 'DataDownload',
    'contentUrl': 'minio:9000/breakfast/a69f31fd-fa20-4c28-bfb9-1f38110204e5/_SUCCESS',
    'fileFormat': '_SUCCESS',
    'name': '_SUCCESS'}],
  'evi:generatedBy': {'@id': 'ark:99999/a69f31fd-fa20-4c28-bfb9-1f38110204e5'},
  'name': '_SUCCESS',
  'sdPublicationDate': '2020-07-29T00:35:28.110474048Z',
  'url': 'http://ors.uvadcos.io/ark:99999/0a32bd4e-fe82-4756-8cc7-8ae73525f531'},
 {'@context': {'@vocab': 'http://schema.org/'},
  '@id': 'ark:99999/9947a4d9-2680-4fa6-9f19-4fd1845091a6',
  '@type': 'Dataset',
  'distribution': [{'@id': 'ark:99999/e9bae24b-b283-4e09-9f08-0e066a3e97fc',
    '@type': 'DataDownload',
    'contentUrl': 'minio:9000/breakfast/a69f31fd-fa20-4c28-bfb9-1f38110204e5/part-00000-4d490ac2-0c07-4c9a-9fc2-7018106104fe-c000.csv',
    'file

## Upload Image Code

In [52]:
# schema.org description of the plotting script; it is serialized with
# json.dumps and sent alongside the script file in the upload request.
spark_graphic_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "SoftwareSourceCode",
    "name": "Image Script",
    "description": "Creates heatmap of patients stay",
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

In [53]:
# Upload the plotting script and its metadata in one multipart request. The
# `with` block closes the script's file handle once the request has been sent.
with open('./make_graphic.py', 'rb') as graphic_script:
    files = {
        'files': graphic_script,
        # The metadata dict is sent as a JSON string in its own multipart part.
        'metadata': json.dumps(spark_graphic_meta)
    }

    upload_graphic_code = requests.post(
        'https://clarklab.uvarc.io/transfer/data/',
        files=files
    )

# Surface HTTP errors immediately instead of failing while parsing an error body.
upload_graphic_code.raise_for_status()

# Keep the first minted identifier; it is the script ID for the image job.
graphic_code_id = upload_graphic_code.json()['Minted Identifiers'][0]
graphic_code_id

'ark:99999/5453c2db-4eda-4e47-9231-a2c0f3d8f141'

## Run Spark Job to create Image

In [24]:
# Run the plotting script against an output of the first job.
# NOTE(review): index 1 was the part-*.csv output in the recorded run (index 0
# was the _SUCCESS marker); the ordering of 'evi:supports' is presumably not
# guaranteed -- confirm before relying on the position.
image_job = {
    "datasetID": output_id[1]['@id'],
    "scriptID": graphic_code_id
}

image_job_request = requests.post(
    "https://clarklab.uvarc.io/compute/spark",
    json=image_job
)

# Without this check an error response body would be decoded below and then
# silently used as the job identifier in later requests.
image_job_request.raise_for_status()

# The response body itself is the job identifier, returned as text.
image_job_id = image_job_request.content.decode()
image_job_id

'ark:99999/6312878c-a2ed-4b83-b298-6c7f16538815'

## Check on running jobs

In [54]:
# Ask the compute service which Spark jobs are currently running. HTTPS is used
# here to match the POST made to this same endpoint when the job was submitted,
# so the request is not sent in plaintext.
requests.get("https://clarklab.uvarc.io/compute/spark").json()

{'runningJobIds': ['2ee431b3-5fea-4005-bf6d-dc12609ff059']}

### Get the image ID from the updated job metadata

In [55]:
# Retrieve the metadata record stored under the image job's identifier.
image_job_metadata = requests.get(
    'https://clarklab.uvarc.io/mds/{}'.format(image_job_id)
).json()

# Take the identifier of the first entry under 'evi:supports'.
image_id = image_job_metadata.get('evi:supports')[0]['@id']
image_id

'ark:99999/7681ef7e-82bf-4fa1-913a-f1870d8203ea'

## Get the image metadata

In [56]:
# Fetch and display the metadata record stored under the image's identifier.
image_metadata = requests.get('https://clarklab.uvarc.io/mds/{}'.format(image_id)).json()
image_metadata

{'@context': {'@vocab': 'http://schema.org/'},
 '@id': 'ark:99999/7681ef7e-82bf-4fa1-913a-f1870d8203ea',
 '@type': 'Dataset',
 'distribution': [{'@id': 'ark:99999/1011186f-6131-45a9-921d-d3ef6b8921d1',
   '@type': 'DataDownload',
   'contentUrl': 'minio:9000/breakfast/6312878c-a2ed-4b83-b298-6c7f16538815/Histogram_Heatmap.png',
   'fileFormat': 'png',
   'name': 'Histogram_Heatmap.png'}],
 'evi:generatedBy': {'@id': 'ark:99999/6312878c-a2ed-4b83-b298-6c7f16538815'},
 'name': 'Histogram_Heatmap.png',
 'sdPublicationDate': '2020-07-28T23:32:14.94755418Z',
 'url': 'http://ors.uvadcos.io/ark:99999/7681ef7e-82bf-4fa1-913a-f1870d8203ea'}

### Build the evidence graph of the created image using the Evidence Graph service

In [57]:
# Request the evidence graph for the image from the evidence graph service.
# The response object is kept; its JSON body is displayed in the next cell.
evidence_graph = requests.get('https://clarklab.uvarc.io/evidencegraph/eg/{}'.format(image_id))

In [58]:
# Show the evidence graph response body parsed as JSON.
evidence_graph.json()

{'@context': {'@vocab': 'http://schema.org/', 'evi': 'http://purl.org/evi/'},
 '@id': 'ark:99999/7681ef7e-82bf-4fa1-913a-f1870d8203ea',
 '@type': 'Dataset',
 'evi:generatedBy': {'@id': 'ark:99999/6312878c-a2ed-4b83-b298-6c7f16538815',
  '@type': 'evi:Computation',
  'began': 'Tuesday, July 28, 2020 11:31:44',
  'evi:usedDataset': {'@id': 'ark:99999/496b5ab9-1fd9-4d76-8e00-fb3a616313c1',
   '@type': 'Dataset',
   'evi:generatedBy': {'@id': 'ark:99999/a86cc2b6-6c3d-4dde-a8a0-ba84c49f8082',
    '@type': 'evi:Computation',
    'began': 'Tuesday, July 28, 2020 11:29:31',
    'evi:usedDataset': {'@id': 'ark:99999/c00d2f61-3943-4187-8ae4-f063ae99c56f',
     '@type': 'Dataset',
     'author': {'@id': 'https://orcid.org/0000-0002-1103-3882',
      '@type': 'Person',
      'name': 'Justin Niestroy'},
     'name': 'Raw Data'},
    'evi:usedSoftware': {'@id': 'ark:99999/ebbbd304-8c8c-40bb-8e91-dc4e3157c813',
     '@type': 'SoftwareSourceCode',
     'author': {'@id': 'https://orcid.org/0000-0002-11

## View Visualization of the Evidence Graph

In [59]:
# Build the visualization URL for the image's identifier and display it.
'https://clarklab.uvarc.io/viz/{}'.format(image_id)

'https://clarklab.uvarc.io/viz/ark:99999/7681ef7e-82bf-4fa1-913a-f1870d8203ea'