## Using the Transfer Service to upload data with metadata

In [103]:
import requests 
import json

# JSON-LD description of the dataset; "@vocab" makes schema.org the
# default vocabulary for the unprefixed keys below.
dataset_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "Dataset",
    "name": "Raw Data",
    "description": "Heart Rate Measures from patient from admission to discharge.",
    # Authors are listed as objects identified by their ORCID URL.
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

The API call below posts the dataset to MinIO and its metadata to MongoDB.

In [104]:
# Upload the CSV and its JSON-LD metadata in one multipart POST.
# The file is opened in a `with` block so the handle is closed as soon as
# the request has been sent, instead of staying open for the kernel's lifetime.
with open('UVA_7129_HR2.csv', 'rb') as data_file:
    upload_data = requests.post(
        'https://clarklab.uvarc.io/transfer/data/',
        files={
            'files': data_file,
            # The metadata travels as a JSON string in its own form part.
            'metadata': json.dumps(dataset_meta)
        }
    )

# Keep the first minted identifier; later cells use it to reference the dataset.
data_id = upload_data.json()['Minted Identifiers'][0]
upload_data.json()

{'All files uploaded': True,
 'Failed to mint Id for': [],
 'Minted Identifiers': ['ark:99999/81e079a5-a53c-460d-8581-07bd2eb73fe7'],
 'failed to upload': []}

## Check MDS to make sure metadata was uploaded correctly

In [105]:
# Fetch the record stored under the identifier minted for the dataset
# and display it as parsed JSON.
metadata_request = requests.get(
    'https://clarklab.uvarc.io/mds/' + data_id
)
metadata_request.json()

{'@context': {'@vocab': 'http://schema.org/'},
 '@id': 'ark:99999/81e079a5-a53c-460d-8581-07bd2eb73fe7',
 '@type': 'Dataset',
 'author': [{'@id': 'https://orcid.org/0000-0002-1103-3882',
   'affiliation': 'University of Virginia',
   'name': 'Justin Niestroy'}],
 'description': 'Heart Rate Measures from patient from admission to discharge.',
 'distribution': [{'@id': 'ark:99999/c0528aa3-d6b9-44c7-ad91-6433a008d755',
   '@type': 'DataDownload',
   'contentSize': 214877,
   'contentUrl': 'minionas.uvadcos.io/breakfast/UVA_7129_HR2.csv',
   'fileFormat': 'csv',
   'name': 'UVA_7129_HR2.csv'}],
 'name': 'Raw Data',
 'sdPublicationDate': '2020-07-28T20:07:16.332386861Z',
 'url': 'http://ors.uvadcos.io/ark:99999/81e079a5-a53c-460d-8581-07bd2eb73fe7'}

## Upload Spark Script to run on newly uploaded data

In [106]:
# JSON-LD description of the Spark processing script; "@vocab" makes
# schema.org the default vocabulary for the unprefixed keys below.
spark_code_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "SoftwareSourceCode",
    "name": "Processing  Script",
    "description": "Sample Source Code for HCTSA test on spark",
    # Authors are listed as objects identified by their ORCID URL.
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

In [107]:
# Upload the Spark script and its JSON-LD metadata in one multipart POST.
# The file is opened in a `with` block so the handle is closed as soon as
# the request has been sent, instead of staying open for the kernel's lifetime.
with open('./run_algos.py', 'rb') as script_file:
    upload_script = requests.post(
        'https://clarklab.uvarc.io/transfer/data/',
        files={
            'files': script_file,
            # The metadata travels as a JSON string in its own form part.
            'metadata': json.dumps(spark_code_meta)
        }
    )

# Keep the first minted identifier; the compute job references the script by it.
software_id = upload_script.json()['Minted Identifiers'][0]

## Submit Spark Job Using Compute Service

In [108]:
# Job request: pairs the uploaded dataset with the uploaded script.
job = {"datasetID": data_id, "scriptID": software_id}

# Submit the job; the dict is sent as the JSON request body.
create_job = requests.post("https://clarklab.uvarc.io/compute/spark", json=job)

# The raw response body, decoded to text, is kept as the job identifier.
job_id = create_job.content.decode()

In [109]:
# Display the job identifier decoded from the compute service response.
job_id

'08684e5e-d397-4177-8fd4-5cf219063e87'

## Watch Running Job

In [111]:
# List the jobs the compute service reports as running.
# Uses https like every other call in this notebook to the same host.
requests.get("https://clarklab.uvarc.io/compute/spark").json()

{'runningJobIds': ['10e7c024-dea5-40cb-ab11-594b5a34086f',
  'ac30b49c-3697-44db-864a-74e654393214']}

## Get Job outputs from job metadata

In [112]:
# job_id holds only the bare identifier, so the 'ark:99999/' prefix is
# part of the URL here.
job_metadata_request = requests.get('https://clarklab.uvarc.io/mds/ark:99999/' + job_id)
job_metadata = job_metadata_request.json()

# .get() yields None when the record has no 'evi:supports' entry.
output_id = job_metadata.get('evi:supports')

In [113]:
# Display the identifiers read from the job record's 'evi:supports' entry.
output_id

['ark:99999/e844b72d-a73b-4bbb-9c2c-414a1c8af0f4',
 'ark:99999/056a3156-303d-4eee-9b81-667ea9637809']

In [114]:
# Fetch the metadata record for every output identifier, in order.
# The inner generator is lazy, so each response is parsed before the
# next request is issued.
output_metadata = [
    response.json()
    for response in (
        requests.get('https://clarklab.uvarc.io/mds/' + output_ark)
        for output_ark in output_id
    )
]

In [115]:
# Display the metadata records fetched for the job outputs.
output_metadata

[{'@context': {'@vocab': 'http://schema.org/'},
  '@id': 'ark:99999/e844b72d-a73b-4bbb-9c2c-414a1c8af0f4',
  'distribution': [{'@id': 'ark:99999/7a904ded-4a20-4702-aab5-7dafa3ee60b2',
    '@type': 'DataDownload',
    'contentUrl': 'minio:9000/breakfast/08684e5e-d397-4177-8fd4-5cf219063e87/_SUCCESS',
    'fileFormat': '_SUCCESS',
    'name': '_SUCCESS'}],
  'evi:generatedBy': {'@id': 'ark:99999/08684e5e-d397-4177-8fd4-5cf219063e87'},
  'name': '_SUCCESS',
  'sdPublicationDate': '2020-07-28T20:08:57.266793026Z',
  'url': 'http://ors.uvadcos.io/ark:99999/e844b72d-a73b-4bbb-9c2c-414a1c8af0f4'},
 {'@context': {'@vocab': 'http://schema.org/'},
  '@id': 'ark:99999/056a3156-303d-4eee-9b81-667ea9637809',
  'distribution': [{'@id': 'ark:99999/b2ef4061-67ae-4c77-9d76-64d3fdd06ad3',
    '@type': 'DataDownload',
    'contentUrl': 'minio:9000/breakfast/08684e5e-d397-4177-8fd4-5cf219063e87/part-00000-4423af52-5ac9-45ed-9f8d-99d7ca069581-c000.csv',
    'fileFormat': 'csv',
    'name': 'part-00000-4423

## Upload Image Code

In [139]:
# JSON-LD description of the image-generating script; "@vocab" makes
# schema.org the default vocabulary for the unprefixed keys below.
spark_graphic_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "SoftwareSourceCode",
    "name": "Image Script",
    "description": "Creates heatmap of patients stay",
    # Authors are listed as objects identified by their ORCID URL.
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

In [140]:
# Upload the plotting script and its JSON-LD metadata in one multipart POST.
# The file is opened in a `with` block so the handle is closed as soon as
# the request has been sent, instead of staying open for the kernel's lifetime.
with open('./make_graphic.py', 'rb') as graphic_script:
    files = {
        'files': graphic_script,
        # The metadata travels as a JSON string in its own form part.
        'metadata': json.dumps(spark_graphic_meta)
    }

    upload_graphic_code = requests.post(
        'https://clarklab.uvarc.io/transfer/data/',
        files=files
    )

# Keep the first minted identifier; the image job references the script by it.
graphic_code_id = upload_graphic_code.json()['Minted Identifiers'][0]
graphic_code_id

'ark:99999/e5703b2c-22d4-4e1b-a98c-d2fe9e133675'

## Run Spark Job to create Image

In [141]:
# Job request: feed an output of the first job to the image script.
# NOTE(review): output_id[1] is chosen by position; in the recorded run that
# entry is the CSV part file and entry 0 is the _SUCCESS marker — confirm the
# service guarantees this order.
image_job = {"datasetID": output_id[1], "scriptID": graphic_code_id}

# Submit the job; the dict is sent as the JSON request body.
image_job_request = requests.post("https://clarklab.uvarc.io/compute/spark", json=image_job)

# The decoded response body is prefixed with 'ark:99999/' so the result can be
# appended to the MDS URL as-is.
image_job_id = 'ark:99999/' + image_job_request.content.decode()
image_job_id

'ark:99999/f6f6448b-06a1-443e-bba1-06b06fd27170'

## Check on running jobs

In [149]:
# List the jobs the compute service reports as running.
# Uses https like every other call in this notebook to the same host.
requests.get("https://clarklab.uvarc.io/compute/spark").json()

{'runningJobIds': ['10e7c024-dea5-40cb-ab11-594b5a34086f',
  'ac30b49c-3697-44db-864a-74e654393214']}

### Get image ID from the updated job metadata

In [153]:
# image_job_id already carries the 'ark:99999/' prefix, so it is appended
# to the MDS URL unchanged.
image_job_metadata = requests.get('https://clarklab.uvarc.io/mds/' + image_job_id).json()

# Take the first identifier listed under 'evi:supports'.
image_id = image_job_metadata.get('evi:supports')[0]
image_id

'ark:99999/23e73f31-f2c4-4551-b83a-b8709fd224c4'

## Get the image metadata

In [155]:
# Fetch the metadata record for the generated image and display it.
image_metadata = requests.get('https://clarklab.uvarc.io/mds/' + image_id).json()
image_metadata

{'@context': {'@vocab': 'http://schema.org/'},
 '@id': 'ark:99999/23e73f31-f2c4-4551-b83a-b8709fd224c4',
 'distribution': [{'@id': 'ark:99999/b535a7e6-c7fa-4ed1-ab3e-52f3c4986413',
   '@type': 'DataDownload',
   'contentUrl': 'minio:9000/breakfast/f6f6448b-06a1-443e-bba1-06b06fd27170/Histogram_Heatmap.png',
   'fileFormat': 'png',
   'name': 'Histogram_Heatmap.png'}],
 'evi:generatedBy': {'@id': 'ark:99999/f6f6448b-06a1-443e-bba1-06b06fd27170'},
 'name': 'Histogram_Heatmap.png',
 'sdPublicationDate': '2020-07-28T20:30:25.351057523Z',
 'url': 'http://ors.uvadcos.io/ark:99999/23e73f31-f2c4-4551-b83a-b8709fd224c4'}

### Build Evidence graph of created image using the evidence graph service

In [166]:
# Request the evidence graph for the generated image; the response object
# is kept so a later cell can parse it.
evidence_graph = requests.get('https://clarklab.uvarc.io/evidencegraph/eg/' + image_id)

In [170]:
# Parse the evidence graph response body as JSON and display it.
evidence_graph.json()

{'@context': {'@vocab': 'http://schema.org/', 'evi': 'http://purl.org/evi/'},
 '@id': 'ark:99999/23e73f31-f2c4-4551-b83a-b8709fd224c4',
 'evi:generatedBy': {'@id': 'ark:99999/f6f6448b-06a1-443e-bba1-06b06fd27170',
  '@type': 'evi:Computation',
  'began': 'Tuesday, July 28, 2020 08:29:54',
  'evi:supports': 'ark:99999/23e73f31-f2c4-4551-b83a-b8709fd224c4',
  'evi:usedDataset': {'@id': 'ark:99999/056a3156-303d-4eee-9b81-667ea9637809',
   'evi:generatedBy': {'@id': 'ark:99999/08684e5e-d397-4177-8fd4-5cf219063e87',
    '@type': 'evi:Computation',
    'began': 'Tuesday, July 28, 2020 08:07:56',
    'evi:supports': [{'@id': 'ark:99999/e844b72d-a73b-4bbb-9c2c-414a1c8af0f4'},
     {'@id': 'ark:99999/056a3156-303d-4eee-9b81-667ea9637809'}],
    'evi:usedDataset': {'@id': 'ark:99999/81e079a5-a53c-460d-8581-07bd2eb73fe7',
     '@type': 'Dataset',
     'author': {'@id': 'https://orcid.org/0000-0002-1103-3882',
      'name': 'Justin Niestroy'},
     'name': 'Raw Data'},
    'evi:usedSoftware': {'@i

## View Visualization of the Evidence Graph

In [171]:
# Build the visualization URL for the image's identifier; the cell only
# displays the URL string, it does not request it.
'https://clarklab.uvarc.io/viz/' + image_id

'https://clarklab.uvarc.io/viz/ark:99999/23e73f31-f2c4-4551-b83a-b8709fd224c4'