## Using the Transfer Service to upload data with metadata

In [32]:
import requests 
import json

# JSON-LD record (schema.org vocabulary) describing the raw dataset.
# It is serialised with json.dumps and sent alongside the file on upload.
dataset_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "Dataset",
    "name": "Raw Data",
    "description": "Heart Rate Measures from patient from admission to discharge.",
    # The author entry carries an ORCID URL as its "@id".
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

Below is the API call that posts the dataset to MinIO and its metadata to MongoDB.

In [39]:
# Upload the CSV together with its JSON-LD metadata in one multipart POST.
url = 'http://transfer-service/data/'

# Open the file in a context manager so the handle is closed as soon as the
# request has been sent, instead of staying open for the kernel's lifetime.
with open('UVA_7129_HR2.csv', 'rb') as data_file:
    files = {
        'files': data_file,
        'metadata': json.dumps(dataset_meta)
    }
    r = requests.post(url, files=files)

# Surface HTTP failures here rather than as a confusing KeyError/IndexError
# when the identifier is extracted below.
r.raise_for_status()

# Parse the response once and reuse it.
data_upload = r.json()
data_id = data_upload['Minted Identifiers'][0]

data_upload

'{"All files uploaded":true,"Failed to mint Id for":[],"Minted Identifiers":["ark:99999/a0671c63-a748-4f7f-ba6c-3340d2214cfc"],"failed to upload":[]}\n'

## Check MDS to make sure metadata was uploaded correctly

In [None]:
# Fetch the record stored under the identifier minted for the dataset and
# display the decoded JSON body.
dataset_record = requests.get('http://mds.ors/' + data_id)
dataset_record.json()

## Upload Spark Script to run on newly uploaded data

In [None]:
# JSON-LD record (schema.org vocabulary) describing the processing script.
# It is serialised with json.dumps and sent alongside the script on upload.
spark_code_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "SoftwareSourceCode",
    "name": "Processing  Script",
    "description": "Sample Source Code for HCTSA test on spark",
    # The author entry carries an ORCID URL as its "@id".
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

In [None]:
# Upload the Spark script together with its JSON-LD metadata in one
# multipart POST.
url = 'http://transfer-service/data/'

# Open the script in a context manager so the handle is closed as soon as the
# request has been sent, instead of staying open for the kernel's lifetime.
with open('/run_algos.py', 'rb') as script_file:
    files = {
        'files': script_file,
        'metadata': json.dumps(spark_code_meta)
    }
    r = requests.post(url, files=files)

# Surface HTTP failures here rather than as a confusing KeyError/IndexError
# when the identifier is extracted below.
r.raise_for_status()

# First minted identifier is the one assigned to the uploaded script.
software_id = r.json()['Minted Identifiers'][0]

## Submit Spark Job Using Compute Service

In [None]:
# Pair the uploaded dataset with the uploaded script and submit the pair to
# the Spark submission endpoint as a JSON body.
job = dict(datasetID=data_id, scriptID=software_id)

R = requests.post(
    "http://sparksubmit/job",
    json=job,
)

# Display the raw response body returned by the submission endpoint.
R.content.decode()

## Get Job outputs from job metadata

In [None]:
# Look up the metadata record of the job submitted above and list what it
# supports. The previous version queried the bare namespace
# 'http://mds.ors/ark:99999/', which does not name any job.
# NOTE(review): assumes the submit response body in `R` is the job's
# identifier suffix, the same way the "Run Spark Job to create Image" cell
# builds its job_id -- confirm against the compute service.
first_job_id = 'ark:99999/' + R.content.decode()
requests.get('http://mds.ors/' + first_job_id).json()['eg:supports']

In [None]:
# Identifier of the first job's output, then a lookup of its metadata record.
# NOTE(review): this identifier is a hard-coded literal, presumably copied
# from an earlier run; a fresh run would need it replaced -- confirm.
output_id = 'ark:99999/eace030f-c2b8-4f84-b586-6f571b285d9a'
output_record = requests.get('http://mds.ors/' + output_id)
output_record.json()

## Upload Image Code

In [None]:
# JSON-LD record (schema.org vocabulary) describing the image script.
# It is serialised with json.dumps and sent alongside the script on upload.
spark_graphic_meta = {
    "@context": {"@vocab": "http://schema.org/"},
    "@type": "SoftwareSourceCode",
    "name": "Image Script",
    "description": "Creates heatmap of patients stay",
    # The author entry carries an ORCID URL as its "@id".
    "author": [
        {
            "name": "Justin Niestroy",
            "@id": "https://orcid.org/0000-0002-1103-3882",
            "affiliation": "University of Virginia",
        },
    ],
}

In [None]:
# Upload the image script together with its JSON-LD metadata in one
# multipart POST.
url = 'http://transfer-service/data/'

# Open the script in a context manager so the handle is closed as soon as the
# request has been sent, instead of staying open for the kernel's lifetime.
with open('/make_graphic.py', 'rb') as graphic_file:
    files = {
        'files': graphic_file,
        'metadata': json.dumps(spark_graphic_meta)
    }
    r = requests.post(url, files=files)

# Surface HTTP failures here rather than as a confusing KeyError/IndexError
# when the identifier is extracted below.
r.raise_for_status()

# First minted identifier is the one assigned to the uploaded script.
graphic_code_id = r.json()['Minted Identifiers'][0]

## Run Spark Job to create Image

In [None]:
# Submit a second job: the first job's output is the dataset and the image
# script is the code to run.
job2 = dict(datasetID=output_id, scriptID=graphic_code_id)

R = requests.post(
    "http://sparksubmit/job",
    json=job2,
)

# Build the job's identifier by prefixing the response body with the
# 'ark:99999/' namespace.
job_id = 'ark:99999/' + R.content.decode()


### Get image ID from updated job id

In [None]:
# Fetch the job's metadata record and display its 'eg:supports' entry.
job_record = requests.get('http://mds.ors/' + job_id).json()
job_record['eg:supports']


In [None]:
# Re-fetch the job's metadata record and keep the first 'eg:supports' entry
# as the identifier of the generated image.
job_record = requests.get('http://mds.ors/' + job_id).json()
output2_id = job_record['eg:supports'][0]

### Build Evidence graph of created image using the evidence graph service

In [None]:
# Request the evidence graph for the generated image and display the decoded
# JSON body.
evidence_graph_response = requests.get('http://eg/eg/' + output2_id)
evidence_graph_response.json()

In [3]:
# Literal evidence-graph dictionary for 'Histogram_Heatmap.png'; this cell
# makes no service call and only displays the nested structure.
# NOTE(review): looks like a response pasted from an earlier run -- confirm.
{'@id': 'ark:99999/6d270e8e-8641-4e84-9ae0-2cc19f814ff7', 'eg:generatedBy': {'@id': 'ark:99999/1b03b0fa-c64d-46d2-ac17-fddcdfe677a0', '@type': 'eg:Computation', 'began': 'Tuesday, May 12, 2020 06:03:59', 'eg:usedDataset': {'@id': 'ark:99999/266686a7-8cce-4a7b-90e7-24cad017962f', 'eg:generatedBy': {'@id': 'ark:99999/eb4ccc20-f7c4-480d-b47f-cd849782efd9', 'began': 'Tuesday, May 12, 2020 05:59:48', 'eg:supports': 'ark:99999/f6625ff2-690d-47f5-86bd-5aa96bce2856', 'eg:usedDataset': {'@id': 'ark:99999/227ab04f-4565-4fc3-95d7-e8a5025e36df', '@type': 'Dataset', 'eg:generatedBy': 'ark:99999/716ac9d6-8d97-41a0-8ea1-40cb7b554ef7', 'name': 'Patient 7129 HR'}, 'eg:usedSoftware': {'@id': 'ark:99999/da045a02-51fc-4cac-b3db-46c821e99ce5', 'eg:generatedBy': 'ark:99999/9b89ad2a-ecd9-4c59-a416-9702621c5021', 'name': 'Initial HCTSA Python Spark Implementation'}}, 'name': 'part-00000-6b8274ee-6bb0-4965-92ae-9c35c5eeb0be-c000.csv'}, 'eg:usedSoftware': {'@id': 'ark:99999/5ce6aa2a-03ee-4325-821d-adeb09105b18', '@type': 'SoftwareSourceCode', 'author': {'@id': 'https://orcid.org/0000-0002-1103-3882', 'name': 'Justin Niestroy'}, 'eg:generatedBy': 'ark:99999/7d49d1aa-f7ee-4448-a616-3c9c19eb7445', 'name': 'Heatmap Producer'}}, 'name': 'Histogram_Heatmap.png'}

{'@id': 'ark:99999/6d270e8e-8641-4e84-9ae0-2cc19f814ff7',
 'eg:generatedBy': {'@id': 'ark:99999/1b03b0fa-c64d-46d2-ac17-fddcdfe677a0',
  '@type': 'eg:Computation',
  'began': 'Tuesday, May 12, 2020 06:03:59',
  'eg:usedDataset': {'@id': 'ark:99999/266686a7-8cce-4a7b-90e7-24cad017962f',
   'eg:generatedBy': {'@id': 'ark:99999/eb4ccc20-f7c4-480d-b47f-cd849782efd9',
    'began': 'Tuesday, May 12, 2020 05:59:48',
    'eg:supports': 'ark:99999/f6625ff2-690d-47f5-86bd-5aa96bce2856',
    'eg:usedDataset': {'@id': 'ark:99999/227ab04f-4565-4fc3-95d7-e8a5025e36df',
     '@type': 'Dataset',
     'eg:generatedBy': 'ark:99999/716ac9d6-8d97-41a0-8ea1-40cb7b554ef7',
     'name': 'Patient 7129 HR'},
    'eg:usedSoftware': {'@id': 'ark:99999/da045a02-51fc-4cac-b3db-46c821e99ce5',
     'eg:generatedBy': 'ark:99999/9b89ad2a-ecd9-4c59-a416-9702621c5021',
     'name': 'Initial HCTSA Python Spark Implementation'}},
   'name': 'part-00000-6b8274ee-6bb0-4965-92ae-9c35c5eeb0be-c000.csv'},
  'eg:usedSoftware': 