## Setup

In [None]:
import json
import os
import requests
from pprint import pprint

In [None]:
# Read this (source) workspace's identifiers from the environment.
# A variable that is not set comes back as None.
WS_ID = os.environ.get('WORKSPACE_ID')
WS_SC_ID = os.environ.get('WORKSPACE_STORAGE_CONTAINER_ID')
WS_NAME = os.environ.get('WORKSPACE_NAME')
WS_SC_URL = os.environ.get('WORKSPACE_STORAGE_CONTAINER_URL')

# Echo the values so a missing one (printed as None) is noticed early.
print(f'workspace ID:                  {WS_ID}')
print(f'storage container resource ID: {WS_SC_ID}')
print(f'storage container url:         {WS_SC_URL}')

In [None]:
# dev API base urls
# No trailing slash: request paths are appended as f'{BASE_URL}/api/...',
# so a trailing slash here would produce '//api/...'.
WSM_URL = 'https://workspace.dsde-dev.broadinstitute.org'
TSPS_URL = 'https://tsps.dsde-dev.broadinstitute.org'
ORCH_URL = 'https://firecloud-orchestration.dsde-dev.broadinstitute.org'

In [None]:
def get_access_token(verbose=False):
    """Get access token for pet managed identity in Azure.

    Runs `az login --identity` and then reads the token from
    `az account get-access-token`, extracting the `accessToken` field with `jq`.
    Both the `az` and `jq` command-line tools are invoked through the shell.

    Args:
        verbose: when True, `az login` is run without `--output none`;
            otherwise `--output none` is passed to it.

    Returns:
        The first line of the `jq` output with all double-quote characters removed.
    """
    if verbose:
        !az login --identity --allow-no-subscriptions
    else:
        !az login --identity --allow-no-subscriptions --output none
    # `name = !cmd` captures the command's output as a list of lines
    cli_token = !az account get-access-token | jq .accessToken

    # jq prints the JSON string including its quotes; strip them
    return cli_token[0].replace('"', '')

def get_headers(verb='GET', verbose=False):
    """Build request headers carrying a freshly fetched bearer token.

    Args:
        verb: HTTP verb the headers are for; exactly 'POST' adds a JSON
            Content-Type header, any other value does not.
        verbose: forwarded to get_access_token.

    Returns:
        dict with 'Authorization' and 'accept', plus 'Content-Type' for POST.
    """
    token = get_access_token(verbose)
    content_type = {'Content-Type': 'application/json'} if verb == 'POST' else {}

    return {
        'Authorization': 'Bearer ' + token,
        'accept': '*/*',
        **content_type,
    }

In [None]:
# Fetch the headers once. The request helpers below read these globals at call
# time, so re-running this cell refreshes the token they use.
HEADERS_GET = get_headers(verb='GET')
HEADERS_POST = get_headers(verb='POST')

### workspace functions
def get_workspace_id(ws_project, ws_name, orch_url=ORCH_URL, verbose=False):
    """Look up a workspace's ID from its project and name.

    Args:
        ws_project: project segment of the workspace path.
        ws_name: workspace name.
        orch_url: base URL of the orchestration API; a trailing slash is tolerated.
        verbose: currently unused.

    Returns:
        The value at ['workspace']['workspaceId'] in the JSON response.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
    """
    # strip any trailing slash so the joined URL never contains '//api'
    uri = f"{orch_url.rstrip('/')}/api/workspaces/{ws_project}/{ws_name}"

    response = requests.get(uri, headers=HEADERS_GET)
    # report the HTTP failure itself rather than an opaque KeyError below
    response.raise_for_status()

    return response.json()['workspace']['workspaceId']

def get_workspace_sc_resource_id(ws_id, wsm_url=WSM_URL, verbose=False):
    """Find the shared-access storage container resource ID of a workspace.

    Lists the workspace's AZURE_STORAGE_CONTAINER resources (offset 0, limit 10)
    and looks for entries whose access scope is 'SHARED_ACCESS'.

    Args:
        ws_id: workspace ID to inspect.
        wsm_url: base URL of the workspace manager API.
        verbose: currently unused.

    Returns:
        The resource ID of the last matching container in the listing, or
        None when no listed container has shared access.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
    """
    uri = f'{wsm_url}/api/workspaces/v1/{ws_id}/resources?offset=0&limit=10&resource=AZURE_STORAGE_CONTAINER'

    response = requests.get(uri, headers=HEADERS_GET)
    # report the HTTP failure itself rather than an opaque KeyError below
    response.raise_for_status()

    sc_resource_id = None
    for info_dict in response.json()['resources']:
        if info_dict['metadata']['controlledResourceMetadata']['accessScope'] == 'SHARED_ACCESS':
            # no break: if several containers match, the last one listed wins
            sc_resource_id = info_dict['metadata']['resourceId']

    return sc_resource_id


### file functions
def get_sas_token_for_blob(blob_name,
                           permissions='r',
                           ws_id=WS_ID,
                           ws_sc_id=WS_SC_ID,
                           wsm_url=WSM_URL,
                           verbose=False):
    """Request a SAS URL for one blob in a workspace storage container.

    Args:
        blob_name: name of the blob the SAS should be scoped to.
        permissions: value sent as `sasPermissions` (e.g. 'r' or 'w').
        ws_id: workspace ID; defaults to this workspace (bound when the
            function is defined).
        ws_sc_id: storage container resource ID; defaults to this workspace's.
        wsm_url: base URL of the workspace manager API.
        verbose: currently unused.

    Returns:
        The 'url' field of the JSON response.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
    """
    uri = f'{wsm_url}/api/workspaces/v1/{ws_id}/resources/controlled/azure/storageContainer/{ws_sc_id}/getSasToken'
    # let requests URL-encode the values so blob names containing characters
    # such as '&', '#', '+' or spaces do not corrupt the query string
    query = {'sasPermissions': permissions, 'sasBlobName': blob_name}

    # sent with the GET headers and an empty body
    # NOTE(review): presumably no JSON Content-Type is wanted because there is no payload - confirm
    response = requests.post(uri, headers=HEADERS_GET, params=query, data='')
    # report the HTTP failure itself rather than an opaque KeyError below
    response.raise_for_status()

    return response.json()['url']


### tsps functions
def get_available_tsps_pipelines(tsps_url=TSPS_URL, verbose=False):
    """Fetch the pipelines listing from the TSPS API.

    Args:
        tsps_url: base URL of the TSPS API.
        verbose: currently unused.

    Returns:
        The parsed JSON body of the response, whatever its HTTP status.
    """
    endpoint = f'{tsps_url}/api/pipelines/v1alpha1'
    return requests.get(endpoint, headers=HEADERS_GET).json()

def request_job(pipeline_id, pipeline_version, tsps_url=TSPS_URL, verbose=False):
    """Submit a job request for a pipeline to the TSPS API.

    Args:
        pipeline_id: pipeline identifier used in the request path.
        pipeline_version: value sent as 'pipelineVersion' in the JSON body.
        tsps_url: base URL of the TSPS API.
        verbose: currently unused.

    Returns:
        The parsed JSON body of the response, whatever its HTTP status.
    """
    payload = {'pipelineVersion': pipeline_version}
    endpoint = f'{tsps_url}/api/jobs/v1alpha1/{pipeline_id}'

    reply = requests.post(endpoint, headers=HEADERS_POST, data=json.dumps(payload))
    return reply.json()

def get_job_status(pipeline_id,
                   job_id,
                   tsps_url=TSPS_URL,
                   verbose=False):
    """Fetch the current status document of a TSPS job.

    Args:
        pipeline_id: pipeline identifier used in the request path.
        job_id: identifier of the job to look up.
        tsps_url: base URL of the TSPS API.
        verbose: currently unused.

    Returns:
        The parsed JSON body of the response. The HTTP status is deliberately
        not checked, so an error payload is returned to the caller as-is.
    """
    uri = f'{tsps_url}/api/jobs/v1alpha1/{pipeline_id}/{job_id}'

    response = requests.get(uri, headers=HEADERS_GET)

    return response.json()

## Copy a file to another workspace's storage container

In [None]:
# set up all the parameters
# blob in this (source) workspace's storage container to copy
source_file_to_copy = 'test.txt'

# project and name identifying the destination workspace
dest_ws_project = 'rtitle-playground'
dest_ws_name = 'morgan_test_destination_workspace'

# name the copied blob gets in the destination container
dest_blob_name = 'copied_test.txt'

In [None]:
# get sas token for source file
source_file_sas = get_sas_token_for_blob(source_file_to_copy)
# Show only the part before '?': the query string is the SAS credential and
# would otherwise be stored in the saved notebook output.
print(source_file_sas.split('?', 1)[0])

In [None]:
# get the destination workspace information
dest_ws_id = get_workspace_id(dest_ws_project, dest_ws_name)
dest_sc_id = get_workspace_sc_resource_id(dest_ws_id)

# get_workspace_sc_resource_id returns None when no shared-access container is
# listed; stop here instead of sending 'None' as a resource ID in later requests
if dest_sc_id is None:
    raise RuntimeError(
        f'no shared-access storage container found for workspace {dest_ws_id}'
    )

print(f'destination workspace ID:                  {dest_ws_id}')
print(f'destination storage container resource ID: {dest_sc_id}')

In [None]:
# create a target destination SAS token
# NOTE: according to documentation (https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-blobs-copy#guidelines)
# if the copy is between tenants (which we do want to support), we can't use Azure Active Directory (Azure AD) authentication
# and must instead use SAS tokens.
dest_file_sas = get_sas_token_for_blob(dest_blob_name,
                                       permissions='w',
                                       ws_id=dest_ws_id,
                                       ws_sc_id=dest_sc_id)
# Show only the part before '?': the query string is a write-capable SAS
# credential and would otherwise be stored in the saved notebook output.
print(dest_file_sas.split('?', 1)[0])

In [None]:
# azcopy needs the sas urls to be in quotes
# (presumably because the SAS query string contains characters the shell would
# otherwise interpret - TODO confirm); the quotes are added in Python because
# the values are substituted into the shell command line as plain text
source_file_sas_with_quotes = f"'{source_file_sas}'"
dest_file_sas_with_quotes = f"'{dest_file_sas}'"

# do the copy
# `$name` inside a `!` line is expanded by IPython to the Python variable's value
!azcopy copy $source_file_sas_with_quotes $dest_file_sas_with_quotes

## Submit and monitor an imputation job (TSPS)

In [None]:
# list the pipelines the TSPS API reports and pretty-print the JSON
response = get_available_tsps_pipelines()
pprint(response)

In [None]:
# request a job for version "1" of the "imputation" pipeline
response = request_job("imputation", "1")
# request_job returns the body even for failed requests; surface the service's
# payload instead of a bare KeyError when no job ID came back
if 'jobId' not in response:
    raise RuntimeError(f'job request did not return a jobId: {response}')
job_id = response['jobId']
print(job_id)

In [None]:
# look up the job requested above; re-run this cell to poll for updates
response = get_job_status("imputation", job_id)
pprint(response)