This notebook is intended to walk through the process of copying a file from another workspace (source/"user" workspace) to this one (destination/"service" workspace). 

To use this, you should upload a test file to the "workspace files" (storage container) of the source workspace, and note the name and namespace (project) of the source workspace, as well as the test file name to be copied, in the cell below labeled `# source workspace and file information`.

## Setup

In [11]:
import json
import os
import requests
from pprint import pprint

In [12]:
# dev API urls
env = 'prod'
WSM_URL = f'https://workspace.dsde-{env}.broadinstitute.org'
TSPS_URL = f'https://tsps.dsde-{env}.broadinstitute.org'
ORCH_URL = f'https://firecloud-orchestration.dsde-{env}.broadinstitute.org/'

In [13]:
def get_access_token(verbose=False):
    """Get access token for pet managed identity in Azure."""
    if verbose:
        !az login --identity --allow-no-subscriptions
    else:
        !az login --identity --allow-no-subscriptions --output none
    cli_token = !az account get-access-token | jq .accessToken

    return cli_token[0].replace('"', '')

def get_headers(verb='GET', verbose=False):
    headers = {
        'Authorization': 'Bearer ' + get_access_token(verbose),
        'accept': '*/*'
    }
    
    if verb == 'POST':
        headers['Content-Type'] = 'application/json'

    return headers

In [14]:
# get headers once - refresh this as needed
HEADERS_GET = get_headers('GET')
HEADERS_POST = get_headers('POST')

### workspace functions
def get_workspace_id(ws_project, ws_name, orch_url=ORCH_URL, verbose=False):
    uri = f'{orch_url}/api/workspaces/{ws_project}/{ws_name}'
    
    response = requests.get(uri, headers=HEADERS_GET)
    
    return response.json()['workspace']['workspaceId']

def get_workspace_sc_resource_id(ws_id, wsm_url=WSM_URL, verbose=False):
    uri = f'{wsm_url}/api/workspaces/v1/{ws_id}/resources?offset=0&limit=10&resource=AZURE_STORAGE_CONTAINER'
    
    response = requests.get(uri, headers=HEADERS_GET)
    
    sc_resource_id = None
    for info_dict in response.json()['resources']:
        if info_dict['metadata']['controlledResourceMetadata']['accessScope'] == 'SHARED_ACCESS':
            sc_resource_id = info_dict['metadata']['resourceId']         
    
    return sc_resource_id


### file functions
def get_sas_token_for_blob(blob_name, 
                           ws_id, 
                           ws_sc_id, 
                           permissions='r', 
                           wsm_url=WSM_URL,
                           verbose=False):
    uri = f'{wsm_url}/api/workspaces/v1/{ws_id}/resources/controlled/azure/storageContainer/{ws_sc_id}/getSasToken?sasPermissions={permissions}&sasBlobName={blob_name}'
    
    response = requests.post(uri, headers=HEADERS_GET, data='')
    status_code = response.status_code
    
    return response.json()['url']


### tsps functions
def get_available_tsps_pipelines(tsps_url=TSPS_URL, 
                                 verbose=False):
    uri = f'{tsps_url}/api/pipelines/v1alpha1'
    
    response = requests.get(uri, headers=HEADERS_GET)
    
    return response.json()

def request_job(pipeline_id, 
                pipeline_version, 
                tsps_url=TSPS_URL, 
                verbose=False):
    uri = f'{tsps_url}/api/jobs/v1alpha1/{pipeline_id}'
    body = json.dumps({
        'pipelineVersion': pipeline_version
    })
    
    response = requests.post(uri, headers=HEADERS_POST, data=body)
    
    return response.json()

def get_job_status(pipeline_id, 
                   job_id, 
                   tsps_url=TSPS_URL, 
                   verbose=False):
    uri = f'{tsps_url}/api/jobs/v1alpha1/{pipeline_id}/{job_id}'
    
    response = requests.get(uri, headers=HEADERS_GET)
    status_code = response.status_code
    
    return response.json()

## user/control workspace setup

In [15]:
# retrieve info for control workspace
ctrl_ws_project= 'dsp-azure-general'
ctrl_ws_name = 'js-imputation-pipeline-testingg'

ctrl_ws_id = get_workspace_id(ctrl_ws_project, ctrl_ws_name)
ctrl_ws_sc_id = get_workspace_sc_resource_id(ctrl_ws_id)

print(f'control workspace ID:                  {ctrl_ws_id}')
print(f'control storage container resource ID: {ctrl_ws_sc_id}')

user_ws_project = 'azure-featured-workspaces'
user_ws_name = 'Imputation User Workspace'

user_ws_id = get_workspace_id(user_ws_project, user_ws_name)
user_ws_sc_id = get_workspace_sc_resource_id(user_ws_id)

print(f'user workspace ID:                  {user_ws_id}')
print(f'user storage container resource ID: {user_ws_sc_id}')


control workspace ID:                  7a6ab368-c165-48ae-8b9a-a2cf1138ef1c
control storage container resource ID: bd889533-6a32-4374-8dc4-74c9a697d6d5
user workspace ID:                  a5ec64f3-69e4-4646-8fb6-db4f882a2dd6
user storage container resource ID: ebe08011-551d-4cd8-97eb-07970e90eade


## Transfer file from user workspace to control workspace

In [6]:

# you need to upload this file to the "workspace files" (storage container) of the user workspace
user_file_to_copy = 'test.txt'  # change this as needed

ctrl_file_destination = 'copied_test.txt'

# get sas token for user file
user_file_sas = get_sas_token_for_blob(user_file_to_copy, user_ws_id, user_ws_sc_id)

# create a target destination SAS token
# NOTE: according to documentation (https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-blobs-copy#guidelines)
# if the copy is between tenants (which we do want to support), we can't use Azure Active Directory (Azure AD) authentication
# and must instead use SAS tokens.
ctrl_file_sas = get_sas_token_for_blob(ctrl_file_destination, 
                                       permissions='w',
                                       ws_id=ctrl_ws_id,
                                       ws_sc_id=ctrl_ws_sc_id)

# azcopy needs the sas urls to be in quotes
source_file_sas_with_quotes = f"'{user_file_sas}'"
dest_file_sas_with_quotes = f"'{ctrl_file_sas}'"

# do the copy
!azcopy copy $source_file_sas_with_quotes $dest_file_sas_with_quotes

https://lze033433beed5b4a6a47de6.blob.core.windows.net/sc-a5ec64f3-69e4-4646-8fb6-db4f882a2dd6/test.txt?sv=2021-12-02&spr=https&st=2023-08-07T14%3A02%3A58Z&se=2023-08-07T15%3A17%3A58Z&sr=b&sp=r&sig=F2A8Kijob0gTuG%2F6MG81xkgmAFGu%2FxAQGviOMvFlvfI%3D&rscd=26845113632036fbad686
https://lz304a1e79fd7359e5327eda.blob.core.windows.net/sc-7a6ab368-c165-48ae-8b9a-a2cf1138ef1c/copied_test.txt?sv=2021-12-02&spr=https&st=2023-08-07T14%3A03%3A00Z&se=2023-08-07T15%3A18%3A00Z&sr=b&sp=w&sig=dRauaCCIrUu3RaNF2wcBfQ69a9YW9R7bOA9PZKoIpLw%3D&rscd=26845113632036fbad686
INFO: Scanning...
INFO: Failed to create one or more destination container(s). Your transfers may still succeed if the container already exists.
INFO: Any empty folders will not be processed, because source and/or destination doesn't have full folder support

Job 6a918b22-c21c-0545-557c-36d9e34543b7 has started
Log file is located at: /home/jupyter/.azcopy/6a918b22-c21c-0545-557c-36d9e34543b7.log

INFO: azcopy: A newer version 10.20.0 is ava

## transfer file from control workspace to user workspace

In [16]:
# you need to upload this file to the "workspace files" (storage container) of the control workspace
ctrl_file_to_copy = 'ctrl_output_file.txt'  # change this as needed

user_file_destination = 'copied_ctrl_output_file123.txt'

# get sas token for user file
ctrl_file_sas = get_sas_token_for_blob(ctrl_file_to_copy, ctrl_ws_id, ctrl_ws_sc_id)

# create a target destination SAS token
# NOTE: according to documentation (https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-blobs-copy#guidelines)
# if the copy is between tenants (which we do want to support), we can't use Azure Active Directory (Azure AD) authentication
# and must instead use SAS tokens.
user_file_sas = get_sas_token_for_blob(user_file_destination, 
                                       permissions='w',
                                       ws_id=user_ws_id,
                                       ws_sc_id=user_ws_sc_id)

# azcopy needs the sas urls to be in quotes
source_file_sas_with_quotes = f"'{ctrl_file_sas}'"
dest_file_sas_with_quotes = f"'{user_file_sas}'"

# do the copy
!azcopy copy $source_file_sas_with_quotes $dest_file_sas_with_quotes

https://lz304a1e79fd7359e5327eda.blob.core.windows.net/sc-7a6ab368-c165-48ae-8b9a-a2cf1138ef1c/ctrl_output_file.txt?sv=2021-12-02&spr=https&st=2023-08-07T16%3A00%3A42Z&se=2023-08-07T17%3A15%3A42Z&sr=b&sp=r&sig=0SpJlkhLujt3hpJn9TFPGyhAyOY88WpzJ51fZrmNo30%3D&rscd=26845113632036fbad686


KeyError: 'url'