In [1]:
# TRANSFER ISA STRUCTURE FROM ONE SEEK TO ANOTHER: ASSAY AND DATA_FILE (INCLUDING BLOB)

# SOURCE SEEK: 
#    GET/READ/PRINT JSON RESOURCE (ASSAY)    readJsonData()               (session.get)
#    PRINT RELATIONSHIPS OF AN ASSAY         printJsonDataRelationships()  
#    READ DATA FILE BLOB, DOWNLOAD BLOB      readBlobData()               (session.get, urlopen(Request(url=download_link, headers=headers2)))
#    ISA STRUCTURE (ASSAY)                   determineISAstructureFromRelationships()  (not really doing anything)
# TARGET SEEK: 
#    REGISTER ASSAY                          registerAssay()              (session.post)
#    REGISTER DATA FILE AND BLOB             registerBlobData()           (session.post)
#    UPLOAD BLOB INTO DATA FILE              uploadBlobData()             (session.put)
#    COMBINES REGISTER DATA FILE AND UPLOAD BLOB   TransferData()

# USING 2 SEEKS

In [2]:
"""
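Import the libraries so that they can be used within the notebook

  * **requests** is used to make HTTP calls
  * **json** is used to encode and decode strings into JSON
  * **string** is used to perform text manipulation and checking
  * **pandas** helps format the JSON data in a more readable format
  * **getpass** prompts for the password without echoing it
  * **urllib.request** is used to download the blob of a data file
  * **PIL** and **io** are imported as well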
"""

import requests
import json
import string
# Importing the libraries we need to format the data in a more readable way. 
import pandas as pd
from pandas.io.json import json_normalize
#authentication
import getpass
import urllib.request
from urllib.request import urlopen, Request
from PIL import Image
import io

In [3]:
### FUNCTIONS

def authenticate(headers):
    """Build a ``requests`` session and ask the user for its credentials.

    Parameters
    ----------
    headers : mapping of HTTP header names to values; merged into the
        session's default headers.

    Returns
    -------
    The new session. Its ``auth`` attribute holds the ``(username, password)``
    tuple typed in by the user; the password is read with ``getpass`` so it
    is not echoed.
    """
    http_session = requests.Session()
    http_session.headers.update(headers)
    # The username is asked for first, then the password.
    username = input('Username:')
    password = getpass.getpass('Password')
    http_session.auth = (username, password)
    return http_session

### GET JSON
def json_for_resource(session, headers_json, url, type, id):
    """Fetch one resource and return its decoded JSON document.

    Performs ``GET <url>/<type>/<id>`` through ``session``.

    Parameters
    ----------
    session : object with a ``get(url, headers=...)`` method (a session
        created by ``authenticate`` in this notebook).
    headers_json : headers sent with the request.
    url : base URL of the server, without a trailing slash.
    type : resource collection name, e.g. ``'assays'``.
    id : resource identifier; converted with ``str()``.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    Whatever ``raise_for_status()`` raises for an HTTP error status.
    """
    r = session.get(url + "/" + type + "/" + str(id), headers=headers_json)
    if r.status_code != 200:
        # Show the server's error payload to help debugging. An error body is
        # not guaranteed to be JSON; a decoding failure here must not mask the
        # HTTP error raised just below, so fall back to the raw text.
        try:
            print(r.json())
        except ValueError:
            print(r.text)
    r.raise_for_status()
    return r.json()

### READ / PRINT JSON
def readJsonData(session, headers_json, url, data_id, data_type):
    """Fetch a resource, print its title and its full JSON, and return the JSON.

    Parameters
    ----------
    session, headers_json, url : passed through to ``json_for_resource``.
    data_id : identifier of the resource.
    data_type : collection name of the resource (e.g. ``'assays'``); also
        used in the printed headline.

    Returns
    -------
    The decoded JSON document returned by ``json_for_resource``.
    """
    resource = json_for_resource(session, headers_json, url, data_type, data_id)
    title = resource['data']['attributes']['title']
    headline = "Name of \'" + data_type + "\': " + title + "\n"
    print(headline)
    print(resource)
    return resource

### PRINT RELATIONSHIPS OF A JSON
def printJsonDataRelationships(session, headers_json, source_base_url, input_data):
    """Resolve and print every resource linked from ``input_data``.

    Walks ``input_data['data']['relationships']``, prints each relationship
    name, fetches every linked resource with ``json_for_resource`` and
    collects its type, id and title. A summary table is printed at the end.

    The linkage of a relationship may be
      * a single ``{'id', 'type'}`` dict (to-one, e.g. ``investigation``),
      * a list of such dicts (to-many, e.g. ``data_files``), or
      * ``None`` (empty to-one), which is skipped.
    The shape is detected from the data itself instead of from a fixed list
    of relationship names, so other to-one relationships work as well.

    Parameters
    ----------
    session, headers_json, source_base_url : passed to ``json_for_resource``.
    input_data : decoded JSON document of the resource to inspect.

    Returns
    -------
    list of dicts with the keys ``'type'``, ``'id'`` and ``'title'``. For
    to-one relationships ``'type'`` is the type reported by the fetched
    resource; for to-many relationships it is the relationship name.
    """
    files = []
    for dtype, relationship in input_data['data']['relationships'].items():
        print(dtype)    # e.g. data_files, investigation, study, projects

        linkage = relationship['data']
        if linkage is None:
            # Empty to-one relationship: nothing to resolve.
            continue

        if isinstance(linkage, dict):
            # to-one, e.g. 'investigation': {'data': {'id': '3', 'type': 'investigations'}}
            j = json_for_resource(session, headers_json, source_base_url, linkage['type'], linkage['id'])
            files.append({
                'type': j['data']['type'],
                'id': j['data']['id'],
                'title': j['data']['attributes']['title'],
            })
        else:
            # to-many, e.g. 'data_files': {'data': [{'id': '38', 'type': 'data_files'}]}
            for item in linkage:
                j = json_for_resource(session, headers_json, source_base_url, item['type'], item['id'])
                files.append({
                    'type': str(dtype),
                    'id': j['data']['id'],
                    'title': j['data']['attributes']['title'],
                })

    print()
    print(str(len(files)) + " relationships found: \n")
    # Prefer the public pandas.json_normalize (pandas >= 1.0); on older pandas
    # fall back to the name imported at the top of the notebook.
    normalize = getattr(pd, 'json_normalize', None) or json_normalize
    print(normalize(files))
    return files


### ISA STRUCTURE (ASSAY) (not really doing anything)
def determineISAstructureFromRelationships(input_relationships, data_type=None):
    """Order the relationships of an assay along the ISA structure.

    Only assays are handled. For an assay the entries of
    ``input_relationships`` are regrouped in the order projects,
    investigations, studies, data_files, creators; entries of any other type
    are dropped. Within one type the original order is kept. For any other
    data type the result is an empty list.

    Parameters
    ----------
    input_relationships : list of dicts, each with at least a ``'type'`` key
        (as returned by ``printJsonDataRelationships``).
    data_type : type of the resource the relationships belong to. When
        omitted, the notebook-level ``source_data_type`` is used, which is
        what the function always did before this parameter existed.

    Returns
    -------
    list with the selected relationship dicts (the same objects, reordered).
    The list is also printed as a table.
    """
    if data_type is None:
        # Backward-compatible default: the notebook-level setting.
        data_type = source_data_type

    # Always defined, so a non-assay type yields an empty result instead of
    # failing on an unbound local variable.
    isa_structure = []
    if data_type == 'assays':
        structure = ['projects', 'investigations', 'studies', 'data_files', 'creators']
        isa_structure = [
            relationship
            for wanted_type in structure
            for relationship in input_relationships
            if relationship['type'] == wanted_type
        ]

    # Prefer the public pandas.json_normalize (pandas >= 1.0); on older pandas
    # fall back to the name imported at the top of the notebook.
    normalize = getattr(pd, 'json_normalize', None) or json_normalize
    print(normalize(isa_structure))
    return isa_structure


### READ DATA_FILE DATA, GET BLOB DATA
def readBlobData(session, headers_json, headers_token, url, data_id, data_type):
    """Read a data file's metadata and download the content of its first blob.

    The metadata is fetched with ``json_for_resource`` (through ``session``).
    The blob itself is downloaded with ``urllib`` from ``<blob link>/download``
    using ``headers_token`` as request headers.

    Parameters
    ----------
    session, headers_json, url : passed to ``json_for_resource``.
    headers_token : headers for the download request.
    data_id : identifier of the data file.
    data_type : collection name of the resource (e.g. ``'data_files'``).

    Returns
    -------
    tuple ``(result_json, filetitle, filename, filetype, filelicense, link,
    download_link, data)`` where ``data`` is the downloaded content as bytes.

    Raises
    ------
    IndexError if the resource has no content blob; HTTP/URL errors from the
    two requests are propagated.
    """
    result_json = json_for_resource(session, headers_json, url, data_type, data_id)
    attributes = result_json['data']['attributes']

    filetitle = attributes['title']
    filelicense = attributes['license']

    # Only the first content blob is transferred.
    blob = attributes['content_blobs'][0]
    filename = blob['original_filename']
    filetype = blob['content_type']

    link = blob['link']
    download_link = link + "/download"

    req = Request(url=download_link, headers=headers_token)
    # Close the HTTP response once the body has been read.
    with urlopen(req) as response:
        data = response.read()

    return result_json, filetitle, filename, filetype, filelicense, link, download_link, data


### REGISTER ASSAY
def registerAssay(session, in_assay_json, target_project_id, target_investigation_id, target_study_id, target_creator_id, base_url=None):
    """Register a copy of an assay on the target server and return its id.

    Title, description, assay class, assay type and technology type are
    copied from ``in_assay_json``. The sharing policy is not copied: the new
    assay gets ``no_access`` plus a ``download`` permission for the target
    project.

    Parameters
    ----------
    session : object with a ``post(url, json=...)`` method (a session created
        by ``authenticate`` in this notebook).
    in_assay_json : decoded JSON document of the assay to copy.
    target_project_id, target_investigation_id, target_study_id,
    target_creator_id : ids of the resources on the target server the new
        assay is linked to.
    base_url : base URL of the target server. When omitted, the
        notebook-level ``target_base_url`` is used, which is what the
        function always did before this parameter existed.

    Returns
    -------
    The id of the newly registered assay, as reported by the server.

    Raises
    ------
    Whatever ``raise_for_status()`` raises for an HTTP error status.
    """
    if base_url is None:
        # Backward-compatible default: the notebook-level setting.
        base_url = target_base_url

    source_attributes = in_assay_json['data']['attributes']

    new_assay_json = {
        'data': {
            'type': 'assays',
            'attributes': {
                'title': source_attributes['title'],
                'description': source_attributes['description'],
                'policy': {
                    'access': 'no_access',
                    'permissions': [
                        {'resource': {'id': target_project_id, 'type': 'projects'}, 'access': 'download'},
                    ],
                },
                'assay_class': source_attributes['assay_class'],
                'assay_type': source_attributes['assay_type'],
                'technology_type': source_attributes['technology_type'],
            },
            'relationships': {
                'creators': {'data': [{'id': target_creator_id, 'type': 'people'}]},
                'study': {'data': {'id': target_study_id, 'type': 'studies'}},
                'investigation': {'data': {'id': target_investigation_id, 'type': 'investigations'}},
                # NOTE(review): sent as a single object, while the assay JSON
                # read from the server lists 'projects' as an array. Kept as
                # it was; confirm against the server API before changing.
                'projects': {'data': {'id': target_project_id, 'type': 'projects'}},
            },
        }
    }

    r = session.post(base_url + '/assays', json=new_assay_json)
    r.raise_for_status()
    populated_assay = r.json()
    print("Registered assay: ", populated_assay)
    assay_id = populated_assay['data']['id']

    return assay_id


### REGISTER DATA FILE AND BLOB 
def registerBlobData(session, base_url, data_type, filetitle, filelicense, blob, target_project_id, target_investigation_id, target_study_id, target_assay_id, target_creator_id):
    """Register a data file (with one content-blob placeholder) on a server.

    Posts a new resource of type ``data_type`` to ``<base_url>/<data_type>``.
    The resource gets the given title and license, a ``no_access`` policy
    with a ``download`` permission for the target project, the single
    ``blob`` description, and links to the given project, investigation,
    study, assay and creator.

    Parameters
    ----------
    session : object with a ``post(url, json=...)`` method.
    base_url : base URL of the server, without a trailing slash.
    data_type : collection name of the new resource (e.g. ``'data_files'``).
    filetitle, filelicense : title and license of the new resource.
    blob : dict describing the content blob; it is sent as the only element
        of ``content_blobs``.
    target_project_id, target_investigation_id, target_study_id,
    target_assay_id, target_creator_id : ids of the linked resources.

    Returns
    -------
    tuple ``(id, link)``: the id of the registered resource and the link of
    its first content blob, both taken from the server's answer.

    Raises
    ------
    Whatever ``raise_for_status()`` raises for an HTTP error status.
    """
    def linked(resource_id, resource_type):
        # One-element to-many linkage, as used by every relationship below.
        return {'data': [{'id': resource_id, 'type': resource_type}]}

    payload = {
        'data': {
            'type': data_type,
            'attributes': {
                'title': filetitle,
                'license': filelicense,
                'policy': {
                    'access': 'no_access',
                    'permissions': [
                        {'resource': {'id': target_project_id, 'type': 'projects'}, 'access': 'download'},
                    ],
                },
                # The server rejects the registration without a blob entry.
                'content_blobs': [blob],
            },
            'relationships': {
                'projects': linked(target_project_id, 'projects'),
                'investigations': linked(target_investigation_id, 'investigations'),
                'studies': linked(target_study_id, 'studies'),
                'assays': linked(target_assay_id, 'assays'),
                'creators': linked(target_creator_id, 'people'),
            },
        }
    }

    # Register the data file.
    response = session.post(base_url + '/' + data_type, json=payload)
    response.raise_for_status()

    registered = response.json()
    print("Registered data_file: ", registered["data"])
    record = registered["data"]
    new_id = record['id']
    new_link = record['attributes']['content_blobs'][0]['link']
    return new_id, new_link


### UPLOAD BLOB INTO DATA FILE
def uploadBlobData(session, headers_json, headers_stream, base_url, data_type, blob_id, blob_url, binary_data):
    """Upload binary content into a registered blob and print the result.

    The content is sent with ``PUT`` to ``blob_url``. Afterwards the resource
    ``<base_url>/<data_type>/<blob_id>`` is fetched again and its
    ``content_blobs`` are printed.

    Parameters
    ----------
    session : session used for the upload and for the follow-up read.
    headers_json : headers for the follow-up read (``json_for_resource``).
    headers_stream : headers for the upload request.
    base_url, data_type, blob_id : locate the resource to read back;
        ``blob_id`` is the id of that resource.
    blob_url : link of the content blob to upload into.
    binary_data : the content to upload.

    Returns
    -------
    None.

    Raises
    ------
    Whatever ``raise_for_status()`` raises for an HTTP error status.
    """
    # Send the content.
    put_response = session.put(blob_url, data=binary_data, headers=headers_stream)
    put_response.raise_for_status()

    # Read the resource back and show what the server stored.
    refreshed = json_for_resource(session, headers_json, base_url, data_type, blob_id)
    stored_blobs = refreshed['data']['attributes']['content_blobs']
    print("Uploaded blob data: ", stored_blobs)
    
        
### COMBINES REGISTER DATA FILE AND UPLOAD BLOB (not needed)
def TransferData(session, headers_json, headers_stream, base_url, data_type, filetitle, filelicense, blob, dataBinary,
    target_project_id, target_investigation_id, target_study_id, target_assay_id, target_creator_id):
    """Register a data file and upload its content in one step.

    Calls ``registerBlobData`` with the given metadata and target ids, prints
    an empty line, then calls ``uploadBlobData`` with the id and blob link
    returned by the registration.

    Parameters
    ----------
    session, base_url, data_type, filetitle, filelicense, blob,
    target_project_id, target_investigation_id, target_study_id,
    target_assay_id, target_creator_id : passed to ``registerBlobData``.
    headers_json, headers_stream : passed to ``uploadBlobData``.
    dataBinary : the content to upload.

    Returns
    -------
    None.
    """
    registration = registerBlobData(
        session, base_url, data_type, filetitle, filelicense, blob,
        target_project_id, target_investigation_id, target_study_id,
        target_assay_id, target_creator_id,
    )
    new_id = registration[0]
    new_link = registration[1]

    print()
    uploadBlobData(
        session, headers_json, headers_stream, base_url, data_type,
        new_id, new_link, dataBinary,
    )


In [4]:
### AUTHENTICATION
# headers1: sent with the JSON requests (asks for the application/vnd.api+json media type).
headers1 = {"Accept": "application/vnd.api+json", "Accept-Charset": "ISO-8859-1"} #headers_json

# The token is the first line of the local file "token".
# The file is closed as soon as the line has been read.
with open("token") as token_file:
    API_TOKEN = token_file.readline().strip() #"user:password" encoded in base64

# headers2: sent with the blob download, which goes through urllib rather than the session.
headers2 = { #headers_token
       "Authorization": "Basic %s" %API_TOKEN,
       'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

# headers3: sent with the blob upload (raw bytes).
headers3 = {'Content-Type': 'application/octet-stream'} #headers_stream

In [5]:
# Session for the SOURCE SEEK: prompts for username and password, used by the read cells.
session1 = authenticate(headers1)

Username:dudasda
Password········


In [6]:
# Session for the TARGET SEEK: prompts for username and password, used by the register/upload cells.
session2 = authenticate(headers1)

Username:dudasda
Password········


In [7]:
### SOURCE DATA PARAMETERS

# Base URL of the SEEK instance the assay is read from.
source_base_url = 'http://localhost:3000'

### Assay to be copied: its id and its resource collection.
source_assay_id = 4
source_data_type = 'assays'

### Alternative (disabled): copy a single data file (network image) instead.
#source_data_id = 38 # 22
#source_data_type = 'data_files'

### Upwards structure of the data (disabled): 1 project
#source_project_id = 1      # Default Project
##source_project_id = 2      # Project Alpha

In [8]:
### GET/READ/PRINT JSON RESOURCE (ASSAY)
# Fetch the source assay, print its title and raw JSON, and keep the JSON for the following cells.
assay_json = readJsonData(session1, headers1, source_base_url, source_assay_id, source_data_type)

Name of 'assays': Assay to be copied

{'data': {'id': '4', 'type': 'assays', 'attributes': {'policy': {'access': 'no_access', 'permissions': [{'resource': {'id': '2', 'type': 'projects'}, 'access': 'download'}]}, 'title': 'Assay to be copied', 'description': 'Description of an assay to be copied.', 'other_creators': None, 'assay_class': {'title': 'Experimental assay', 'key': 'EXP', 'description': None}, 'assay_type': {'label': 'Experimental Assay Type', 'uri': 'http://jermontology.org/ontology/JERMOntology#Experimental_assay_type'}, 'technology_type': {'label': 'Technology Type', 'uri': 'http://jermontology.org/ontology/JERMOntology#Technology_type'}, 'tags': None}, 'relationships': {'creators': {'data': [{'id': '1', 'type': 'people'}]}, 'submitter': {'data': [{'id': '1', 'type': 'people'}]}, 'organisms': {'data': []}, 'people': {'data': [{'id': '1', 'type': 'people'}]}, 'projects': {'data': [{'id': '2', 'type': 'projects'}]}, 'investigation': {'data': {'id': '3', 'type': 'investigatio

In [9]:
### PRINT RELATIONSHIPS OF A JSON
# Resolves every resource linked from the assay; the result is a list of
# {'type', 'id', 'title'} dicts, one per linked resource.
#grep_type = ['data_files', 'investigation', 'study', 'projects','creators','submitter','people']
source_relationships = printJsonDataRelationships(session1, headers1, source_base_url, assay_json)#, grep_type

creators
submitter
organisms
people
projects
investigation
study
data_files
models
sops
publications
documents

7 relationships found: 

   id              title            type
0   1      Dorotea Dudas        creators
1   1      Dorotea Dudas       submitter
2   1      Dorotea Dudas          people
3   2      Project Alpha        projects
4   3  investigation one  investigations
5   3          study one         studies
6  38      Network Image      data_files


In [10]:
### READ DATA FILE BLOB FROM SOURCE

### Data file id: taken from the first 'data_files' relationship of the assay.
# It is looked up by type rather than by list position, so the cell keeps
# working when the assay has a different number of creators, projects, etc.
data_file_ids = [rel['id'] for rel in source_relationships if rel['type'] == 'data_files']
if not data_file_ids:
    raise ValueError("The source assay has no data_files relationship to copy")
data_file_id = data_file_ids[0]

# readBlobData returns:
#   result_json, filetitle, filename, filetype, filelicense, link, download_link, data
dataRead = readBlobData(session1, headers1, headers2, source_base_url, data_file_id, 'data_files')
# NOTE(review): "Data Type" shows source_data_type (the type of the assay), not the type of the data file.
print("Data Type:\t", source_data_type, "\nFile Type:\t", dataRead[3], "\nFile License:\t", dataRead[4])
print("File Name:\t", dataRead[2], "\nFile Title:\t", dataRead[1], "\nDownload link:\t", dataRead[6])
print("Binary Data:\t", dataRead[7][0:30], " etc.")

# Data to be uploaded to the target.
dataBinary = dataRead[7]

Data Type:	 assays 
File Type:	 image/png 
File License:	 CC-BY-4.0
File Name:	 image_02.png 
File Title:	 Network Image 
Download link:	 http://localhost:3000/data_files/38/content_blobs/41/download
Binary Data:	 b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x02X\x00\x00\x01\x90\x08\x06\x00\x00\x00r'  etc.


In [11]:
### ISA STRUCTURE SOURCE (ASSAY) (only for assays, single data file) (not really doing anything)
# Reorders the relationships as projects, investigations, studies, data_files, creators and prints them.
# The result is not used by the cells that follow.
isas = determineISAstructureFromRelationships(source_relationships)

   id              title            type
0   2      Project Alpha        projects
1   3  investigation one  investigations
2   3          study one         studies
3  38      Network Image      data_files
4   1      Dorotea Dudas        creators


In [12]:
### TARGET DATA PARAMETERS
### Steps needed: register assay (get assay id), register data_file (get data_file id), upload blob

### DATABASE
# Base URL of the SEEK instance the assay and the data file are copied to.
#target_base_url = 'http://localhost:4000'
target_base_url = 'http://doroteadesktop:4000'

### ASSAY: register assay
#target_assay_id - will be obtained after registering an assay
target_assay_data_type = source_data_type #'assays'


##### ISA STRUCTURE (need to get it from Doro2Dom)

### UPWARDS STRUCTURE of the assay: 1 project, 1 investigation, 1 study
#target_project_id = 1      # Default Project   (same id as source seek, since they are originally clones)
target_project_id = 2       # Project Alpha     (same id as source seek, since they are originally clones)
target_investigation_id = 3 # investigation one (same id as source seek, since they are originally clones)
#target_study_id = 3        # study one         (same id as source seek, since they are originally clones)
target_study_id = 4         # study two         (made just for this purpose, not in source seek)
target_creator_id = 1      # Dorotea           (same id as source seek, since they are originally clones)
target_creator_id2 = 3      # Teodora           (same id as source seek, since they are originally clones)

### DOWNWARDS STRUCTURE of the assay: 1 data file
#target_data_file_id             - will be obtained after registering a data_file
target_data_file_data_type = 'data_files'
# dataRead is the tuple returned by readBlobData: [1] title, [2] file name, [3] content type, [4] license.
target_filetitle = dataRead[1] + ' from DoroDom to Doro2Dom together with an Assay 3'
target_filelicense = dataRead[4]
# data_file blob: description sent when the data file is registered; the content is uploaded separately.
target_filename = dataRead[2]
target_filetype = dataRead[3]
target_blob = {'original_filename' : target_filename, 'content_type' : target_filetype}

In [13]:
### REGISTER ASSAY (in second seek and get the id) (TO THE STUDY IN THE 2nd SEEK (RESPECTING THE UPWARDS STRUCTURE))
# registerAssay posts to the notebook-level target_base_url and returns the id of the new assay;
# that id is needed to link the data file in the next cell.

target_assay_id = registerAssay(session2, assay_json, target_project_id, target_investigation_id, target_study_id, target_creator_id)
print()
print(target_assay_id)

Registered assay:  {'data': {'id': '14', 'type': 'assays', 'attributes': {'policy': {'access': 'no_access', 'permissions': [{'resource': {'id': '2', 'type': 'projects'}, 'access': 'download'}]}, 'title': 'Assay to be copied', 'description': 'Description of an assay to be copied.', 'other_creators': None, 'assay_class': {'title': 'Experimental assay', 'key': 'EXP', 'description': None}, 'assay_type': {'label': 'Experimental Assay Type', 'uri': 'http://jermontology.org/ontology/JERMOntology#Experimental_assay_type'}, 'technology_type': {'label': 'Technology Type', 'uri': 'http://jermontology.org/ontology/JERMOntology#Technology_type'}, 'tags': None}, 'relationships': {'creators': {'data': [{'id': '1', 'type': 'people'}]}, 'submitter': {'data': [{'id': '1', 'type': 'people'}]}, 'organisms': {'data': []}, 'people': {'data': [{'id': '1', 'type': 'people'}]}, 'projects': {'data': [{'id': '2', 'type': 'projects'}]}, 'investigation': {'data': {'id': '3', 'type': 'investigations'}}, 'study': {'

In [14]:
### REGISTER DATA FILE AND BLOB 
# Registers the data file on the target and links it to the assay registered above.
# registerBlobData returns (data file id, link of its content blob); both are needed for the upload.

target_data_file  = registerBlobData(
    session2, target_base_url, target_data_file_data_type, target_filetitle, target_filelicense, target_blob, 
    target_project_id, target_investigation_id, target_study_id, target_assay_id, target_creator_id2)
target_data_file_id = target_data_file[0]
target_data_file_link = target_data_file[1]
print()
print(target_data_file_id, target_data_file_link)

Registered data_file:  {'id': '55', 'type': 'data_files', 'attributes': {'policy': {'access': 'no_access', 'permissions': [{'resource': {'id': '2', 'type': 'projects'}, 'access': 'download'}]}, 'title': 'Network Image from DoroDom to Doro2Dom together with an Assay 3', 'description': None, 'license': 'CC-BY-4.0', 'latest_version': 1, 'tags': None, 'versions': [{'version': 1, 'revision_comments': None, 'url': 'http://localhost:4000/data_files/55?version=1'}], 'version': 1, 'revision_comments': None, 'created_at': '2019-04-09T14:17:44.000Z', 'updated_at': '2019-04-09T14:17:44.000Z', 'content_blobs': [{'original_filename': 'image_02.png', 'url': None, 'md5sum': None, 'sha1sum': None, 'content_type': 'image/png', 'link': 'http://localhost:4000/data_files/55/content_blobs/58', 'size': None}], 'other_creators': None}, 'relationships': {'creators': {'data': [{'id': '3', 'type': 'people'}]}, 'submitter': {'data': [{'id': '1', 'type': 'people'}]}, 'people': {'data': [{'id': '1', 'type': 'people

In [15]:
### UPLOAD BLOB INTO DATA FILE IN TARGET DATABASE
# Sends the bytes downloaded from the source into the blob registered above, then prints the stored blob info.

uploadBlobData(session2, headers1, headers3, target_base_url, target_data_file_data_type, target_data_file_id, target_data_file_link, dataBinary)

Uploaded blob data:  [{'original_filename': 'image_02.png', 'url': None, 'md5sum': '8b614ae8a242fe8419099b6f77a9c9e5', 'sha1sum': '2898b02b0dfd0dd6615c555ec15ba0744a08b9ef', 'content_type': 'image/png', 'link': 'http://localhost:4000/data_files/55/content_blobs/58', 'size': 140675}]


In [None]:
### COMBINES REGISTER DATA FILE AND UPLOAD BLOB

#TransferData(session2, headers1, headers3, target_base_url, target_data_file_data_type, 
#             target_filetitle, target_filelicense, target_blob, dataBinary, 
#             target_project_id, target_investigation_id, target_study_id, target_assay_id, target_creator_id2)

In [None]:
"""Close the HTTP **session**"""
# The two sessions of this notebook are session1 (source) and session2 (target);
# there is no variable called `session`.
session1.close()
session2.close()

In [None]:
### DELETE

#assay_url = populated_assay['data']['links']['self']
#study_url = populated_study['data']['links']['self']
#investigation_url = populated_investigation['data']['links']['self']

#session.delete(base_url + assay_url)
#session.delete(base_url + study_url)
#session.delete(base_url + investigation_url)