# Executing Squonk services

This notebook is an example of executing Squonk services using Python's requests module.

It assumes you are executing against the JobExector service running in an OpenShift environment.

In [1]:
import requests
import json

# requests_toolbelt module is used to handle the multipart responses.
# Need to `pip install requests-toolbelt` from a terminal to install. This might need doing each time the Notebook pod starts
from requests_toolbelt.multipart import decoder

In [2]:
# Define some URLs and params
base_url = 'https://jobexecutor.prod.openrisknet.org/jobexecutor/rest'
services_url = base_url + '/v1/services'
jobexecutor_url = base_url + '/v1/jobs'
keycloak_url = 'https://sso.prod.openrisknet.org/auth/realms/openrisknet/protocol/openid-connect/token'

# set to False if self signed certificates are being used
tls_verify=True

## Check basic operation

In [3]:
# Test the PING service. Should give a 200 response and return 'OK'.
# If not then nothing else is going to work.

url = base_url + '/ping'

print("Requesting GET " + url)
resp = requests.get(url, verify=tls_verify)
print('Response Code: ' + str(resp.status_code))
print(resp.text)

Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/ping
Response Code: 200
OK


## Authentication

In [5]:
# Need to specify your Keycloak SSO username and password so that we can get a token

import getpass
username = input('Username')
password = getpass.getpass('Password')

Username user1
Password ········


In [87]:
# Get token from Keycloak. This will have a finite lifetime.
# If your requests are getting a 401 error your token has probably expired.

data = {'grant_type': 'password', 'client_id': 'squonk-jobexecutor', 'username': username, 'password': password}
kresp = requests.post(keycloak_url, data = data)
j = kresp.json()
token = j['access_token']
token

'eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJoX2p2Z3I3bWZ4VGJ3OHJLNW9Fb3dWWUVHUms2Z0hsLW9sSjdPUnQ3V2QwIn0.eyJqdGkiOiI5ZDFmYjQzYS02ZjMzLTQ2NzEtYjI4Zi04NGU3NDU1NjdlMWUiLCJleHAiOjE1NzIzNjYwNDYsIm5iZiI6MCwiaWF0IjoxNTcyMzU4ODQ2LCJpc3MiOiJodHRwczovL3Nzby5wcm9kLm9wZW5yaXNrbmV0Lm9yZy9hdXRoL3JlYWxtcy9vcGVucmlza25ldCIsImF1ZCI6ImFjY291bnQiLCJzdWIiOiI1Yzk2Y2IzMC05YmNiLTQ2NmUtOTg5NS02NmQwOGY5NmE3MzUiLCJ0eXAiOiJCZWFyZXIiLCJhenAiOiJzcXVvbmstam9iZXhlY3V0b3IiLCJhdXRoX3RpbWUiOjAsInNlc3Npb25fc3RhdGUiOiIyNDI5YmMxZS0zZmRlLTQ1MWEtYmRkNS03OWYyM2Y0OWFhNTYiLCJhY3IiOiIxIiwiYWxsb3dlZC1vcmlnaW5zIjpbImh0dHA6Ly9qb2JleGVjdXRvci5wcm9kLm9wZW5yaXNrbmV0Lm9yZyIsImh0dHBzOi8vam9iZXhlY3V0b3IucHJvZC5vcGVucmlza25ldC5vcmciXSwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbInN0YW5kYXJkLXVzZXIiLCJvZmZsaW5lX2FjY2VzcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJwcm9maWxlIGVtYWlsIiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJ

## List all services

In [88]:
# Get a list of all the Squonk services that can be executed.
# 

print("Requesting GET " + services_url)
jobs_resp = requests.get(services_url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
print(str(len(json)) + " services found")
print(json)

Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/services
Response Code: 200
75 services found
[{'id': 'cdk.dataset.convert.molecule.format', 'name': 'Convert molecule format', 'description': 'Convert molecule format'}, {'id': 'pipelines.pli.v1', 'name': 'PLI docking scoring', 'description': 'PLI docking scoring'}, {'id': 'chemaxon.calculators.verify', 'name': 'Verify structure (ChemAxon)', 'description': 'Verify structure (ChemAxon)'}, {'id': 'cdk.donors_acceptors', 'name': 'HBA & HBD (CDK)', 'description': 'HBA & HBD (CDK)'}, {'id': 'pipelines.rdkit.maxminpicker.simple.1', 'name': 'RDKitMaxMinPickerSimple', 'description': 'RDKitMaxMinPickerSimple'}, {'id': 'chemaxon.calculators.logd', 'name': 'LogD (CXN)', 'description': 'LogD (CXN)'}, {'id': 'chemaxon.calculators.bpka', 'name': 'Basic pKa (CXN)', 'description': 'Basic pKa (CXN)'}, {'id': 'chemaxon.calculators.druglikefilter', 'name': 'Drug-like Filter (CXN)', 'description': 'Drug-like Filter (CXN)'}, {'id'

## Getting details of a particular service

In [89]:
# find the service ID from the list in the list services cell
#service_id = 'core.dataset.filter.slice.v1'
#service_id = 'pipelines.rdkit.conformer.basic'
service_id = 'pipelines.rdkit.o3da.basic'

url = services_url + '/' + service_id
print("Requesting GET " + url)
jobs_resp = requests.get(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
print(json)

Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/services/pipelines.rdkit.o3da.basic
Response Code: 200
{'id': 'pipelines.rdkit.o3da.basic', 'name': 'RDKitOpen3DAlign', 'description': 'Generate 3D alignments using Open3DAlign in RDKit', 'tags': ['rdkit', 'conformer', 'alignment', 'open3dalign', '3d', 'docker'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}, {'name': 'queryMol', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.opt

## List all jobs

In [90]:
# Result of the request is an array of JobStatus objects.
# The job ID and status are listed

print("Requesting GET " + jobexecutor_url)
jobs_resp = requests.get(jobexecutor_url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
print(str(len(json)) + " jobs found")
for status in json:
    print(status['jobId'] + ' ' + status['status'])

Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs
Response Code: 200
2 jobs found
c7f8199b-02cf-43e0-86b4-3bab89781c7f RESULTS_READY
1a46a1fc-193a-4f72-acd7-c81de1f096d4 RESULTS_READY


## Execute the 'Dataset Slice' service

In [91]:
# The 'Datast slice' takes a slice through a dataset specified by the number of records to skip and then the number to include.
# This is one of Squonk's 'internal' services.
# The job ID is stored in the job_id variable.

url = jobexecutor_url + '/core.dataset.filter.slice.v1'

data = {
    'options': '{"skip":2,"count":3}',
    'input_data': ('input_data', open('nci10.data', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('nci10.metadata', 'rb'), 'application/x-squonk-dataset-metadata+json')
}

print("Requesting POST " + jobexecutor_url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)

Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs
Response Code: 201
{'jobId': 'fd0bc3b5-a9e8-4121-afd1-5d410d8297df', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572358867584, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DefaultServiceDescriptor', 'serviceConfig': {'id': 'core.dataset.filter.slice.v1', 'name': 'Dataset slice selector', 'description': 'Generate a defined slice of the dataset', 'tags': ['filter', 'slice', 'dataset'], 'icon': 'icons/filter.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-basic+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.BasicObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-basic+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryT

## Get the status of the current job

In [92]:
# The job is defined by the job_id variable and is probably the last job executed
url = jobexecutor_url + '/' + job_id + '/status'
print("Requesting GET " + url )
jobs_resp = requests.get(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
json

Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/fd0bc3b5-a9e8-4121-afd1-5d410d8297df/status
Response Code: 200


{'jobId': 'fd0bc3b5-a9e8-4121-afd1-5d410d8297df',
 'username': 'user1',
 'status': 'RESULTS_READY',
 'totalCount': 0,
 'processedCount': -3,
 'errorCount': -3,
 'started': 1572358867584,
 'completed': 1572358868113,
 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition',
  'serviceDescriptor': {'@class': 'org.squonk.core.DefaultServiceDescriptor',
   'serviceConfig': {'id': 'core.dataset.filter.slice.v1',
    'name': 'Dataset slice selector',
    'description': 'Generate a defined slice of the dataset',
    'tags': ['filter', 'slice', 'dataset'],
    'icon': 'icons/filter.png',
    'inputDescriptors': [{'name': 'input',
      'mediaType': 'application/x-squonk-dataset-basic+json',
      'primaryType': 'org.squonk.dataset.Dataset',
      'secondaryType': 'org.squonk.types.BasicObject'}],
    'outputDescriptors': [{'name': 'output',
      'mediaType': 'application/x-squonk-dataset-basic+json',
      'primaryType': 'org.squonk.dataset.Dataset',
      'secondaryType': 'org.

## Get the results of a job. 

In [93]:
# The job is defined by the job_id variable and is probably the last job executed.
# The status of the job needs to be 'RESULTS_READY'
# The response is a multipart response, typically containing the job status, the results metadata and the results data.
# This method can be called for a job any number of times until the job is deleted.

url = jobexecutor_url + '/' + job_id + '/results'
print("Requesting GET " + url )
jobs_resp = requests.get(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
multipart_data = decoder.MultipartDecoder.from_response(jobs_resp)
for part in multipart_data.parts:
    print(part.content)
    print(part.headers)

Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/fd0bc3b5-a9e8-4121-afd1-5d410d8297df/results
Response Code: 200
b'{"jobId":"fd0bc3b5-a9e8-4121-afd1-5d410d8297df","username":"user1","status":"RESULTS_READY","totalCount":0,"processedCount":-3,"errorCount":-3,"started":1572358867584,"completed":1572358868113,"jobDefinition":{"@class":"org.squonk.jobdef.ExternalJobDefinition","serviceDescriptor":{"@class":"org.squonk.core.DefaultServiceDescriptor","serviceConfig":{"id":"core.dataset.filter.slice.v1","name":"Dataset slice selector","description":"Generate a defined slice of the dataset","tags":["filter","slice","dataset"],"icon":"icons/filter.png","inputDescriptors":[{"name":"input","mediaType":"application/x-squonk-dataset-basic+json","primaryType":"org.squonk.dataset.Dataset","secondaryType":"org.squonk.types.BasicObject"}],"outputDescriptors":[{"name":"output","mediaType":"application/x-squonk-dataset-basic+json","primaryType":"org.squonk.dataset.Dataset"

## Delete the job

In [94]:
#  Once you have fetched the results you MUST delete the job.
# The job is defined by the job_id variable and is probably the last job executed.

url = jobexecutor_url + '/' + job_id
print("Requesting DELETE " + url)
jobs_resp = requests.delete(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
if 'status' in json and json['status'] == 'COMPLETED':
    print('Job deleted')
else:
    print('Problem deleting job')

Requesting DELETE https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/fd0bc3b5-a9e8-4121-afd1-5d410d8297df
Response Code: 200
Job deleted


##  Delete all jobs
This is to help clean up if you get into a mess!

In [95]:
# Delete all jobs

# First get the current jobs
jobs_resp = requests.get(jobexecutor_url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
print('Found ' + str(len(json)) + ' jobs')

# Now go through them and delete
# If successful the status of the job will then be COMPLETED.
for job in json:
    id = job['jobId']
    url = jobexecutor_url + '/' + id
    print("Deleting " + url)
    jobs_resp = requests.delete(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
    j = jobs_resp.json()
    print("Status: " + j['status'])

Response Code: 200
Found 2 jobs
Deleting https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/c7f8199b-02cf-43e0-86b4-3bab89781c7f
Status: COMPLETED
Deleting https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/1a46a1fc-193a-4f72-acd7-c81de1f096d4
Status: COMPLETED


## Other services

In addition to the simple 'dataset slice' service many more meaningful ones are available.

Here are some examples illustrating the different categories of Squonk services:

1. Built in services running within the job executor Java process. These are limited to very simple and very fast operations
1. HTTP services running in the chemservices module that stream results and are designed for relatively short term execution (seconds or at most a few minutes) with the results being streamed immediately back to the requester.
1. Services running in a Docker container given the input data as files and writing the results as files. These are designed for more flexible implementation of services that can take longer to execute.
1. Nextflow services. Similar to Docker services, but defined as a Nextflow workflow that typically allows parallel execution on the K8S cluster or potentionally on an external cluster.

Execute one of these instead of the dataset slice one above.

In [69]:
# The 'Lipinski filter' takes calculates the classical rule of five properties and allows to filter based on these.
# We have implementations for ChemAxon and RDKit. Here we use the RDKit one.
# The default filter is the classical drug-likeness one defined by Lipinski but you can specify your owwn criteria instaead.
# This is one of Squonk's 'HTTP' services.
# The job ID is stored in the job_id variable.

url = jobexecutor_url + '/rdkit.calculators.lipinski'

data = {
    'options': '{"filterMode":"INCLUDE_PASS"}',
    'input_data': ('input_data', open('nci10.data', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('nci10.metadata', 'rb'), 'application/x-squonk-dataset-metadata+json')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)

Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/rdkit.calculators.lipinski
Response Code: 201
{'jobId': 'af9060da-89a3-45dc-b46e-ad8861cc95c5', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572351678988, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.HttpServiceDescriptor', 'serviceConfig': {'id': 'rdkit.calculators.lipinski', 'name': 'Lipinski (RDKit)', 'description': 'Lipinski rule of 5 filter using RDKit', 'tags': ['lipinski', 'ruleoffive', 'ro5', 'hbond', 'donors', 'acceptors', 'logp', 'molecularweight', 'druglike', 'molecularproperties', 'filter', 'rdkit'], 'resourceUrl': '/docs/cells/Lipinski%20filter%20(RDKit)/', 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 's

In [23]:
# passing data as SDF

url = jobexecutor_url + '/rdkit.calculators.lipinski'

data = {
    'options': '{"filterMode":"INCLUDE_PASS"}',
    'input': ('input', open('Kinase_inhibs.sdf', 'rb'), 'chemical/x-mdl-sdfile')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)

Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/rdkit.calculators.lipinski
Response Code: 201
{'jobId': '86d744c4-f0a5-4a4f-9418-b3dd0dba1f6f', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572349391457, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.HttpServiceDescriptor', 'serviceConfig': {'id': 'rdkit.calculators.lipinski', 'name': 'Lipinski (RDKit)', 'description': 'Lipinski rule of 5 filter using RDKit', 'tags': ['lipinski', 'ruleoffive', 'ro5', 'hbond', 'donors', 'acceptors', 'logp', 'molecularweight', 'druglike', 'molecularproperties', 'filter', 'rdkit'], 'resourceUrl': '/docs/cells/Lipinski%20filter%20(RDKit)/', 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 's

In [26]:
# sucos scoring passing 2 inputs as SDF

url = jobexecutor_url + '/pipelines.rdkit.sucos.basic'

data = {
    'options': '{}',
    'input': ('input', open('mols.sdf', 'rb'), 'chemical/x-mdl-sdfile'),
    'target': ('target', open('benzene.sdf', 'rb'), 'chemical/x-mdl-sdfile')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)

Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/pipelines.rdkit.sucos.basic
Response Code: 201
{'jobId': '19e7a237-1980-4c3c-9efc-7ce804410275', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572349506478, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DockerServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.sucos.basic', 'name': 'RDKitSuCOS', 'description': 'Generate 3D overlay using SuCOS in RDKit', 'tags': ['rdkit', 'alignment', 'sucos', '3d', 'docker'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}, {'name': 'target', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.d

In [74]:
# open3dAlign scoring passing 2 inputs as SDF
# passing the queryMol as pyrimethamine.mol does not work - it needs tob e converted to SDF

url = jobexecutor_url + '/pipelines.rdkit.o3da.basic'

data = {
    'options': '{"arg.crippen":"false"}',
    'input': ('input', open('dhfr_3d.sdf', 'rb'), 'chemical/x-mdl-sdfile'),
    'queryMol': ('queryMol', open('pyrimethamine.sdf', 'rb'), 'chemical/x-mdl-sdfile')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)

Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/pipelines.rdkit.o3da.basic
Response Code: 201
{'jobId': '0660dae6-5553-4553-8533-3d9e9353e8d4', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572351791974, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DockerServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.o3da.basic', 'name': 'RDKitOpen3DAlign', 'description': 'Generate 3D alignments using Open3DAlign in RDKit', 'tags': ['rdkit', 'conformer', 'alignment', 'open3dalign', '3d', 'docker'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}, {'name': 'queryMol', 'mediaType': 'application/x-squonk-dataset-molecule+j

In [64]:
# open3dAlign scoring passing inputs as dataset and query as SDF

url = jobexecutor_url + '/pipelines.rdkit.o3da.basic'

data = {
    'options': '{"arg.crippen":"false"}',
    'input_data': ('input_data', open('dhfr_3d.data.gz', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('dhfr_3d.metadata', 'rb'), 'application/x-squonk-dataset-metadata+json'),
    'queryMol': ('queryMol', open('pyrimethamine.sdf', 'rb'), 'chemical/x-mdl-sdfile')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)

Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/pipelines.rdkit.o3da.basic
Response Code: 201
{'jobId': 'c7300679-1a58-438b-b6d6-deaa434a0783', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572351520464, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DockerServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.o3da.basic', 'name': 'RDKitOpen3DAlign', 'description': 'Generate 3D alignments using Open3DAlign in RDKit', 'tags': ['rdkit', 'conformer', 'alignment', 'open3dalign', '3d', 'docker'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}, {'name': 'queryMol', 'mediaType': 'application/x-squonk-dataset-molecule+j

In [19]:
# The 'Conformer generator' used RDKit ETKDG conformer generation tool to generate a number of conformers for the input structures.
# This is one of Squonk's 'Docker' services.
# The job ID is stored in the job_id variable.

service_id = 'pipelines.rdkit.conformer.basic'

data = {
    'options': '{"arg.num":10,"arg.method":"RMSD"}',
    'input_data': ('input_data', open('nci10.data', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('nci10.metadata', 'rb'), 'application/x-squonk-dataset-metadata+json')
}

jobs_resp = requests.post(jobexecutor_url + '/' + service_id, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)

Response Code: 201
{'jobId': 'c00e90ba-8045-4c1c-9ab4-7e8d47e82f89', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572348726866, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DockerServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.conformer.basic', 'name': 'RDKitConformers', 'description': 'Generate 3D conformers using RDKit', 'tags': ['rdkit', 'conformer', '3d', 'docker'], 'icon': 'icons/molecule_generator.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors':

In [171]:
# Similarity screening using RDKit.
# This is one of Squonk's 'Nextflow' services.
# The job ID is stored in the job_id variable.

# NOTE: THIS IS NOT WORKING AS THE QUERY STRUCTURE IS NOT BEING PASSED CORRECTLY

service_id = 'pipelines.rdkit.screen.basic'

data = {
    'options': '{"arg.query":{"source":"CC1=CC(=O)C=CC1=O","format":"smiles"},"arg.sim":{"minValue":0.5,"maxValue":1.0}}',
    'input_data': ('input_data', open('nci10_data.json', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('nci10_meta.json', 'rb'), 'application/x-squonk-dataset-metadata+json')
}

jobs_resp = requests.post(jobexecutor_url + '/' + service_id, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)

Response Code: 201
{'jobId': '4a67ebdb-603f-4458-99fb-b23263edd176', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1556724503937, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.NextflowServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.screen.basic', 'name': 'RDKitSimilarityScreening', 'description': 'RDKit Similarity Screening', 'tags': ['rdkit', 'screening', 'similarity', 'docker', 'nextflow'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'o