In [54]:
import requests

# Vantage6 API for the RAVEN
This notebook *should* contain all the code needed to interact with the vantage6 API 
from the RAVEN UI.

The following sections are included in this notebook:

  1. Authenticate with the vantage6 server - This will change as we got a last minute  
     request to use KeyCloak for authentication.
  2. Creating prerequisites - This is **static** content which should already be at the   
     vantage6 server. This is also not needed in case of the RAVEN UI, and you can skip  
     this section as I already have created the required content.
  3. Creating the cohorts from the patient IDs
  4. Running other analytics

...  

## 1. Authenticate with the vantage6 server

In [55]:
# I am using the old way of authenticating with the vantage6 server which is going to
# change in the near future as we are using KeyCloak for authentication.

# In the new scenario the users will be authenticated using their own credentials (users
# need to be created in vantage6 static content).

# NOTE(review): the credentials below are hard-coded demo values. Do not commit real
# credentials to a notebook, and clear this cell's output (it shows the tokens) before
# sharing.

# The following code is used to authenticate with the vantage6 server.
auth_response = requests.post(
    "https://orchestrator.idea.lst.tfo.upm.es/server/token/user",
    json={
        "username": "root",
        "password": "root"
    },
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
)
# Fail right here with a clear HTTP error when the login is rejected, instead of a
# KeyError on 'access_token' when the headers are built.
auth_response.raise_for_status()
# The response is a JSON object with the access token and the refresh token
auth_response.json()


{'access_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJmcmVzaCI6ZmFsc2UsImlhdCI6MTc1MDkyODExMiwianRpIjoiNmFhOTgwZGYtOTcyZC00MmM2LWE2ZDUtZTY2MzA4ZjQ2NWVmIiwidHlwZSI6ImFjY2VzcyIsInN1YiI6MSwibmJmIjoxNzUwOTI4MTEyLCJleHAiOjE3NTA5NDk3MTIsImNsaWVudF90eXBlIjoidXNlciIsInJvbGVzIjpbIlJvb3QiXX0.XYLBTup0-YiOgyVxl2a2Tdn5VDjaVeCploR2-oV88gE',
 'refresh_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJmcmVzaCI6ZmFsc2UsImlhdCI6MTc1MDkyODExMiwianRpIjoiOTZmY2ZmZTktYzE4My00YzM5LTlkZmEtMzg4NmQyMzlhMzdiIiwidHlwZSI6InJlZnJlc2giLCJzdWIiOjEsIm5iZiI6MTc1MDkyODExMiwiZXhwIjoxNzUxMTAwOTEyLCJjbGllbnRfdHlwZSI6InVzZXIiLCJyb2xlcyI6WyJSb290Il19.vVyEK-0qs7_6CfgP7cuSbtqkAFqptWRvPvLNR9-D5es',
 'refresh_url': '/server/token/refresh',
 'user_url': '/server/user/1'}

In [56]:
# Bearer-token header that is sent along with the subsequent requests. The token is
# read from the JSON body of the authentication response.
headers = {"Authorization": "Bearer {}".format(auth_response.json()["access_token"])}

## 2. Creating prerequisites (Static content)
This is 'static' content which should already be at the vantage6 server. The vantage6  
UI can be used to manage the 'static' content. It is static from the point of view of   
RAVEN UI.

**YOU DO NOT NEED TO CREATE THESE, YOU CAN SKIP THIS SECTION.** 

In [57]:
# I re-authenticate in order to use the vantage6 client library. So the actual calls
# in this session are hidden, but you don't need them anyway.

# !pip install vantage6-client
from vantage6.client import Client

# Create the vantage6 client for the server and authenticate it as the root user.
# NOTE(review): the credentials are hard-coded demo values; do not commit real ones.
client = Client(
    "https://orchestrator.idea.lst.tfo.upm.es", 443, "/server", log_level="INFO"
)
client.authenticate("root", "root")

 Welcome to
                  _                     __  
                 | |                   / /  
__   ____ _ _ __ | |_ __ _  __ _  ___ / /_  
\ \ / / _` | '_ \| __/ _` |/ _` |/ _ \ '_ \ 
 \ V / (_| | | | | || (_| | (_| |  __/ (_) |
  \_/ \__,_|_| |_|\__\__,_|\__, |\___|\___/ 
                            __/ |           
                           |___/            

 --> Join us on Discord! https://discord.gg/rwRvwyK
 --> Docs: https://docs.vantage6.ai
 --> Blog: https://vantage6.ai
------------------------------------------------------------
Cite us!
If you publish your findings obtained using vantage6, 
please cite the proper sources as mentioned in:
https://vantage6.ai/vantage6/references
------------------------------------------------------------
Successfully authenticated
 --> Succesfully authenticated
 --> Name: None (id=1)
 --> Organization: root (id=1)


In [58]:
# Get the server info: ask the server for its version through the client and print it.
print("Server version: ", client.util.get_server_version())

Server version:  {'version': '5.0.0a18'}


### 2.1 Create the organizations

In [59]:
# client.organization.create(
#     name="Example Organization 1",
#     address1="123 Main St",
#     address2="Apt 1",
#     zipcode="1234AB",
#     country="NL",
#     domain="example-organization-1.com",
# )

In [60]:
# client.organization.create(
#     name="Example Organization 2",
#     address1="123 Main St",
#     address2="Apt 2",
#     zipcode="1234AB",
#     country="NL",
#     domain="example-organization-2.com",
# )

The organizations are created. Every organization has an ID which can be used to
identify the organization at a later stage.

**DID WE LINK THE ORGANIZATION TO THE RAVEN UI?**

In [61]:
# List the organizations, requesting only the `id` and `name` fields.
client.organization.list(fields=('id', 'name'))

[{'id': 3, 'name': 'Example Organization 2'},
 {'id': 2, 'name': 'Example Organization 1'},
 {'id': 1, 'name': 'root'}]

### 2.2 Create the users

In [62]:
# List the roles (`id` and `name` only); the role ids are what `roles=[...]` refers to
# when a user is created.
client.role.list(fields=('id', 'name'))

[{'id': 2, 'name': 'container'},
 {'id': 1, 'name': 'Root'},
 {'id': 5, 'name': 'Researcher'},
 {'id': 6, 'name': 'Organization Admin'},
 {'id': 4, 'name': 'Viewer'},
 {'id': 3, 'name': 'node'},
 {'id': 7, 'name': 'Collaboration Admin'}]

In [63]:
# client.user.create(
#     username="user1",
#     password="Password123!",
#     email="user1@example-organization-1.com",
#     firstname="User 1",
#     lastname="User 1",
#     organization=2,
#     roles=[6]
# )

In [64]:
# client.user.create(
#     username="user2",
#     password="Password123!",
#     email="user2@example-organization-2.com",
#     firstname="User 2",
#     lastname="User 2",
#     organization=3,
#     roles=[6]
# )

In [65]:
# client.user.create(
#     username="raven",
#     password="Password123!",
#     email="raven@example-organization-2.com",
#     firstname="Raven",
#     lastname="Raven",
#     organization=2,
#     roles=[7]
# )

In [66]:
# List the users, requesting only the `id` and `username` fields.
client.user.list(fields=('id', 'username'))

[{'id': 4, 'username': 'raven'},
 {'id': 1, 'username': 'root'},
 {'id': 2, 'username': 'user1'},
 {'id': 3, 'username': 'user2'}]

### 2.3 Create the collaboration

In [67]:
# client.collaboration.create(
#     name="Example Collaboration 1",
#     organizations=[2, 3]
# )

In [68]:
# List the collaborations (`id` and `name` only).
# NOTE(review): presumably scope="global" returns all collaborations rather than only
# those of the user's own organization - confirm against the vantage6 client docs.
client.collaboration.list(fields=('id', 'name'), scope="global")

[{'id': 1, 'name': 'Example Collaboration 1'}]

### 2.4 Create the nodes

In [69]:
# client.node.create(
#     collaboration=1,
#     organization=2,
#     name="Organization 2 Node 1",
# )

In [70]:
# client.node.create(
#     collaboration=1,
#     organization=3,
#     name="Organization 3 Node 1",
# )

In [71]:
# List the nodes with their `id`, `name` and current `status`.
client.node.list(fields=("id", "name", "status"))

[{'id': 5, 'name': 'Organization 2 Node 1', 'status': 'offline'},
 {'id': 6, 'name': 'Organization 3 Node 1', 'status': 'offline'}]

## 3. New Workspace
*New study in vantage6*

In [72]:
# When a new workspace in RAVEN is created we need to create a new study in vantage6.
# A study in vantage6 is a collection of organizations that are allowed to be used in
# the computations.

# We first login as the RAVEN admin user and get the token. Or we could give permission
# to the RAVEN user to create the study. TODO: This needs to be decided.
# NOTE(review): the credentials below are hard-coded demo values. Do not commit real
# credentials, and clear this cell's output (it shows the tokens) before sharing.
auth_response = requests.post(
    "https://orchestrator.idea.lst.tfo.upm.es/server/token/user",
    json={
        "username": "raven",
        "password": "Password123!"
    },
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
)
# A rejected login has no 'access_token' in its body; raise a clear HTTP error instead
# of failing with a KeyError on the next line.
auth_response.raise_for_status()
# From here on all requests are made on behalf of the `raven` user.
headers = {"Authorization": f"Bearer {auth_response.json()['access_token']}"}
auth_response.json()

{'access_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJmcmVzaCI6ZmFsc2UsImlhdCI6MTc1MDkyODEzMCwianRpIjoiMGExZWZlM2UtZTNmOS00MGM2LTk5ZjctNWQ4NjllNDIyMTRmIiwidHlwZSI6ImFjY2VzcyIsInN1YiI6NCwibmJmIjoxNzUwOTI4MTMwLCJleHAiOjE3NTA5NDk3MzAsImNsaWVudF90eXBlIjoidXNlciIsInJvbGVzIjpbIlJvb3QiXX0.QPZX7dxXTGRB5WdYiT8y3J63wdiL0DyZp1YGXx6Etxk',
 'refresh_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJmcmVzaCI6ZmFsc2UsImlhdCI6MTc1MDkyODEzMCwianRpIjoiMzdlMDhhMTEtYTJkNC00M2ZhLTk3MjMtMWFiNzhmYWQ4MjVjIiwidHlwZSI6InJlZnJlc2giLCJzdWIiOjQsIm5iZiI6MTc1MDkyODEzMCwiZXhwIjoxNzUxMTAwOTMwLCJjbGllbnRfdHlwZSI6InVzZXIiLCJyb2xlcyI6WyJSb290Il19.nJkpefM3C-hMdUvvphhEE9eBo6aMOGSMp5YXRMJv0Sk',
 'refresh_url': '/server/token/refresh',
 'user_url': '/server/user/4'}

In [74]:
# To create a new study we need the organizations ids (the internal ids in vantage6)
# that are included in this workspace. The name of the study needs to be unique.
# TODO: we need to decide on the unique name of the study, this could be the name of
# the workspace.
response = requests.post(
    "https://orchestrator.idea.lst.tfo.upm.es/server/study",
    headers=headers,
    json={
        # The collaboration id is the vantage6 id of the collaboration. This is
        # the same for all workspaces. I used 1 now, but this can change while we
        # are still developing the platform.
        "collaboration_id": 1,
        # The name of the study needs to be unique. I guess the name of the workspace
        # is also unique, so we can use that.
        "name": "Example Study 5",
        # The organization ids are the internal ids of the organizations in vantage6.
        "organization_ids": [2, 3],
    },
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
)
# The body is shown as-is (no raise_for_status) so that the error format described
# below stays visible when the request is rejected.
response.json()
# In the case that:
#
# - The name is not unique
# - The collaboration id is not valid (non existing)
# - The organization ids are not valid (non existing)
#
# The API will return a 4xx error with a message. It will be of the following format:
# {
#     "msg": "Error message",
# }

{'collaboration': {'id': 1,
  'link': '/server/collaboration/1',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'id': 5,
 'name': 'Example Study 5',
 'tasks': '/server/task?study_id=5',
 'organizations': [{'studies': '/server/study?organization_id=2',
   'runs': '/server/run?organization_id=2',
   'address2': 'Apt 1',
   'nodes': '/server/node?organization_id=2',
   'domain': 'example-organization-1.com',
   'public_key': '',
   'users': '/server/user?organization_id=2',
   'name': 'Example Organization 1',
   'zipcode': '1234AB',
   'country': 'NL',
   'address1': '123 Main St',
   'id': 2,
   'tasks': '/server/task?init_org_id=2',
   'collaborations': '/server/collaboration?organization_id=2'},
  {'studies': '/server/study?organization_id=3',
   'runs': '/server/run?organization_id=3',
   'address2': 'Apt 2',
   'nodes': '/server/node?organization_id=3',
   'domain': 'example-organization-2.com',
   'public_key': '',
   'users': '/server/user?organization_id=3',
   'name': 'Example Organ

In [75]:
# You can always view all studies. This endpoint is not necessarily needed for the
# RAVEN UI but I thought it would be useful to have it here.
response = requests.get(
    "https://orchestrator.idea.lst.tfo.upm.es/server/study",
    headers=headers,
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
)
# Surface HTTP errors explicitly rather than as a KeyError on "data" below.
response.raise_for_status()
response.json()["data"]

[{'collaboration': {'id': 1,
   'link': '/server/collaboration/1',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'id': 2,
  'name': 'Example Study 2',
  'tasks': '/server/task?study_id=2',
  'organizations': '/server/organization?study_id=2'},
 {'collaboration': {'id': 1,
   'link': '/server/collaboration/1',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'id': 3,
  'name': 'Example Study 3',
  'tasks': '/server/task?study_id=3',
  'organizations': '/server/organization?study_id=3'},
 {'collaboration': {'id': 1,
   'link': '/server/collaboration/1',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'id': 1,
  'name': 'Example Study 1',
  'tasks': '/server/task?study_id=1',
  'organizations': '/server/organization?study_id=1'},
 {'collaboration': {'id': 1,
   'link': '/server/collaboration/1',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'id': 5,
  'name': 'Example Study 5',
  'tasks': '/server/task?study_id=5',
  'organizations': '/server/organization?study_id=5'},
 {'collaboration': {'id': 1,

In [76]:
# You can also view the organizations that are part of a study. This endpoint is not
# necessarily needed for the RAVEN UI but I thought it would be useful to have it here.
response = requests.get(
    "https://orchestrator.idea.lst.tfo.upm.es/server/organization",
    # Let `requests` build (and escape) the query string instead of hand-writing
    # `?study_id=3` into the URL.
    params={"study_id": 3},
    headers=headers,
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
)
# Surface HTTP errors explicitly rather than as a KeyError on "data" below.
response.raise_for_status()
response.json()["data"]

[{'studies': '/server/study?organization_id=2',
  'runs': '/server/run?organization_id=2',
  'address2': 'Apt 1',
  'nodes': '/server/node?organization_id=2',
  'domain': 'example-organization-1.com',
  'public_key': '',
  'users': '/server/user?organization_id=2',
  'name': 'Example Organization 1',
  'zipcode': '1234AB',
  'country': 'NL',
  'address1': '123 Main St',
  'id': 2,
  'tasks': '/server/task?init_org_id=2',
  'collaborations': '/server/collaboration?organization_id=2'},
 {'studies': '/server/study?organization_id=3',
  'runs': '/server/run?organization_id=3',
  'address2': 'Apt 2',
  'nodes': '/server/node?organization_id=3',
  'domain': 'example-organization-2.com',
  'public_key': '',
  'users': '/server/user?organization_id=3',
  'name': 'Example Organization 2',
  'zipcode': '1234AB',
  'country': 'NL',
  'address1': '123 Main St',
  'id': 3,
  'tasks': '/server/task?init_org_id=3',
  'collaborations': '/server/collaboration?organization_id=3'}]

## 4. New Analysis
*New session in vantage6*

In [77]:
# When a new analysis is created in RAVEN we need to create a new session in vantage6.
# A session is a file space on the data stations in which we can store dataframes (an
# extraction of the data from the OMOP database). We need the study id which should be
# stored in the workspace in order to create the session.
response = requests.post(
    "https://orchestrator.idea.lst.tfo.upm.es/server/session",
    headers=headers,
    json={
        # The collaboration id is the vantage6 id of the collaboration. This is
        # the same for all workspaces. I used 1 now, but this can change while we
        # are still developing the platform.
        "collaboration_id": 1,
        # The name of the session needs to be unique within the collaboration, so in the
        # case of IDEA4RC this needs to always be unique. I would use the analysis ID to
        # create a unique name.
        "name": "Example Session 7",
        # The study id should be linked to the workspace.
        "study_id": 3,
        # The scope is the scope of the session. In IDEA4RC we use the collaboration
        # scope. This means that other users can use the same session.
        "scope": "collaboration"
    },
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
)
# The body is shown as-is (no raise_for_status) so that the error format described
# below stays visible when the request is rejected.
response.json()
# In the case that:
#
# - The name is not unique
# - The study id is not valid (non existing)
# - The scope is not valid (only 'collaboration' should be used)
# - The collaboration id is not valid (non existing)
#
# The API will return a 4xx error with a message. It will be of the following format:
# {
#     "msg": "Error message",
# }

{'dataframes': '/server/session/6/dataframe',
 'created_at': '2025-06-24T04:17:54.751457',
 'collaboration': {'id': 1,
  'link': '/server/collaboration/1',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'ready': True,
 'scope': 'col',
 'owner': {'id': 4,
  'link': '/server/user/4',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'name': 'Example Session 7',
 'study': {'id': 3,
  'link': '/server/study/3',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'id': 6,
 'tasks': '/server/task?session_id=6',
 'last_used_at': '2025-06-24T04:17:54.751526'}

## 5. New cohort
*Create a new dataframe in vantage6*

In [78]:
# Creating a cohort means that vantage6 extracts the data from the OMOP database and
# stores it in the session as a dataframe. This is done by a vantage6 extraction task;
# the values below are the inputs for that task.

# --- Static content -------------------------------------------------------------------
collaboration_id, label = 1, "omop"
# NOTE(review): the algorithm store output further down lists this image without the
# `https://` scheme and without a tag - confirm which form the server expects.
image = "https://harbor2.vantage6.ai/idea4rc/sessions:latest"

# --- Dynamic content ------------------------------------------------------------------
# `study_id` is related to the workspace, `session_id` to the analysis.
study_id = session_id = 2

# Cohort name; it should be unique within a session. The name used in the RAVEN UI is
# probably a good candidate. It may also be left out, in which case vantage6 generates
# a name.
name = "Cohort name 2"

# An `image` can expose several `methods`. Sarcoma and head and neck extract different
# features, so they need different methods.
method = "create_cohort"

# Task input: which patients to extract and which feature set to extract for them.
arguments = dict(
    patient_ids=[1, 2, 3],  # should come from the cohort builder in RAVEN
    features="sarcoma",  # or "head_neck" in case of head and neck
)

In [79]:
# In vantage6 we can always see who are included in the study, but this request is
# probably redundant as you already know the (v6) organizations ID at this point. When
# we create a new cohort we do that for all organizations in the study.
orgs_response = requests.get(
    "https://orchestrator.idea.lst.tfo.upm.es/server/organization",
    # NOTE(review): per_page=999 presumably avoids pagination by asking for one large
    # page - confirm that the server honours a page size this large.
    params={"per_page": 999, "study_id": study_id},
    headers=headers,
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
)
# Surface HTTP errors explicitly rather than as a KeyError on "data" below.
orgs_response.raise_for_status()
orgs = orgs_response.json()
# The vantage6 ids of the organizations returned for this study.
org_ids = [org["id"] for org in orgs["data"]]
org_ids

[2, 3]

In [80]:
# Before we can create a task we need to prepare task instructions. In vantage6 we can
# (but we don't in IDEA4RC) use end-to-end encryption, therefore we need to store the
# input for each organization individually.
import json
import base64

# Every organization receives the same arguments, so serialise them only once:
# JSON text -> UTF-8 bytes -> base64 -> text.
encoded_input = base64.b64encode(json.dumps(arguments).encode("UTF-8")).decode("UTF-8")

payload = {
    "label": label,
    "name": name,  # optional, v6 will generate a name if not provided
    "task": {
        "method": method,
        "image": image,
        # One entry per organization: its vantage6 id plus the (identical) encoded
        # input.
        "organizations": [{"id": id_, "input": encoded_input} for id_ in org_ids],
    }
}
payload

{'label': 'omop',
 'name': 'Cohort name 2',
 'task': {'method': 'create_cohort',
  'image': 'https://harbor2.vantage6.ai/idea4rc/sessions:latest',
  'organizations': [{'id': 2,
    'input': 'eyJwYXRpZW50X2lkcyI6IFsxLCAyLCAzXSwgImZlYXR1cmVzIjogInNhcmNvbWEifQ=='},
   {'id': 3,
    'input': 'eyJwYXRpZW50X2lkcyI6IFsxLCAyLCAzXSwgImZlYXR1cmVzIjogInNhcmNvbWEifQ=='}]}}

In [47]:
# Create a vantage6 task to extract the data from the omop data source and store it
# into a dataframe. The dataframe is created inside the session given in the URL.
response = requests.post(
    f"https://orchestrator.idea.lst.tfo.upm.es/server/session/{session_id}/dataframe",
    headers=headers,
    json=payload,
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
)
response.json()

{'ready': True,
 'last_session_task': {'job_id': 5,
  'children': '/server/task?parent_id=5',
  'study': {'id': 2,
   'link': '/server/study/2',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'algorithm_store': None,
  'created_at': '2025-06-25T14:02:54.098885',
  'init_org': {'id': 2,
   'link': '/server/organization/2',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'id': 5,
  'image': 'https://harbor2.vantage6.ai/idea4rc/sessions:latest',
  'session': {'id': 2,
   'link': '/server/session/2',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'results': '/server/result?task_id=5',
  'status': 'awaiting',
  'required_by': [],
  'depends_on': [],
  'runs': '/server/run?task_id=5',
  'method': 'create_cohort',
  'description': 'Data extraction step for session Example Session 2 (2).This session is in the Example Collaboration 1 collaboration. Data extraction is done on the omop database, and the dataframe name will be Cohort name 1.',
  'finished_at': None,
  'collaboration': {'id': 1,
   'link'

### FROM HERE ON WE NEED TEST NODES IN THE CAPSULE WITH SOME DATA.  
### I WILL TRY TO GET THEM UP BEFORE MY HOLIDAY. THE PROCESS FOR ALL THESE STEPS  
### IS VERY SIMILAR TO THE CREATION OF THE DATAFRAME

In [68]:
# TODO:
#   - Poll for the dataframe status
#   - Compute summary statistics on the dataframe
#   - Poll for the summary statistics results
#   - Create analytics
#   - Poll for analytics results
#   - Start preprocessing task
#   - Poll for the preprocessing task to finish

## 6. Summary statistics
Before we can display the summary statistics we need to calculate them. This is done through a vantage6 algorithm. 

We first need to be sure the dataframe is ready to be used. Then we can execute the algorithm and await the results to be displayed.

## 7. Collect algorithm metadata
*Collect input arguments and their types*

In [None]:
# Obtain a list of all available algorithms in the algorithm store. For now only the
# extraction algorithm is in there (we've used it already when creating the cohort,
# see that the `image` is `https://harbor2.vantage6.ai/idea4rc/sessions:latest`).
#
# Each algorithm has one or more `functions`. The `functions` are actual Python functions
# that are executed on the data stations. A `function` in vantage6 expects a specific
# set of attributes that can be modified by the user. You should visualize these in a
# form in the RAVEN UI.
#
# 1. `databases`. A list of databases (typically only one) that will be supplied by the
#    node based on the `label` or `dataframe_id`. A `label` refers to the OMOP
#    database (in the IDEA4RC case) and is only used for the extraction algorithm (=
#    `create_cohort`). The `dataframe_id` refers to the cohort dataframe which can be
#    used for the analysis. In the extraction job we do not let the user select the
#    database, as we always use the OMOP database. So no need to visualize this. In the
#    analysis job we do let the user select the database, this happens when the user
#    selects a set of cohorts.
# 2. `arguments`. A list of arguments that can be modified by the user. In the case of
#    the extraction algorithm we have two arguments: `patient_ids` and `features`. The
#    `patient_ids` should be the list of patients that are coming from the cohort
#    builder and the `features` should be the tumor type that was selected in the
#    RAVEN workspace.
#
# The other important metadata are:
#
# 1. `name`. The method name, depending on which method the user selects in the UI
#    different arguments need to be provided. You also need to provide this `name` in
#    the `method` when creating a vantage6 task.
# 2. `image`. The image is the docker image that will be used to execute the function.
#    You also need to supply this when creating a vantage6 task.
requests.get(
    "https://orchestrator.idea.lst.tfo.upm.es/store/algorithm",
    headers=headers,
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    timeout=30,
).json()

{'data': [{'reviews': '/store/review?algorithm_id=1',
   'status': 'approved',
   'vantage6_version': '5.0.0a19',
   'submission_comments': '',
   'image': 'harbor2.vantage6.ai/idea4rc/sessions',
   'partitioning': 'horizontal',
   'functions': [{'databases': [{'description': 'The OMOP database',
       'id': 1,
       'name': 'database'}],
     'display_name': 'Create cohort',
     'step_type': 'data extraction',
     'description': 'Extract a cohort from the OMOP database',
     'standalone': True,
     'name': 'create_cohort',
     'id': 1,
     'arguments': [{'default_value': '',
       'conditional_on_id': None,
       'conditional_operator': None,
       'conditional_value': None,
       'display_name': 'Patient IDs',
       'description': 'List of patient IDs to extract',
       'type': 'integer_list',
       'name': 'patient_ids',
       'is_frontend_only': False,
       'id': 1,
       'has_default_value': False},
      {'default_value': 'sarcoma',
       'conditional_on_id': 

## 8. Create analytics (TODO)
- Hardcoded algorithms
- algorithm metadata (parameters, etc.)