In [12]:
import os
import requests

# Vantage6 API for the RAVEN
This notebook *should* contain all the code needed to interact with the vantage6 API 
from the RAVEN UI.

The following sections are included in this notebook:

  1. Enable self-signed certificates - This is not needed in case 
     of the RAVEN UI, as you probably already have a CA bundle. But I left it in for 
     reference.
  2. Authenticate with the vantage6 server - This will change, as we received a
     last-minute request to use Keycloak for authentication.
  3. Creating prerequisites - This is **static** content which should already be at the 
     vantage6 server. This is also not needed in case of the RAVEN UI, and you can skip
     this section as I already have created the required content.

...  

## 1. Enable self-signed certificates
The Orchestrator and the Vantage6 server are using self-signed certificates. This is not
the best practice, but it is what it is. This section is to create a CA bundle from the
TLS certificate and key, and set the `REQUESTS_CA_BUNDLE` environment variable. So that 
the requests library can verify the server's certificate.

The following code is to create a CA bundle from the TLS certificate and key, and set 
the `REQUESTS_CA_BUNDLE` environment variable. **Note that the expected crt and key 
files are base64 encoded.**

In [13]:
# Set this to the directory on your local machine that holds the TLS files;
# the three paths below are derived from it.
tls_dir = "/Users/frankmartin/Repositories/idea4rc-vantage6-algorithms/dev_notebooks"
crt_file = f"{tls_dir}/tls.crt"
key_file = f"{tls_dir}/tls.key"
bundle_path = f"{tls_dir}/ca-bundle.pem"

In [14]:

import base64
def create_ca_bundle(cert_file, key_file, output_path):
    """
    Create a CA bundle file from a base64 encoded certificate and key file.

    Both input files are read, base64-decoded and concatenated (certificate
    first, then key, separated by a newline) into ``output_path``. On success
    the ``REQUESTS_CA_BUNDLE`` environment variable is set to that path.

    Errors are not raised: any exception is caught and reported with
    ``print``, and the environment variable is then left untouched.

    Args:
        cert_file: Path (``str`` or ``os.PathLike``) of the file holding the
            base64 encoded certificate
        key_file: Path (``str`` or ``os.PathLike``) of the file holding the
            base64 encoded private key
        output_path: Path (``str`` or ``os.PathLike``) where to save the
            combined CA bundle
    """
    try:
        # Use distinct handle names so the path parameters are not shadowed
        with open(cert_file, 'rb') as cert_handle:
            cert_data = cert_handle.read()
        with open(key_file, 'rb') as key_handle:
            key_data = key_handle.read()

        # Decode base64 strings
        cert_data = base64.b64decode(cert_data).decode('utf-8')
        key_data = base64.b64decode(key_data).decode('utf-8')

        # Combine them in the correct order (certificate first, then key)
        # NOTE(review): this writes the private key into the bundle file;
        # presumably only the certificate is needed for verification — confirm.
        combined_data = cert_data + '\n' + key_data

        # Write the combined data to the output file
        with open(output_path, 'w') as out_file:
            out_file.write(combined_data)

        print(f"CA bundle created successfully at: {output_path}")

        # Set the environment variable. os.environ only accepts str values, so
        # convert path-like objects (e.g. pathlib.Path) first; otherwise the
        # assignment raises TypeError and the variable is never set.
        os.environ['REQUESTS_CA_BUNDLE'] = os.fspath(output_path)
        print(f"REQUESTS_CA_BUNDLE environment variable set to: {output_path}")

    except Exception as e:
        print(f"Error creating CA bundle: {e}")

In [15]:
# Build the bundle from the paths configured in `crt_file`, `key_file` and
# `bundle_path` instead of repeating the literal paths, so that changing the
# configuration cell is enough.
create_ca_bundle(crt_file, key_file, bundle_path)
# create_ca_bundle only prints on failure, so the variable is also set explicitly.
os.environ["REQUESTS_CA_BUNDLE"] = bundle_path

CA bundle created successfully at: /Users/frankmartin/Repositories/idea4rc-vantage6-algorithms/dev_notebooks/ca-bundle.pem
REQUESTS_CA_BUNDLE environment variable set to: /Users/frankmartin/Repositories/idea4rc-vantage6-algorithms/dev_notebooks/ca-bundle.pem


## 2. Authenticate with the vantage6 server
I am using the old way of authenticating with the vantage6 server, which is going to 
change in the near future as we will be using Keycloak for authentication. In the new 
scenario the users will be authenticated using their own credentials (users need to be 
created in the vantage6 static content).

In [16]:
# Request an access token for the root user.
# NOTE(review): the credentials are hardcoded; move them to environment
# variables or a prompt before sharing this notebook.
auth_response = requests.post(
    "https://orchestrator.idea.lst.tfo.upm.es/server/token/user",
    json={"username": "root", "password": "root"}
)
# Fail here with the HTTP error rather than with a KeyError on 'access_token'
# in the next cell when the login is rejected.
auth_response.raise_for_status()
auth_response.json()


{'access_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJmcmVzaCI6ZmFsc2UsImlhdCI6MTc1MDE2MjI2NywianRpIjoiMTk1ZjNkZDMtODg2Ni00MzMwLWFiZTctNWQzMTFkZjljOGE1IiwidHlwZSI6ImFjY2VzcyIsInN1YiI6MSwibmJmIjoxNzUwMTYyMjY3LCJleHAiOjE3NTAxODM4NjcsImNsaWVudF90eXBlIjoidXNlciIsInJvbGVzIjpbIlJvb3QiXX0.nwP2_-1PT4cZxlpF1dz8sfu7H1mZXfx6VrV6BuoAwZQ',
 'refresh_token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJmcmVzaCI6ZmFsc2UsImlhdCI6MTc1MDE2MjI2NywianRpIjoiMzAyMjUzNGMtMWVlZi00MWViLTg1MWUtYjI4ZTQ1MDc3ZTliIiwidHlwZSI6InJlZnJlc2giLCJzdWIiOjEsIm5iZiI6MTc1MDE2MjI2NywiZXhwIjoxNzUwMzM1MDY3LCJjbGllbnRfdHlwZSI6InVzZXIiLCJyb2xlcyI6WyJSb290Il19.8TqPlXLdFnFZq5BZLGa-qbB5e5HaNejjW196qMKHdao',
 'refresh_url': '/server/token/refresh',
 'user_url': '/server/user/1'}

In [17]:
# Authorization header carrying the access token from the login response
access_token = auth_response.json()['access_token']
headers = {"Authorization": f"Bearer {access_token}"}

## 3. Creating prerequisites (Static content)
This is **static** content which should already be at the vantage6 server. This is also 
not needed in case of the RAVEN UI, and you can skip this section as I already have 
created the required content. 

**I've used the vantage6 client library to create the organizations and users. AS YOU DO NOT NEED TO CREATE THESE, YOU CAN SKIP THIS SECTION.** I left it in here for reference and so you understand the process.

In [18]:
# Requires the vantage6 client library:
# !pip install vantage6-client
from vantage6.client import Client

# Connect to the vantage6 server and authenticate as the root user
server_host = "https://orchestrator.idea.lst.tfo.upm.es"
server_port = 443
server_path = "/server"
client = Client(server_host, server_port, server_path, log_level="INFO")
client.authenticate("root", "root")

 Welcome to
                  _                     __  
                 | |                   / /  
__   ____ _ _ __ | |_ __ _  __ _  ___ / /_  
\ \ / / _` | '_ \| __/ _` |/ _` |/ _ \ '_ \ 
 \ V / (_| | | | | || (_| | (_| |  __/ (_) |
  \_/ \__,_|_| |_|\__\__,_|\__, |\___|\___/ 
                            __/ |           
                           |___/            

 --> Join us on Discord! https://discord.gg/rwRvwyK
 --> Docs: https://docs.vantage6.ai
 --> Blog: https://vantage6.ai
------------------------------------------------------------
Cite us!
If you publish your findings obtained using vantage6, 
please cite the proper sources as mentioned in:
https://vantage6.ai/vantage6/references
------------------------------------------------------------
Successfully authenticated
 --> Succesfully authenticated
 --> Name: None (id=1)
 --> Organization: root (id=1)


### 3.1 Create the organizations

In [19]:
# Details of the first example organization, passed as keyword arguments
organization_1 = {
    "name": "Example Organization 1",
    "address1": "123 Main St",
    "address2": "Apt 1",
    "zipcode": "1234AB",
    "country": "NL",
    "domain": "example-organization-1.com",
}
client.organization.create(**organization_1)

Server responded with error code: 400
msg: Organization with name 'Example Organization 1' already exists!. Endpoint: organization
Refreshing token
Server responded with error code: 400
msg: Organization with name 'Example Organization 1' already exists!. Endpoint: organization
Nope, refreshing the token didn't fix it.


{'msg': "Organization with name 'Example Organization 1' already exists!"}

In [20]:
# Details of the second example organization, passed as keyword arguments
organization_2 = {
    "name": "Example Organization 2",
    "address1": "123 Main St",
    "address2": "Apt 2",
    "zipcode": "1234AB",
    "country": "NL",
    "domain": "example-organization-2.com",
}
client.organization.create(**organization_2)

Server responded with error code: 400
msg: Organization with name 'Example Organization 2' already exists!. Endpoint: organization
Refreshing token
Server responded with error code: 400
msg: Organization with name 'Example Organization 2' already exists!. Endpoint: organization
Nope, refreshing the token didn't fix it.


{'msg': "Organization with name 'Example Organization 2' already exists!"}

The organizations are created. Every organization has an ID which can be used to 
identify the organization at a later stage.

**DID WE LINK THE ORGANIZATION TO THE RAVEN UI?**

In [52]:
# Request only the `id` and `name` fields of each organization
client.organization.list(fields=("id", "name"))

[{'id': 3, 'name': 'Example Organization 2'},
 {'id': 2, 'name': 'Example Organization 1'},
 {'id': 1, 'name': 'root'}]

### 3.2 Create the users

In [22]:
# Request only the `id` and `name` fields of each role; the role ids are
# needed when creating the users
client.role.list(fields=("id", "name"))

[{'id': 2, 'name': 'container'},
 {'id': 1, 'name': 'Root'},
 {'id': 5, 'name': 'Researcher'},
 {'id': 6, 'name': 'Organization Admin'},
 {'id': 4, 'name': 'Viewer'},
 {'id': 3, 'name': 'node'},
 {'id': 7, 'name': 'Collaboration Admin'}]

In [23]:
# user1: member of organization id 2 with role id 6
# NOTE(review): the password is hardcoded in the notebook.
user_1 = {
    "username": "user1",
    "password": "Password123!",
    "email": "user1@example-organization-1.com",
    "firstname": "User 1",
    "lastname": "User 1",
    "organization": 2,
    "roles": [6],
}
client.user.create(**user_1)

Server responded with error code: 400
msg: username already exists. Endpoint: user
Refreshing token
Server responded with error code: 400
msg: username already exists. Endpoint: user
Nope, refreshing the token didn't fix it.


{'msg': 'username already exists.'}

In [24]:
# user2: member of organization id 3 with role id 6
# NOTE(review): the password is hardcoded in the notebook.
user_2 = {
    "username": "user2",
    "password": "Password123!",
    "email": "user2@example-organization-2.com",
    "firstname": "User 2",
    "lastname": "User 2",
    "organization": 3,
    "roles": [6],
}
client.user.create(**user_2)

Server responded with error code: 400
msg: username already exists. Endpoint: user
Refreshing token
Server responded with error code: 400
msg: username already exists. Endpoint: user
Nope, refreshing the token didn't fix it.


{'msg': 'username already exists.'}

In [25]:
# raven: the user for the RAVEN UI, member of organization id 2 with role id 7
# NOTE(review): the e-mail domain says "example-organization-2" while the
# user is placed in organization id 2 — confirm which one is intended.
# NOTE(review): the password is hardcoded in the notebook.
raven_user = {
    "username": "raven",
    "password": "Password123!",
    "email": "raven@example-organization-2.com",
    "firstname": "Raven",
    "lastname": "Raven",
    "organization": 2,
    "roles": [7],
}
client.user.create(**raven_user)

Server responded with error code: 400
msg: username already exists. Endpoint: user
Refreshing token
Server responded with error code: 400
msg: username already exists. Endpoint: user
Nope, refreshing the token didn't fix it.


{'msg': 'username already exists.'}

In [26]:
# Request only the `id` and `username` fields of each user
client.user.list(fields=("id", "username"))

[{'id': 4, 'username': 'raven'},
 {'id': 1, 'username': 'root'},
 {'id': 2, 'username': 'user1'},
 {'id': 3, 'username': 'user2'}]

### 3.3 Create the collaboration

In [27]:
# Create a collaboration containing organization ids 2 and 3
collaboration_settings = {
    "name": "Example Collaboration 1",
    "organizations": [2, 3],
}
client.collaboration.create(**collaboration_settings)

Server responded with error code: 400
msg: Collaboration name 'Example Collaboration 1' already exists!. Endpoint: collaboration
Refreshing token
Server responded with error code: 400
msg: Collaboration name 'Example Collaboration 1' already exists!. Endpoint: collaboration
Nope, refreshing the token didn't fix it.


{'msg': "Collaboration name 'Example Collaboration 1' already exists!"}

In [28]:
# Request only the `id` and `name` fields, using the "global" scope
client.collaboration.list(
    fields=("id", "name"),
    scope="global",
)

[{'id': 1, 'name': 'Example Collaboration 1'}]

### 3.4 Create the nodes

In [29]:
# Register a node for organization id 2 in collaboration id 1
node_org_2 = {
    "collaboration": 1,
    "organization": 2,
    "name": "Organization 2 Node 1",
}
client.node.create(**node_org_2)

{'id': 5,
 'last_seen': None,
 'type': 'node',
 'name': 'Organization 2 Node 1',
 'ip': None,
 'status': 'offline',
 'organization': {'id': 2,
  'link': '/server/organization/2',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'config': [],
 'collaboration': {'id': 1,
  'link': '/server/collaboration/1',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'api_key': '10adb4fa-7b8c-4ac9-9aaf-e823d2bd3a81'}

In [30]:
# Register a node for organization id 3 in collaboration id 1
node_org_3 = {
    "collaboration": 1,
    "organization": 3,
    "name": "Organization 3 Node 1",
}
client.node.create(**node_org_3)

{'id': 6,
 'last_seen': None,
 'type': 'node',
 'name': 'Organization 3 Node 1',
 'ip': None,
 'status': 'offline',
 'organization': {'id': 3,
  'link': '/server/organization/3',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'config': [],
 'collaboration': {'id': 1,
  'link': '/server/collaboration/1',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'api_key': 'cb208ffd-8735-4dc4-8ce4-e4fab9e37b84'}

In [34]:
# Request the `id`, `name` and `status` fields of each node
node_fields = ("id", "name", "status")
client.node.list(fields=node_fields)

[{'id': 5, 'name': 'Organization 2 Node 1', 'status': 'offline'},
 {'id': 6, 'name': 'Organization 3 Node 1', 'status': 'offline'}]

## 4. New Workspace
When a new workspace is created in RAVEN, we need to create a new study in vantage6.  
A study in vantage6 is a collection of organizations that are allowed to be used in the 
computations.

We first login as the RAVEN user and get the token. (We needed a different user for  
creating the static (fixed) content.)

In [35]:
# Request an access token for the RAVEN user.
# NOTE(review): the credentials are hardcoded; move them to environment
# variables or a prompt before sharing this notebook.
auth_response = requests.post(
    "https://orchestrator.idea.lst.tfo.upm.es/server/token/user",
    json={"username": "raven", "password": "Password123!"}
)
# Fail here with the HTTP error rather than with a KeyError on 'access_token'
# below when the login is rejected.
auth_response.raise_for_status()
headers = {
    "Authorization": f"Bearer {auth_response.json()['access_token']}"
}

We need the organization ids (the internal ids in vantage6) that belong to this   
workspace to create a new study. The name of the study needs to be unique. I guess the  
name of the workspace is also unique, so we can use that.

In [37]:
# Create a study in collaboration id 1 containing organization ids 2 and 3
study_payload = {
    "collaboration_id": 1,
    "name": "Example Study 2",
    "organization_ids": [2, 3],
}
response = requests.post(
    "https://orchestrator.idea.lst.tfo.upm.es/server/study",
    headers=headers,
    json=study_payload,
)
response.json()

{'name': 'Example Study 2',
 'tasks': '/server/task?study_id=2',
 'organizations': [{'collaborations': '/server/collaboration?organization_id=2',
   'country': 'NL',
   'domain': 'example-organization-1.com',
   'id': 2,
   'nodes': '/server/node?organization_id=2',
   'tasks': '/server/task?init_org_id=2',
   'name': 'Example Organization 1',
   'studies': '/server/study?organization_id=2',
   'zipcode': '1234AB',
   'public_key': '',
   'users': '/server/user?organization_id=2',
   'address2': 'Apt 1',
   'address1': '123 Main St',
   'runs': '/server/run?organization_id=2'},
  {'collaborations': '/server/collaboration?organization_id=3',
   'country': 'NL',
   'domain': 'example-organization-2.com',
   'id': 3,
   'nodes': '/server/node?organization_id=3',
   'tasks': '/server/task?init_org_id=3',
   'name': 'Example Organization 2',
   'studies': '/server/study?organization_id=3',
   'zipcode': '1234AB',
   'public_key': '',
   'users': '/server/user?organization_id=3',
   'address

In [36]:
# Fetch the studies and show the "data" entry of the JSON response
study_list_url = "https://orchestrator.idea.lst.tfo.upm.es/server/study"
response = requests.get(study_list_url, headers=headers)
response.json()["data"]

[{'name': 'Example Study 1',
  'tasks': '/server/task?study_id=1',
  'organizations': '/server/organization?study_id=1',
  'collaboration': {'id': 1,
   'link': '/server/collaboration/1',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'id': 1}]

## 5. New Analysis
When a new analysis is created we need to create a new session in vantage6. A session  
is a container on the data stations in which we can store dataframes (an extraction of  
the data from the OMOP database). We need the study id which should be stored in the  
workspace.

In [39]:
# Create a session for study id 2 in collaboration id 1
session_payload = {
    "collaboration_id": 1,
    "name": "Example Session 2",
    "study_id": 2,
    "scope": "collaboration",
}
response = requests.post(
    "https://orchestrator.idea.lst.tfo.upm.es/server/session",
    headers=headers,
    json=session_payload,
)
response.json()

{'dataframes': '/server/session/2/dataframe',
 'id': 2,
 'tasks': '/server/task?session_id=2',
 'name': 'Example Session 2',
 'owner': {'id': 4,
  'link': '/server/user/4',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'last_used_at': '2025-06-17T11:14:25.442178',
 'created_at': '2025-06-17T11:14:25.442105',
 'scope': 'col',
 'ready': True,
 'study': {'id': 2,
  'link': '/server/study/2',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'collaboration': {'id': 1,
  'link': '/server/collaboration/1',
  'methods': ['DELETE', 'GET', 'PATCH']}}

## 6. New cohort
When a new cohort is created vantage6 needs to extract the data from the OMOP database  
and store it in the session as a dataframe. This is done by executing a vantage6  
extraction task.

In [53]:
#
# Static content: values that are the same for every cohort
#

# This is the collaboration id, which in IDEA4RC is always a constant (static content).
collaboration_id = 1

# The image contains the query to extract the data from the OMOP database.
# NOTE(review): the value starts with "https://"; container image references
# normally have no URL scheme — confirm the nodes can pull this image.
image = "https://harbor2.vantage6.ai/idea4rc/idea4rc-vantage6-algorithms:latest"

# In vantage6 a single node can have multiple databases attached to it. In IDEA4RC we
# only attach to the OMOP database which we labeled "omop".
label = "omop"

#
# Dynamic content: values that depend on the workspace / analysis / cohort
#
# The study id should be linked to the workspace.
study_id = 2

# The session id should be linked to the analysis.
session_id = 2

# The name of the cohort, this should be unique within a session. You can probably use
# the same name that you use in the RAVEN UI. Alternatively, we can also not send it. In
# that case the name will be generated by vantage6.
# NOTE(review): the `payload` built further down in this notebook does not
# include `name` — confirm whether it should be sent.
name = "Cohort name 1"

# Each `image` can have multiple `methods`. We need to use a different method for
# sarcoma and head and neck as we are extracting different features.
# Enable exactly one of the two assignments below.
method = "extract_sarcoma_features"
# method = "extract_head_and_neck_features"

In [63]:
# Look up the organizations that are part of the study. This request is
# probably redundant, as the (v6) organization IDs are already known at this
# point.
organization_response = requests.get(
    "https://orchestrator.idea.lst.tfo.upm.es/server/organization",
    params={"per_page": 999, "study_id": study_id},
    headers=headers,
)
orgs = organization_response.json()

# Collect the id of every organization in the "data" entry of the response
org_ids = []
for organization in orgs["data"]:
    org_ids.append(organization["id"])
org_ids

[2, 3]

In [64]:
# Prepare the task instructions before creating the task. vantage6 supports
# end-to-end encryption (not used in IDEA4RC), which is why the input is
# stored for each organization individually.
organization_inputs = [{"id": id_, "input": ""} for id_ in org_ids]

payload = {
    "label": label,
    "task": {
        "method": method,
        "image": image,
        "organizations": organization_inputs,
    },
}

In [None]:
# Start a vantage6 task that extracts the data from the omop data source and
# stores it in a dataframe of the session.
dataframe_url = (
    f"https://orchestrator.idea.lst.tfo.upm.es/server/session/{session_id}/dataframe"
)
response = requests.post(dataframe_url, headers=headers, json=payload)
print(response)
response.json()

<Response [201]>


{'session': {'id': 2,
  'link': '/server/session/2',
  'methods': ['DELETE', 'GET', 'PATCH']},
 'tasks': '/server/task?dataframe_id=2',
 'name': 'crazy_dirac',
 'db_label': 'omop',
 'columns': [],
 'last_session_task': {'algorithm_store': None,
  'dataframe': {'db_label': 'omop', 'name': 'crazy_dirac', 'id': 2},
  'results': '/server/result?task_id=2',
  'method': 'extract_sarcoma_features',
  'id': 2,
  'session': {'id': 2,
   'link': '/server/session/2',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'databases': [{'label': 'omop',
    'type': 'source',
    'dataframe_id': None,
    'dataframe_name': None}],
  'runs': '/server/run?task_id=2',
  'required_by': [],
  'init_user': {'id': 4,
   'link': '/server/user/4',
   'methods': ['DELETE', 'GET', 'PATCH']},
  'created_at': '2025-06-18T07:35:05.065320',
  'image': 'https://harbor2.vantage6.ai/idea4rc/idea4rc-vantage6-algorithms:latest',
  'depends_on': [],
  'name': 'Session initialization: Example Session 2',
  'init_org': {'id': 2,
 

### FROM HERE ON WE NEED TEST NODES IN THE CAPSULE WITH SOME DATA.  
### I WILL TRY TO GET THEM UP BEFORE MY HOLIDAY. THE PROCESS FOR ALL THESE STEPS  
### IS VERY SIMILAR TO THE CREATION OF THE DATAFRAME

In [68]:
# TODO:
#   - Poll for the dataframe status
#   - Compute summary statistics on the dataframe
#   - Poll for the summary statistics results
#   - Create analytics
#   - Poll for analytics results
#   - Start preprocessing task
#   - Poll for the preprocessing task to finish

## 7. Summary statistics
Before we can display the summary statistics we need to calculate them. This is done through a vantage6 algorithm. 

We first need to be sure the dataframe is ready to be used. Then we can execute the algorithm and await the results to be displayed.

## 8. Create analytics (TODO)
- Hardcoded algorithms
- algorithm metadata (parameters, etc.)