In [1]:
# Address of the model instance to validate; the metadata cell below requests it as JSON-LD.
url = "https://v2.fairmodels.org/instance/3f400afb-df5e-4798-ad50-0687dd439d9b"
# Excel workbook with the validation data; read with pandas.read_excel further down.
validation_filename = "thunder_reduced.xlsx"
# Name of the outcome column in the workbook; the scoring cell treats the value 1 as the positive outcome.
outcome_parameter = 'pCR'

In [2]:
! pip install docker




[notice] A new release of pip is available: 23.0.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
# fetch model metadata from URL, and specifically request the accept type as JSON-LD
import requests
import json

# A timeout keeps the notebook from hanging forever when the registry is unreachable.
response = requests.get(url, headers={'Accept': 'application/ld+json'}, timeout=30)
# Stop here with a clear HTTP error rather than failing later on an error page
# that cannot be parsed or that lacks the expected keys.
response.raise_for_status()
model_metadata = response.json()
# The metadata names the docker image that serves the model.
docker_image_name = model_metadata['FAIRmodels image name']['@value']

In [4]:
# connect to the local docker daemon and try to pull the model image;
# a failed pull is only reported, it does not stop the notebook
import docker

client = docker.from_env()
try:
    client.images.pull(docker_image_name)
except docker.errors.APIError as pull_error:
    print("could not pull image: ", pull_error)

# run docker image and expose port 8000 on a host port which is freely available
import socket
import random  # not needed for port selection any more; kept so the notebook namespace is unchanged

# Let the operating system choose the port: binding to port 0 hands out a port that
# is currently unused. The previous probe reused a single socket for repeated
# connect_ex() calls; once that socket had connected, every later call returned an
# error code, so the next random port was accepted without really being tested.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.bind(("", 0))
    port = s.getsockname()[1]

# NOTE: the port is released again before docker binds it, so a small race with
# other processes remains (as it did with the original probe).
container = client.containers.run(docker_image_name, detach=True, ports={8000:port}, remove=True)

# wait for the server to start: poll the published port until it answers an HTTP
# request instead of sleeping for a fixed time that may be too short or too long
import time

startup_timeout_seconds = 60
poll_interval_seconds = 0.5
startup_deadline = time.monotonic() + startup_timeout_seconds

while True:
    try:
        # Any HTTP response, whatever its status, shows that the server is up.
        requests.get(f"http://localhost:{port}", timeout=2)
        break
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as startup_error:
        # Refused, reset or timed-out connections are expected while the server boots.
        if time.monotonic() >= startup_deadline:
            raise TimeoutError(
                f"Model server on port {port} did not respond within {startup_timeout_seconds} seconds"
            ) from startup_error
        time.sleep(poll_interval_seconds)

In [5]:
# get the JSON from the root webpage in the container
image_root_url = f"http://localhost:{port}"
response = requests.get(image_root_url, timeout=30)
# surface HTTP errors here instead of as a confusing KeyError below
response.raise_for_status()
data = response.json()

# only the first model listed by the container is validated
if not data['models']:
    raise ValueError("The container does not list any models")
first_model = data['models'][0]
subpath_url = first_model['path_parameters']
subpath_execution_url = first_model['path']

# get input/output parameters of the first model
response = requests.get(f"{image_root_url}/{subpath_url}", timeout=30)
response.raise_for_status()
model_parameters = response.json()
print(f"Model parameters: {model_parameters}")

# columns needed from the validation data: every model parameter plus the outcome
columns = model_parameters + [outcome_parameter]

Model parameters: ['cT', 'cN', 'tLength']


In [6]:
# read excel sheet as input data using pandas dataframe
import pandas as pd

# validation_filename is set in the configuration cell at the top of the notebook
input_data = pd.read_excel(validation_filename)

def check_columns_exist(input_data, columns, raise_exception=False):
    """
    Verify that every required column is present in a dataframe.

    :param input_data: pandas dataframe to inspect
    :param columns: list of column names that must be present
    :param raise_exception: boolean; when True a ValueError is raised if columns are
        missing, when False (default) the missing columns are only printed

    :return: None
    :raises ValueError: if columns are missing and raise_exception is True
    """
    # keep the order of `columns` so the report lists names as they were requested
    absent = [name for name in columns if name not in input_data.columns]

    # nothing missing: confirm and stop
    if not absent:
        print("All columns exist")
        return

    report = f"Missing columns: {absent}"
    if raise_exception:
        raise ValueError(report)
    print(report)

# raise_exception is left at its default (False), so missing columns are printed rather than raised
check_columns_exist(input_data, columns)

Missing columns: ['tLength']


In [7]:
# the model expects a "tLength" column; the sheet's "SizeZ" column is used for it
column_aliases = {"SizeZ": "tLength"}
input_data = input_data.rename(columns=column_aliases)

# re-run the check now that the column carries the expected name
check_columns_exist(input_data, columns)

All columns exist


In [8]:
# replace all cells with "x" to NA
input_data = input_data.replace("x", pd.NA)

# input data should only contain complete cases
input_data = input_data.dropna(subset=columns)

# convert pandas dataframe to JSON, but only for the columns specified in the model
# (the to_json/loads round trip yields plain Python values that requests can serialise)
input_data_json = json.loads(input_data[columns].to_json(orient='records'))

# send the input data to the model
response = requests.post(f"http://localhost:{port}/{subpath_execution_url}", json=input_data_json, timeout=120)
# an error response must not be stored as if it were a list of predictions
response.raise_for_status()
predictions = response.json()

# the scoring cell pairs predictions with rows by position, so there has to be
# exactly one prediction per submitted row
if not isinstance(predictions, list) or len(predictions) != len(input_data):
    raise ValueError(
        f"Expected a list of {len(input_data)} predictions from the model, got: {predictions!r}"
    )
input_data['predictions'] = predictions

In [9]:
# stop the running container
# (it was started with remove=True, so docker is expected to delete it once it has stopped)
container.stop()

In [10]:
# turn the outcome column into booleans: True exactly where the recorded value equals 1
input_data[outcome_parameter] = input_data[outcome_parameter].eq(1)

# metrics come from scikit-learn
from sklearn.metrics import roc_auc_score, brier_score_loss

observed_outcome = input_data[outcome_parameter]
predicted_scores = input_data['predictions']

# area under the ROC curve of the predictions against the observed outcome
auc = roc_auc_score(observed_outcome, predicted_scores)

# Brier score of the same predictions
brier = brier_score_loss(observed_outcome, predicted_scores)

scores = dict(AUC=auc, Brier=brier)

print(f"Scores: {scores}")

Scores: {'AUC': 0.6988795518207283, 'Brier': 0.18465341199853313}
