# Tabular data example

In [18]:
from velour.client import Client, Dataset, Model, ClientException
from velour.enums import TaskType
from velour import schemas

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [19]:
from velour.client import Client

# Address of the velour backend this notebook connects to.
velour_url = "http://localhost:8000"
client = Client(velour_url)

Succesfully connected to http://localhost:8000/.


In [20]:
# Load the breast-cancer dataset bundled with scikit-learn and pull out the
# feature matrix, the integer class targets, and the class names.
dset = load_breast_cancer()
X, y, target_names = dset["data"], dset["target"], dset["target_names"]

# Fix the shuffle seed so the train/test split -- and every metric derived
# from it further down -- is identical on each Restart & Run All. Without
# `random_state`, train_test_split draws a fresh random split on every run.
# NOTE: outputs stored in this notebook predate the seed; re-run to refresh them.
SEED = 0
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)

# Pipeline that standardizes the features and feeds them to a
# logistic-regression classifier (it is fitted in a later cell).
pipe = make_pipeline(StandardScaler(), LogisticRegression())

In [21]:
# Peek at the training split: feature-matrix shape, first four targets, class names.
X_train.shape, y_train[:4], target_names

((426, 30), array([1, 0, 1, 1]), array(['malignant', 'benign'], dtype='<U9'))

In [22]:
# Fit the scaler + logistic-regression pipeline on the training split.
pipe.fit(X_train, y_train)

In [23]:
# Class-probability estimates from the fitted pipeline for both splits
# (training split first, then the test split).
y_train_probs, y_test_probs = [
    pipe.predict_proba(features) for features in (X_train, X_test)
]

In [24]:
# First four rows of the training-set class probabilities.
y_train_probs[:4]

array([[5.65844136e-05, 9.99943416e-01],
       [9.99372617e-01, 6.27383221e-04],
       [1.48501435e-06, 9.99998515e-01],
       [1.00466350e-03, 9.98995336e-01]])

## Velour Dataset ingestion

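We now ingest the groundtruth labels into velour. For each sample, velour expects a `GroundTruth` that pairs a `Datum` (identified by a `uid`) with a list of `Annotation` objects, each of which carries a list of `Label` objects. Each `Label` has a key and a value. Allowing key/value labels, and allowing a single row to be annotated with multiple labels, supports multi-label classification.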

In this example there's just a single label per element and we'll set the class key to "class". The `add_groundtruth` method returns the ids of the newly created groundtruth.

Create datasets

In [25]:
def _create_or_get_dataset(name):
    """Create the velour dataset `name`; if that raises ClientException, fetch it instead."""
    try:
        return Dataset.create(client, name)
    except ClientException:
        return Dataset.get(client, name)


# Reset both datasets (only needed if restarting each run).
client.delete_dataset("breast-cancer-train")
client.delete_dataset("breast-cancer-test")

# Train dataset first, then test dataset.
velour_train_dataset = _create_or_get_dataset("breast-cancer-train")
velour_test_dataset = _create_or_get_dataset("breast-cancer-test")

Format data

In [26]:
def _classification_groundtruth(uid, class_index):
    """Build a single-label classification `GroundTruth` for the datum `uid`.

    The label is stored under the "class" key and its value is looked up in
    `target_names` using `class_index`.
    """
    datum = schemas.Datum(uid=uid)
    label = schemas.Label(key="class", value=target_names[class_index])
    annotation = schemas.Annotation(
        task_type=TaskType.CLASSIFICATION,
        labels=[label],
    )
    return schemas.GroundTruth(datum=datum, annotations=[annotation])


# One groundtruth per training target, with uids "train0", "train1", ...
training_groundtruths = [
    _classification_groundtruth(f"train{i}", t) for i, t in enumerate(y_train)
]

# One groundtruth per testing target, with uids "test0", "test1", ...
testing_groundtruths = [
    _classification_groundtruth(f"test{i}", t) for i, t in enumerate(y_test)
]

Ingest data

In [27]:
# Upload the groundtruths one at a time: all training groundtruths go to the
# train dataset first, then all testing groundtruths go to the test dataset.
for dataset, groundtruths in (
    (velour_train_dataset, training_groundtruths),
    (velour_test_dataset, testing_groundtruths),
):
    for groundtruth in groundtruths:
        dataset.add_groundtruth(groundtruth)

Finalize datasets, necessary for evaluation

In [28]:
# Finalize both datasets. The value of the last call is what the cell displays.
velour_train_dataset.finalize()
velour_test_dataset.finalize()

<Response [200]>

## Model inference ingestion

Now we create a velour model and post its predictions on the two datasets. Each `Prediction` pairs a `Datum` with a list of `Annotation` objects whose `Label`s each carry a confidence `score`. The confidence scores over all of the classes in a key must sum to (approximately) 1.

Create model

In [29]:
model_name = "breast-cancer-linear-model"

# Reset: delete the model by name (only necessary if restarting).
client.delete_model(model_name)

# Create the model, falling back to fetching it when creation raises ClientException.
try:
    velour_model = Model.create(client, model_name)
except ClientException:
    velour_model = Model.get(client, model_name)

Format predictions

In [30]:
def _classification_prediction(dataset_name, uid, class_probs):
    """Wrap one row of class probabilities in a velour `Prediction`.

    Position `j` of `class_probs` becomes a "class" label whose value is
    `target_names[j]` and whose score is `class_probs[j]`.
    """
    datum = schemas.Datum(dataset=dataset_name, uid=uid)
    scored_labels = []
    for j, p in enumerate(class_probs):
        scored_labels.append(
            schemas.Label(key="class", value=target_names[j], score=p)
        )
    annotation = schemas.Annotation(
        task_type=TaskType.CLASSIFICATION,
        labels=scored_labels,
    )
    return schemas.Prediction(datum=datum, annotations=[annotation])


# Predictions on the training split, keyed by the same uids as its groundtruths.
training_predictions = [
    _classification_prediction(velour_train_dataset.name, f"train{i}", prob)
    for i, prob in enumerate(y_train_probs)
]

# Predictions on the test split, keyed by the same uids as its groundtruths.
testing_predictions = [
    _classification_prediction(velour_test_dataset.name, f"test{i}", prob)
    for i, prob in enumerate(y_test_probs)
]

In [31]:
# Post every training prediction first, then every test prediction.
for predictions in (training_predictions, testing_predictions):
    for prediction in predictions:
        velour_model.add_prediction(prediction)

Finalize the model's inferences on each dataset, necessary for evaluation

In [32]:
# Finalize the model's inferences against the train dataset, then the test dataset.
velour_model.finalize_inferences(velour_train_dataset)
velour_model.finalize_inferences(velour_test_dataset)

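Evaluate the model on each dataset, starting with the training dataset. Note: the recorded ROCAUC values below (about 0.56 on train and 0.06 on test) are far lower than expected for a model with roughly 98% accuracy; treat them as suspect until verified.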

In [33]:
# Start a classification evaluation on the train dataset and wait for it to complete.
train_eval_job = velour_model.evaluate_classification(velour_train_dataset)
train_eval_job.wait_for_completion()

In [34]:
# Show the job status after waiting for completion.
train_eval_job.status

<JobStatus.DONE: 'done'>

In [35]:
# Metrics reported by the training-set evaluation job.
train_eval_job.metrics

[{'type': 'Accuracy',
  'parameters': {'label_key': 'class'},
  'value': 0.9882629107981221},
 {'type': 'ROCAUC',
  'parameters': {'label_key': 'class'},
  'value': 0.5605676232757246},
 {'type': 'Precision',
  'value': 0.9851301115241635,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'Recall',
  'value': 0.9962406015037594,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'F1',
  'value': 0.9906542056074765,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'Precision',
  'value': 0.9936305732484076,
  'label': {'key': 'class', 'value': 'malignant'}},
 {'type': 'Recall',
  'value': 0.975,
  'label': {'key': 'class', 'value': 'malignant'}},
 {'type': 'F1',
  'value': 0.9842271293375395,
  'label': {'key': 'class', 'value': 'malignant'}}]

In [36]:
# Confusion matrices reported by the training-set evaluation job.
train_eval_job.confusion_matrices

[{'label_key': 'class',
  'entries': [{'prediction': 'benign', 'groundtruth': 'benign', 'count': 265},
   {'prediction': 'benign', 'groundtruth': 'malignant', 'count': 4},
   {'prediction': 'malignant', 'groundtruth': 'benign', 'count': 1},
   {'prediction': 'malignant', 'groundtruth': 'malignant', 'count': 156}]}]

In [37]:
# Start a classification evaluation on the test dataset and wait for it to complete.
test_eval_job = velour_model.evaluate_classification(velour_test_dataset)
test_eval_job.wait_for_completion()

In [38]:
# Metrics reported by the test-set evaluation job.
test_eval_job.metrics

[{'type': 'Accuracy',
  'parameters': {'label_key': 'class'},
  'value': 0.9790209790209791},
 {'type': 'ROCAUC',
  'parameters': {'label_key': 'class'},
  'value': 0.06243063263041064},
 {'type': 'Precision',
  'value': 0.9782608695652174,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'Recall',
  'value': 0.989010989010989,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'F1',
  'value': 0.9836065573770493,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'Precision',
  'value': 0.9803921568627451,
  'label': {'key': 'class', 'value': 'malignant'}},
 {'type': 'Recall',
  'value': 0.9615384615384616,
  'label': {'key': 'class', 'value': 'malignant'}},
 {'type': 'F1',
  'value': 0.970873786407767,
  'label': {'key': 'class', 'value': 'malignant'}}]

In [39]:
# Confusion matrices reported by the test-set evaluation job.
test_eval_job.confusion_matrices

[{'label_key': 'class',
  'entries': [{'prediction': 'benign', 'groundtruth': 'benign', 'count': 90},
   {'prediction': 'benign', 'groundtruth': 'malignant', 'count': 2},
   {'prediction': 'malignant', 'groundtruth': 'benign', 'count': 1},
   {'prediction': 'malignant', 'groundtruth': 'malignant', 'count': 50}]}]

Evaluation metrics in a pandas DataFrame

In [40]:
# Fetch the model's metrics; the next cell indexes the result and reads the
# "settings" and "df" keys of its first entry.
settings_and_dfs = velour_model.get_metric_dataframes()

In [41]:
# Unpack the first entry returned by get_metric_dataframes().
first_result = settings_and_dfs[0]
settings, df = first_result["settings"], first_result["df"]

In [42]:
# Evaluation settings attached to the first metrics entry.
print(settings)

{'model': 'breast-cancer-linear-model', 'dataset': 'breast-cancer-train', 'gt_type': 'none', 'pd_type': 'none', 'task_type': 'classification', 'id': 7}


In [43]:
# Metrics table for the first entry (the recorded output is for breast-cancer-train).
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
Unnamed: 0_level_1,Unnamed: 1_level_1,dataset,breast-cancer-train
type,parameters,label,Unnamed: 3_level_2
Accuracy,"{""label_key"": ""class""}",,0.988263
F1,"""n/a""",class: benign,0.990654
F1,"""n/a""",class: malignant,0.984227
Precision,"""n/a""",class: benign,0.98513
Precision,"""n/a""",class: malignant,0.993631
ROCAUC,"{""label_key"": ""class""}",,0.560568
Recall,"""n/a""",class: benign,0.996241
Recall,"""n/a""",class: malignant,0.975


## Sanity check against scikit-learn's classification report

In [44]:
from sklearn.metrics import classification_report

In [45]:
# Hard class predictions on the training split, for comparison with the velour metrics.
y_train_preds = pipe.predict(X_train)

In [46]:
# scikit-learn's per-class precision/recall/F1 on the training split, printed
# to six digits so it can be compared against the velour metrics above.
train_report = classification_report(
    y_train,
    y_train_preds,
    digits=6,
    target_names=target_names,
)
print(train_report)

              precision    recall  f1-score   support

   malignant   0.993631  0.975000  0.984227       160
      benign   0.985130  0.996241  0.990654       266

    accuracy                       0.988263       426
   macro avg   0.989380  0.985620  0.987441       426
weighted avg   0.988323  0.988263  0.988240       426

