# Tabular data example

In [1]:
import time

from velour.client import Client, TabularDataset, TabularModel
from velour.data_types import Label, ScoredLabel

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [2]:
# Connect to the velour backend. `Client` is already imported in the first
# cell, so it is not imported a second time here.
client = Client("http://localhost:8000")

Succesfully connected to http://localhost:8000/.


In [3]:
# Load the scikit-learn breast-cancer dataset: the feature matrix, the integer
# targets, and the class names that those integer targets index into.
breast_cancer = load_breast_cancer()
X, y, target_names = (
    breast_cancer["data"],
    breast_cancer["target"],
    breast_cancer["target_names"],
)

# Seed the shuffle so the train/test split -- and every metric derived from it
# further down -- is the same on each "Restart & Run All". Outputs recorded
# before this seed was introduced came from an unseeded split.
SEED = 0
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SEED)

# Standardise the features, then classify with logistic regression.
# The pipeline is only constructed here; it is fitted in a later cell.
pipe = make_pipeline(StandardScaler(), LogisticRegression())

In [4]:
# Quick look at the training split: feature-matrix shape, the first four
# targets, and the class names.
(X_train.shape, y_train[:4], target_names)

((426, 30), array([0, 1, 1, 1]), array(['malignant', 'benign'], dtype='<U9'))

In [5]:
# Fit the scaler + classifier pipeline on the training split.
pipe.fit(X=X_train, y=y_train)

In [6]:
# Class probabilities for both splits (train first, then test).
y_train_probs, y_test_probs = (
    pipe.predict_proba(features) for features in (X_train, X_test)
)

In [7]:
# First four rows of the training probabilities, all class columns.
y_train_probs[:4, :]

array([[9.95261940e-01, 4.73805997e-03],
       [1.46927840e-03, 9.98530722e-01],
       [2.46585889e-05, 9.99975341e-01],
       [7.72618678e-04, 9.99227381e-01]])

## Dataset ingestion

We now ingest the groundtruth labels into velour. For each sample, velour expects a list of `Label` objects, each of which has a key and a value. Using key/value labels, and allowing a single row to be annotated with several of them, is what makes multi-label classification possible.

In this example there is just a single label per row, and we set its key to "class". The `add_groundtruth` method returns the ids of the newly created groundtruths.

In [8]:
def _ingest_groundtruth(dataset_name, targets):
    """Create a tabular dataset and attach one "class" label per row.

    `targets` holds integer class indices; each is mapped to its name through
    `target_names`. The ids returned by `add_groundtruth` are discarded.
    Returns the newly created dataset.
    """
    dataset = client.create_tabular_dataset(dataset_name)
    row_labels = [[Label(key="class", value=target_names[t])] for t in targets]
    dataset.add_groundtruth(row_labels)
    return dataset


velour_train_dataset = _ingest_groundtruth("breast-cancer-train", y_train)
velour_test_dataset = _ingest_groundtruth("breast-cancer-test", y_test)

## Model inference ingestion

Now we create a velour model and post its predictions on the two datasets. Each prediction should be a list of `ScoredLabel` objects, each consisting of a label and a confidence score. For a given key, the confidence scores over all of its classes must sum to (approximately) 1.

In [9]:
# Register a new tabular model under a fixed name.
velour_model = client.create_tabular_model("breast-cancer-linear-model")
# Alternative kept for re-runs: fetch the model by name instead of creating it
# (presumably needed when the model already exists on the backend -- confirm).
# velour_model = client.get_model("breast-cancer-linear-model")

In [10]:
def _to_scored_labels(class_probs):
    """Convert one row of class probabilities into a list of `ScoredLabel`s.

    Position `i` in the row is paired with `target_names[i]` under the
    "class" key.
    """
    return [
        ScoredLabel(label=Label(key="class", value=target_names[i]), score=p)
        for i, p in enumerate(class_probs)
    ]


# Post the predictions for the train split first, then for the test split.
for velour_dataset, split_probs in (
    (velour_train_dataset, y_train_probs),
    (velour_test_dataset, y_test_probs),
):
    _ = velour_model.add_predictions(
        dataset=velour_dataset,
        predictions=[_to_scored_labels(row) for row in split_probs],
    )

In [11]:
# Finalize each dataset and then the model's inferences on it (presumably a
# prerequisite for evaluation -- confirm against the velour docs).
# The loop variable is deliberately not called `dset`: that name is already
# bound to the scikit-learn dataset object loaded earlier in the notebook.
for velour_dataset in (velour_train_dataset, velour_test_dataset):
    velour_dataset.finalize()
    velour_model.finalize_inferences(velour_dataset)

In [12]:
# Submit one classification evaluation per split (train first, then test);
# each call hands back a job object that is queried in the cells below.
train_eval_job, test_eval_job = (
    velour_model.evaluate_classification(velour_dataset)
    for velour_dataset in (velour_train_dataset, velour_test_dataset)
)

In [13]:
# Check the job right after submission; in the recorded run it was still
# 'Processing' at this point.
train_eval_job.status()

'Processing'

In [15]:
# The recorded run shows the job moving from 'Processing' to 'Done' between
# two manual checks. Poll both jobs here so that "Run All" never reaches the
# metrics cells before evaluation has finished. The deadline keeps a job that
# never reports 'Done' from hanging the notebook.
POLL_INTERVAL_S = 0.5
EVAL_TIMEOUT_S = 120

eval_deadline = time.monotonic() + EVAL_TIMEOUT_S
for eval_job in (train_eval_job, test_eval_job):
    while eval_job.status() != "Done":
        if time.monotonic() > eval_deadline:
            raise TimeoutError(
                f"evaluation job still reports {eval_job.status()!r} "
                f"after {EVAL_TIMEOUT_S} seconds"
            )
        time.sleep(POLL_INTERVAL_S)

train_eval_job.status()

'Done'

In [16]:
# Metrics for the training split. The recorded output lists Accuracy and
# ROCAUC per label key, then Precision / Recall / F1 per label value.
train_eval_job.metrics()

[{'type': 'Accuracy',
  'parameters': {'label_key': 'class'},
  'value': 0.9929577464788732},
 {'type': 'ROCAUC',
  'parameters': {'label_key': 'class'},
  'value': 0.997331007874848},
 {'type': 'Precision',
  'value': 0.9888059701492538,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'Recall',
  'value': 1.0,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'F1',
  'value': 0.9943714821763603,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'Precision',
  'value': 1.0,
  'label': {'key': 'class', 'value': 'malignant'}},
 {'type': 'Recall',
  'value': 0.9813664596273292,
  'label': {'key': 'class', 'value': 'malignant'}},
 {'type': 'F1',
  'value': 0.9905956112852665,
  'label': {'key': 'class', 'value': 'malignant'}}]

In [17]:
# Metrics for the held-out test split.
# NOTE(review): in the recorded output the test ROCAUC is digit-for-digit
# identical to the training ROCAUC although every other metric differs --
# worth verifying that ROCAUC is computed per dataset.
test_eval_job.metrics()

[{'type': 'Accuracy',
  'parameters': {'label_key': 'class'},
  'value': 0.9790209790209791},
 {'type': 'ROCAUC',
  'parameters': {'label_key': 'class'},
  'value': 0.997331007874848},
 {'type': 'Precision',
  'value': 0.978494623655914,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'Recall',
  'value': 0.9891304347826086,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'F1',
  'value': 0.9837837837837837,
  'label': {'key': 'class', 'value': 'benign'}},
 {'type': 'Precision',
  'value': 0.98,
  'label': {'key': 'class', 'value': 'malignant'}},
 {'type': 'Recall',
  'value': 0.9607843137254902,
  'label': {'key': 'class', 'value': 'malignant'}},
 {'type': 'F1',
  'value': 0.9702970297029702,
  'label': {'key': 'class', 'value': 'malignant'}}]

In [18]:
# Fetch the model's metrics in tabular form. The result is indexed below as a
# sequence of mappings with "settings" and "df" keys.
settings_and_dfs = velour_model.get_metric_dataframes()

In [19]:
# Pull the evaluation settings and the metrics table out of the first entry.
settings, df = (settings_and_dfs[0][field] for field in ("settings", "df"))

In [20]:
# Show the evaluation settings that the metrics table below belongs to.
print(settings)

{'model_pred_task_type': 'Classification', 'dataset_gt_task_type': 'Classification'}


In [21]:
# Display the metrics table; the recorded output has one value column per
# dataset (test and train) and one row per metric / label combination.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,value
Unnamed: 0_level_1,Unnamed: 1_level_1,dataset,breast-cancer-test,breast-cancer-train
type,parameters,label,Unnamed: 3_level_2,Unnamed: 4_level_2
Accuracy,"{""label_key"": ""class""}",,0.979021,0.992958
F1,"""n/a""",class: benign,0.983784,0.994371
F1,"""n/a""",class: malignant,0.970297,0.990596
Precision,"""n/a""",class: benign,0.978495,0.988806
Precision,"""n/a""",class: malignant,0.98,1.0
ROCAUC,"{""label_key"": ""class""}",,0.997331,0.997331
Recall,"""n/a""",class: benign,0.98913,1.0
Recall,"""n/a""",class: malignant,0.960784,0.981366


## Sanity check against scikit-learn's classification report

In [22]:
from sklearn.metrics import classification_report

In [23]:
# Hard class predictions on the training split, used for the scikit-learn
# report below.
y_train_preds = pipe.predict(X_train)

In [24]:
# Per-class precision / recall / F1 computed directly by scikit-learn on the
# training split, printed with six decimals.
train_report = classification_report(
    y_train,
    y_train_preds,
    target_names=target_names,
    digits=6,
)
print(train_report)

              precision    recall  f1-score   support

   malignant   1.000000  0.981366  0.990596       161
      benign   0.988806  1.000000  0.994371       265

    accuracy                       0.992958       426
   macro avg   0.994403  0.990683  0.992484       426
weighted avg   0.993037  0.992958  0.992944       426

