# Data

### Reference data

In [23]:
import pandas as pd
from datetime import datetime

In [24]:
df_ref = pd.read_csv('data/kidney_ref.csv')

In [25]:
df_ref.head()

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pe,ane,class,y_pred,clf_target,y_pred_proba,pred_timestamp,model_id,model_version,period
0,214,55.0,80.0,1.01,3.0,1,1,0,1,1,...,1,0,1,1,0,1.0,2022-07-01,1,1,reference
1,372,28.0,60.0,1.025,0.0,0,1,1,0,0,...,0,0,0,1,0,1.0,2022-07-01,1,1,reference
2,367,60.0,80.0,1.025,0.0,0,1,1,0,0,...,0,0,0,0,0,0.99,2022-07-01,1,1,reference
3,260,35.0,80.0,1.02,0.0,0,1,1,0,0,...,0,0,0,0,0,0.98,2022-07-01,1,1,reference
4,374,61.0,70.0,1.025,0.0,0,1,1,0,0,...,0,0,0,1,0,0.91,2022-07-01,1,1,reference


### New data

In [26]:
df_new = pd.read_csv('data/dataframe_client.csv', sep = ',')

In [27]:
df_new.head()

Unnamed: 0,time,age,al,ane,appet,ba,bgr,bp,bu,cad,...,rbcc,sc,sg,sod,su,target,wbcc,y_name,pred_timestamp,eval_timestamp
0,2022-08-06T00:35:06.565042Z,83.0,3.0,1,1,0,102.0,70.0,60.0,0,...,3.1,2.6,1.02,115.0,0,1,12800.0,,2022-08-21,2022-08-26
1,2022-08-09T11:47:43.406341Z,25.0,0.0,0,0,0,121.0,80.0,19.0,0,...,5.3,1.2,1.025,142.0,0,0,6900.0,,2022-08-21,2022-08-26
2,2022-08-12T12:24:23.593044Z,25.0,0.0,0,0,0,121.0,80.0,19.0,0,...,5.3,1.2,1.025,142.0,0,0,6900.0,,2022-08-21,2022-08-26
3,2022-08-12T12:36:14.661246Z,25.0,0.0,0,0,0,121.0,80.0,19.0,0,...,5.3,1.2,1.025,142.0,0,0,6900.0,,2022-08-21,2022-08-26
4,2022-08-16T11:34:56.583918Z,25.0,0.0,0,0,0,121.0,80.0,19.0,0,...,5.3,1.2,1.025,142.0,0,0,6900.0,ABC,2022-08-21,2022-08-26


# Metrics

## Metrics types

In [28]:
from pulsar_metrics.metrics.base import MetricsType
MetricsType._member_names_

['performance', 'drift', 'custom']

## MetricsResults

In [29]:
from pulsar_metrics.metrics.base import MetricResults

In [30]:
result = MetricResults(name = "accuracy", type = "performance", model_id = "1", model_version = "1.0", period_end = datetime(2022,8,10))

TypeError: 'module' object is not callable

In [None]:
result

MetricResults(metric_name=None, type='performance', model_id='1', model_version='1.0', data_id=None, feature=None, value=None, status=None, threshold=None, period_start=None, period_end=datetime.datetime(2022, 8, 10, 0, 0), eval_timestamp=datetime.datetime(2022, 10, 20, 11, 30, 35, 355092), conf_int=None)

### Performance metrics

In [None]:
from pulsar_metrics.metrics.performance import PerformanceMetricsFuncs
PerformanceMetricsFuncs._member_names_

['accuracy',
 'precision',
 'recall',
 'f1',
 'log_loss',
 'auc',
 'aucpr',
 'brier',
 'mse',
 'mae',
 'mape',
 'r2']

In [None]:
from pulsar_metrics.metrics.performance import PerformanceMetric

auc = PerformanceMetric(name = 'auc', data = df_ref, y_name = 'clf_target')

In [None]:
auc._result

In [None]:
auc_result = auc.evaluate(bootstrap=False,  threshold = 0.5)
auc_result

MetricResults(metric_name='auc', type='performance', model_id='1', model_version='1', data_id=None, feature='prediction', value=0.7727272727272727, status=False, threshold=0.5, period_start=Timestamp('2022-07-01 00:00:00'), period_end=Timestamp('2022-07-01 00:00:00'), eval_timestamp=datetime.datetime(2022, 10, 20, 11, 30, 35, 355092), conf_int=None)

### Drift metrics

In [None]:
from pulsar_metrics.metrics.drift import DriftMetricsFuncs, DriftTestMetricsFuncs

In [None]:
DriftMetricsFuncs._member_names_

['kl', 'wasserstein']

In [None]:
DriftTestMetricsFuncs._member_names_

['ttest', 'manwu', 'levene', 'bftest', 'ks_2samp', 'CvM', 'chi2']

In [None]:
from pulsar_metrics.metrics.drift import DriftMetric

In [None]:
feature_name = 'bgr'
kl = DriftMetric(name = 'kl',  data = df_new, feature_name = feature_name)

In [None]:
kl._result

In [None]:
kl_result = kl.evaluate(reference = df_ref[feature_name])
kl_result

MetricResults(metric_name='kl', type='drift', model_id='1', model_version='2', data_id=None, feature='bgr', value=0.662768815835448, status=None, threshold=None, period_start=Timestamp('2022-08-21 00:00:00'), period_end=Timestamp('2022-08-21 00:00:00'), eval_timestamp=datetime.datetime(2022, 10, 20, 11, 30, 35, 355092), conf_int=None)

### Custom metrics

In [None]:
from pulsar_metrics.metrics.base import CustomMetric
import numpy as np

In [None]:
@CustomMetric
def custom(a, b):
    return np.abs(a-b).min()

In [None]:
custom_metric = custom(name = 'custom', data = df_ref)

In [None]:
custom_metric.evaluate(a=df_new['bgr'], b = df_ref['bgr'])

MetricResults(metric_name='custom', type='custom', model_id='1', model_version='1', data_id=None, feature=None, value=4.0, status=None, threshold=None, period_start=Timestamp('2022-07-01 00:00:00'), period_end=Timestamp('2022-07-01 00:00:00'), eval_timestamp=datetime.datetime(2022, 10, 20, 11, 30, 35, 355092), conf_int=None)

## Analyzer

In [None]:
from pulsar_metrics.analyzers.base import Analyzer
analysis = Analyzer(name = 'First Analyzer', description='My first Analyzer', data = df_new)

In [None]:
analysis.add_drift_metrics(metrics_list=['kl', 'wasserstein'], features_list=['bgr', 'sg', 'al', 'bp'])
analysis.add_drift_metrics(metrics_list=['ks_2samp', 'ttest'], features_list=['bgr'])

Drift metric 'kl' for feature 'bgr' added to the analyzer list
Drift metric 'kl' for feature 'sg' added to the analyzer list
Drift metric 'kl' for feature 'al' added to the analyzer list
Drift metric 'kl' for feature 'bp' added to the analyzer list
Drift metric 'wasserstein' for feature 'bgr' added to the analyzer list
Drift metric 'wasserstein' for feature 'sg' added to the analyzer list
Drift metric 'wasserstein' for feature 'al' added to the analyzer list
Drift metric 'wasserstein' for feature 'bp' added to the analyzer list
Drift metric 'ks_2samp' for feature 'bgr' added to the analyzer list
Drift metric 'ttest' for feature 'bgr' added to the analyzer list


In [None]:
analysis.run(data_ref = df_ref, options = {'kl': {'threshold':0.7}, 
                                            'ttest':{'equal_var': False}})

In [None]:
analysis.results_to_pandas()

Unnamed: 0,metric_name,type,model_id,model_version,data_id,feature,value,status,threshold,period_start,period_end,eval_timestamp,conf_int
0,kl,drift,1,2,,bgr,0.662769,True,0.7,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
1,kl,drift,1,2,,sg,0.846529,False,0.7,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
2,kl,drift,1,2,,al,0.693147,True,0.7,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
3,kl,drift,1,2,,bp,0.078603,True,0.7,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
4,wasserstein,drift,1,2,,bgr,32.416667,,,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
5,wasserstein,drift,1,2,,sg,0.005,,,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
6,wasserstein,drift,1,2,,al,0.333333,,,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
7,wasserstein,drift,1,2,,bp,1.666667,,,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
8,ks_2samp,drift,1,2,,bgr,0.255775,False,0.05,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
9,ttest,drift,1,2,,bgr,0.794721,False,0.05,2022-08-21,2022-08-21,2022-10-20 11:30:35.355092,
