In [None]:
def showrst(df):
    """Print ``df`` rendered as a grid-format text table.

    The table text is produced by ``df.to_markdown(tablefmt="grid")`` and
    written to standard output. Going by the function name, the output is
    presumably meant to be pasted into reStructuredText docs -- confirm.

    Args:
        df: Any object exposing a pandas-style ``to_markdown`` method.

    Returns:
        None. The table is printed, not returned.
    """
    grid_table = df.to_markdown(tablefmt="grid")
    print(grid_table)

In [None]:
import pandas as pd
import nannyml as nml
from IPython.display import display
# Load the synthetic binary classification dataset shipped with NannyML.
reference, analysis, analysis_gt = nml.datasets.load_synthetic_binary_classification_dataset()
display(reference.head(3))

# Extract model metadata from the reference data, leaving out the
# 'identifier' column, then set the target column explicitly.
metadata = nml.extract_metadata(
    reference,
    model_type=nml.ModelType.CLASSIFICATION_BINARY,
    exclude_columns=['identifier'],
)
metadata.target_column_name = 'work_home_actual'
display(metadata.is_complete())

# Metrics to estimate with CBPE.
binary_metrics = ['roc_auc', 'f1', 'precision', 'recall', 'specificity', 'accuracy']

# Fit on the reference data only, using chunks of 5000 rows.
cbpe = nml.CBPE(
    model_metadata=metadata,
    chunk_size=5000,
    metrics=binary_metrics,
).fit(reference_data=reference)

# Estimate over reference followed by analysis; the index is rebuilt so the
# two separately loaded frames do not share index labels.
estimation_input = pd.concat([reference, analysis], ignore_index=True)
est_perf = cbpe.estimate(estimation_input)
display(est_perf.data.head(3))

# One performance plot per configured metric.
for metric in cbpe.metrics:
    figure = est_perf.plot(kind='performance', metric=metric)
    figure.show()

# Temporary data fetching

In [None]:
# Show the installed NannyML version so the run can be reproduced.
nml.__version__

In [None]:
# Load the multiclass evaluation dataset straight from S3 into a DataFrame.
# NOTE(review): presumably needs s3fs installed and AWS credentials with read
# access to this bucket -- confirm before relying on Restart & Run All.
df = pd.read_parquet("s3://datasets-performance-prediction-eval/P005/multiclass_classification_complex_data_drift_v1.pq")

In [None]:
# Count rows per distinct value of the 'partition' column.
df['partition'].value_counts()

In [None]:
# List the column names available in the loaded dataset.
df.columns

In [None]:
# Split the dataset by its 'partition' label. Each subset is copied so it is
# independent of `df`.
reference = df.loc[df['partition'].eq('reference')].copy()
analysis = df.loc[df['partition'].eq('analysis')].copy()

In [None]:
import pandas as pd
import nannyml as nml
from IPython.display import display

# Extract multiclass model metadata from the reference partition, leaving out
# the 'identifier' column.
metadata = nml.extract_metadata(
    reference,
    model_type=nml.ModelType.CLASSIFICATION_MULTICLASS,
    exclude_columns=['identifier'],
)
metadata.target_column_name = 'y_true'

# Map each class label (0 through 4) to its predicted-probability column.
metadata.predicted_probabilities_column_names = {
    class_label: f'y_pred_proba_{class_label}' for class_label in range(5)
}
display(metadata.is_complete())

# Fit CBPE on the reference partition, using chunks of 3000 rows.
cbpe = nml.CBPE(
    model_metadata=metadata,
    chunk_size=3000,
    metrics=['roc_auc', 'f1'],
)
cbpe.fit(reference)

# Estimate performance over reference followed by analysis.
est_perf = cbpe.estimate(pd.concat([reference, analysis]))

# One performance plot per configured metric.
for metric in cbpe.metrics:
    figure = est_perf.plot(kind="performance", metric=metric)
    figure.show()
    # Optional SVG export, left disabled:
    # figure.write_image(f'tutorial-perf-est-multiclass-{metric}.svg', engine='orca')

In [None]:
# Preview the first three rows of the estimation results.
display(est_perf.data.head(3))

In [None]:
# Print the first three rows of the estimation results as a grid-format text
# table (via showrst).
showrst(est_perf.data.head(3))