In [None]:
import pandas as pd
import nannyml as nml

# Load the synthetic sample bundled with NannyML; the loader's result is
# unpacked into the reference set, the analysis set and the analysis targets.
reference, analysis, analysis_target = nml.datasets.load_synthetic_sample()

# Probability cut-off that turns `y_pred_proba` into a binary `y_pred`.
# Defined once so both data sets are guaranteed to use the same threshold.
PREDICTION_THRESHOLD = 0.8

# Vectorised comparison instead of a per-row Python lambda; the boolean result
# is cast to int64 so the column holds 0/1 integers.
reference['y_pred'] = (reference['y_pred_proba'] >= PREDICTION_THRESHOLD).astype('int64')
analysis['y_pred'] = (analysis['y_pred_proba'] >= PREDICTION_THRESHOLD).astype('int64')

# Preview the first rows of the reference set.
reference.head(3)

In [None]:
def showrst(df):
    """Print ``df`` as a grid-formatted text table.

    The text is produced by ``df.to_markdown(tablefmt="grid")`` and written to
    standard output; the function itself returns nothing.
    """
    grid_table = df.to_markdown(tablefmt="grid")
    print(grid_table)

In [None]:
# Attach the analysis-period targets to the analysis rows. Merging on the
# `identifier` column (rather than moving it into the index) keeps it available
# as a regular column after the `ignore_index=True` concat below.
analysis_with_targets = analysis.merge(
    analysis_target,
    on='identifier',
    how='left',            # keep every analysis row, even without a target
    suffixes=('', '_r'),   # only clashing columns from the targets get '_r'
)

# Stack reference and analysis rows into a single frame. `ignore_index=True`
# already yields a fresh 0..n-1 index, so no additional reset_index is needed.
data = pd.concat([reference, analysis_with_targets], ignore_index=True)

# Preview the first rows that belong to the analysis partition.
data.loc[data['partition'] == 'analysis'].head(3)

In [None]:
# Derive the model metadata from the reference data set.
metadata = nml.extract_metadata(reference)
# Set the target column explicitly to 'work_home_actual'.
metadata.target_column_name = 'work_home_actual'
# Last expression of the cell: show the metadata through its `to_df()` view.
metadata.to_df()

In [None]:
# Configure a performance calculator for the ROC AUC and recall metrics,
# driven by the metadata extracted earlier and a chunk size of 5000.
performance_calculator = nml.PerformanceCalculator(
    model_metadata=metadata,
    metrics=['roc_auc', 'recall'],
    chunk_size=5000,
)
# Fit the calculator on the reference data set.
performance_calculator.fit(reference_data=reference)

In [None]:
# Run the fitted calculator over the combined reference + analysis frame and
# keep the result for the table and plots that follow.
realized_performance = performance_calculator.calculate(data)

In [None]:
# Print the first three rows of the calculated results as a grid table.
results_preview = realized_performance.data.head(3)
showrst(results_preview)

In [None]:
# Plot the realized performance for the ROC AUC metric.
fig = realized_performance.plot(
    kind='performance',
    metric='roc_auc',
)
# Save the figure as an SVG under ../_static using the kaleido engine.
fig.write_image(
    file="../_static/performance_calculation_roc_auc.svg",
    engine="kaleido",
)
# Render the figure in the notebook.
fig.show()

In [None]:
# Plot the realized performance for the recall metric.
fig = realized_performance.plot(
    kind='performance',
    metric='recall',
)
# Render the figure in the notebook (this one is not written to disk).
fig.show()
