In [None]:
import pandas as pd
import nannyml as nml
from IPython.display import display
# Load the synthetic sample as three frames: reference, analysis, and analysis targets.
reference, analysis, analysis_target = nml.datasets.load_synthetic_sample()
display(reference.head(3))

# Attach the targets to the analysis rows by matching the 'identifier' column of
# `analysis` against the index of the re-indexed targets. `analysis` deliberately
# keeps 'identifier' as a regular column: moving it into the index would let
# `ignore_index=True` below discard it, leaving NaN identifiers on analysis rows.
analysis_with_targets = analysis.join(
    analysis_target.set_index('identifier'),
    on='identifier',
    rsuffix='_r',
)
# `ignore_index=True` already produces a fresh 0..n-1 index, so no reset_index is needed.
data = pd.concat([reference, analysis_with_targets], ignore_index=True)
display(data.loc[data['partition'] == 'analysis'].head(3))

# Extract model metadata from the reference frame ('identifier' is excluded),
# set the target column name explicitly, and show the completeness check result.
metadata = nml.extract_metadata(reference, model_type=nml.ModelType.CLASSIFICATION_BINARY, exclude_columns=['identifier'])
metadata.target_column_name = 'work_home_actual'
display(metadata.is_complete())

# Build the calculator with chunks of 5000 and fit it on the reference frame.
performance_calculator = nml.PerformanceCalculator(
    model_metadata=metadata,
    # use NannyML to tell us what metrics are supported
    metrics=nml.performance_estimation.confidence_based.results.SUPPORTED_METRIC_VALUES,
    chunk_size=5000
).fit(reference_data=reference)

# Calculate performance over the combined reference + analysis frame.
realized_performance = performance_calculator.calculate(data)

display(realized_performance.data.head(3))

# Show one 'performance' plot per metric held by the calculator.
for metric in performance_calculator.metrics:
    realized_performance.plot(kind='performance', metric=metric).show()

In [None]:
import pandas as pd
import nannyml as nml
from IPython.display import display

# Fetch the synthetic sample and unpack it into its three frames.
synthetic_sample = nml.datasets.load_synthetic_sample()
reference, analysis, analysis_target = synthetic_sample

# Quick look at the first three reference rows.
reference_preview = reference.head(3)
display(reference_preview)

In [None]:
# Same three-row preview of the reference frame, printed as a grid-format Markdown table.
reference_preview_md = reference.head(3).to_markdown(tablefmt="grid")
print(reference_preview_md)

In [None]:
# Attach the targets to the analysis rows by matching the 'identifier' column of
# `analysis` against the index of the re-indexed targets. `analysis` deliberately
# keeps 'identifier' as a regular column: moving it into the index would let
# `ignore_index=True` below discard it, leaving NaN identifiers on analysis rows.
analysis_with_targets = analysis.join(
    analysis_target.set_index('identifier'),
    on='identifier',
    rsuffix='_r',
)
# Stack reference and analysis rows into one frame. `ignore_index=True` already
# produces a fresh 0..n-1 index, so a follow-up reset_index is unnecessary.
data = pd.concat([reference, analysis_with_targets], ignore_index=True)
display(data.loc[data['partition'] == 'analysis'].head(3))

In [None]:
# Grid-format Markdown rendering of the first three rows whose 'partition' is 'analysis'.
is_analysis = data['partition'] == 'analysis'
analysis_preview_md = data.loc[is_analysis].head(3).to_markdown(tablefmt="grid")
print(analysis_preview_md)

In [None]:
# Extract model metadata from the reference frame; 'identifier' is excluded from extraction.
metadata = nml.extract_metadata(
    reference,
    model_type=nml.ModelType.CLASSIFICATION_BINARY,
    exclude_columns=['identifier'],
)

# Set the target column name explicitly.
metadata.target_column_name = 'work_home_actual'

# Show the result of the completeness check before the metadata is used further.
completeness = metadata.is_complete()
display(completeness)

In [None]:
# Convert the metadata to a DataFrame and print it as a grid-format Markdown table.
metadata_table = metadata.to_df()
print(metadata_table.to_markdown(tablefmt="grid"))

In [None]:
# Let NannyML supply the list of supported metrics instead of hard-coding it.
supported_metrics = nml.performance_estimation.confidence_based.results.SUPPORTED_METRIC_VALUES

# Configure the calculator (chunks of 5000) ...
unfitted_calculator = nml.PerformanceCalculator(
    model_metadata=metadata,
    metrics=supported_metrics,
    chunk_size=5000,
)
# ... and keep whatever `fit` returns after fitting on the reference frame.
performance_calculator = unfitted_calculator.fit(reference_data=reference)

In [None]:
# Run the fitted calculator over the combined frame; the result is inspected
# and plotted in the cells that follow.
realized_performance = performance_calculator.calculate(data)

In [None]:
# Preview the first three rows of the calculated performance results.
performance_preview = realized_performance.data.head(3)
display(performance_preview)

In [None]:
# Same three-row results preview, printed as a grid-format Markdown table.
performance_preview_md = realized_performance.data.head(3).to_markdown(tablefmt="grid")
print(performance_preview_md)

In [None]:
# Produce and show one 'performance' plot for each metric held by the calculator.
for metric in performance_calculator.metrics:
    figure = realized_performance.plot(kind='performance', metric=metric)
    figure.show()
    # Optional export, left disabled so it does not appear in the guide.
    # Uncommenting the two lines below prints the metric's display name with
    # spaces replaced by underscores and writes the figure to an SVG file
    # under ../_static/:
    # print(metric.display_name.replace(" ", "_"))
    # figure.write_image(file=f"../_static/tutorial-perf-guide-{metric.display_name.replace(' ', '_')}.svg")