# NannyML Workflow

In [None]:
!rm *.svg
!ls -la

In [None]:
import nannyml as nml
import pandas as pd

In [None]:
# Display the version of the imported nannyml package as the cell output.
nml.__version__

In [None]:
# Load the synthetic sample shipped with nannyml; the call returns three
# objects, unpacked here as reference, analysis and analysis_gt.
reference, analysis, analysis_gt = nml.load_synthetic_sample()
# Print the first rows of the reference data as a grid-formatted table.
print(reference.head().to_markdown(tablefmt="grid"))

In [None]:
# Print the last rows of the analysis data as a grid-formatted table.
print(analysis.tail().to_markdown(tablefmt="grid"))

In [None]:
# Build the model metadata object from the reference data, passing
# 'wfh_predictor' as the model name.
md = nml.extract_metadata(
    data=reference,
    model_name='wfh_predictor',
)

In [None]:
# Print the metadata as extracted, before any manual column overrides.
md.print()

In [None]:
# Set the timestamp and target column names on the metadata explicitly.
md.timestamp_column_name = 'timestamp'
# NOTE(review): the prediction column override below is disabled —
# presumably extract_metadata already picked it up; confirm before removing.
# md.prediction_column_name = 'y_pred_proba'
md.target_column_name = 'work_home_actual'

In [None]:
# Print the metadata again so the manually assigned column names can be checked.
md.print()

In [None]:
# Create the univariate statistical drift calculator from the model
# metadata, with a chunk size of 5000.
univariate_calculator = nml.UnivariateStatisticalDriftCalculator(
    model_metadata=md,
    chunk_size=5000,
)

In [None]:
# Fit the univariate drift calculator on the reference data.
univariate_calculator.fit(reference_data=reference)

In [None]:
# Stack the reference and analysis frames into one frame with a fresh
# 0..n-1 index; later cells pass this combined frame to the calculators.
fdata = pd.concat(
    [reference, analysis],
    ignore_index=True,
)
# Display the combined frame as the cell output.
fdata

In [None]:
# Run the fitted univariate calculator over the combined
# (reference + analysis) data.
univariate_results = univariate_calculator.calculate(data=fdata)

In [None]:
# Collect the column names of the univariate results into a plain list
# and display it as the cell output.
univariate_results_columns = list(univariate_results.columns)
univariate_results_columns

In [None]:
# Preview the first five rows and first nine columns of the reference data.
# The row slice used to be `5:`, which selects every row EXCEPT the first
# five and therefore printed nearly the whole frame as a grid table.
print(reference.iloc[:5, :9].to_markdown(tablefmt="grid"))

In [None]:
# Rank the features from the univariate results using the 'alert_count'
# ranker. only_drifting=False is passed so the ranking is presumably not
# restricted to drifting features — confirm against the Ranker docs.
ranker = nml.Ranker.by('alert_count')
ranked_features = ranker.rank(
    univariate_results,
    only_drifting=False,
)
# Display the ranking as the cell output.
ranked_features

In [None]:
# Print the feature ranking as a grid-formatted table.
print(ranked_features.to_markdown(tablefmt="grid"))

In [None]:
# Create the data reconstruction drift calculator from the model metadata,
# using the same chunk size (5000) as the univariate calculator.
rcerror_calculator = nml.DataReconstructionDriftCalculator(
    model_metadata=md,
    chunk_size=5000,
)

In [None]:
# Fit the reconstruction drift calculator on the reference data.
rcerror_calculator.fit(reference_data=reference)

In [None]:
# Run the fitted reconstruction drift calculator over the combined
# (reference + analysis) data and display the result as the cell output.
rcerror_results = rcerror_calculator.calculate(data=fdata)
rcerror_results

In [None]:
# Build the drift plotting helper, reusing the metadata and chunker held by
# the univariate calculator.
plots = nml.DriftPlots(
    model_metadata=univariate_calculator.model_metadata,
    chunker=univariate_calculator.chunker,
)

In [None]:
# For every feature in the metadata: plot its univariate drift using the
# 'statistic' metric, show the figure, and export it to an SVG file named
# after the feature label.
for feature in md.features:
    figure = plots.plot_univariate_statistical_drift(
        univariate_results,
        metric='statistic',
        feature_label=feature.label,
    )
    figure.show()
    figure.write_image(file=f"drift-guide-{feature.label}.svg")

In [None]:
# For every continuous feature: plot its distribution over time, show the
# figure, and export it to an SVG file named after the feature label.
# The combined frame `fdata` (reference + analysis, built in an earlier
# cell) is reused rather than re-concatenating both frames on every
# iteration of the loop.
for feature in md.continuous_features:
    figure = plots.plot_continuous_feature_distribution_over_time(
        data=fdata,
        drift_results=univariate_results,
        feature_label=feature.label,
    )
    figure.show()
    figure.write_image(file=f"drift-guide-joyplot-{feature.label}.svg")

In [None]:
# For every categorical feature: plot its distribution over time, show the
# figure, and export it to an SVG file named after the feature label.
# The combined frame `fdata` (reference + analysis, built in an earlier
# cell) is reused rather than re-concatenating both frames on every
# iteration of the loop.
for feature in md.categorical_features:
    figure = plots.plot_categorical_feature_distribution_over_time(
        data=fdata,
        drift_results=univariate_results,
        feature_label=feature.label,
    )
    figure.show()
    figure.write_image(file=f"drift-guide-stacked-{feature.label}.svg")

In [None]:
# Plot the data reconstruction drift results, show the figure, and export
# it to SVG. The file name has no placeholders, so it is a plain string
# rather than an f-string.
figure = plots.plot_data_reconstruction_drift(rcerror_results)
figure.show()
figure.write_image(file="drift-guide-multivariate.svg")

In [None]:
# Plot the univariate drift of the model predictions using the 'statistic'
# metric, show the figure, and export it to SVG. The target path is passed
# as `file=` to match the other write_image calls in this notebook.
figure = plots.plot_univariate_statistical_prediction_drift(
    univariate_results,
    metric='statistic',
)
figure.show()
figure.write_image(file='drift-guide-predictions.svg')

In [None]:
# Plot the distribution of the model predictions over time, show the
# figure, and export it to SVG. The combined frame `fdata` (reference +
# analysis, built in an earlier cell) is reused instead of concatenating
# the two frames again, and the target path is passed as `file=` to match
# the other write_image calls in this notebook.
figure = plots.plot_prediction_distribution_over_time(
    data=fdata,
    drift_results=univariate_results,
)
figure.show()
figure.write_image(file='drift-guide-predictions-joyplot.svg')

In [None]:
# Create the CBPE performance estimator (chunk size 5000), fit it on the
# reference data, then estimate performance over the combined
# (reference + analysis) data.
cbpe = nml.CBPE(
    model_metadata=md,
    chunk_size=5000,
)
cbpe.fit(reference_data=reference)
est_perf = cbpe.estimate(data=fdata)

In [None]:
# Plot the CBPE performance estimation, show the figure, and export it to
# SVG.
# NOTE: this rebinds `plots`, which earlier cells bound to the DriftPlots
# helper; re-run the DriftPlots cell before re-running any drift plot cell.
plots = nml.PerformancePlots(model_metadata=md, chunker=cbpe.chunker)
figure = plots.plot_cbpe_performance_estimation(est_perf)
figure.show()
# The file name has no placeholders, so it is a plain string rather than
# an f-string.
figure.write_image(file="perf-est-guide-syth-example.svg")