# NannyML Workflow

In [None]:
!rm *.svg
!ls -la

In [None]:
import nannyml as nml
import pandas as pd

In [None]:
# Display the version of the imported nannyml package.
nml.__version__

In [None]:
# Load the bundled synthetic sample; it unpacks into the reference set, the
# analysis set and the analysis targets.
reference, analysis, analysis_target = nml.load_synthetic_sample()

# Stack reference and analysis rows into one frame with a fresh 0..n-1 index.
data = pd.concat((reference, analysis), ignore_index=True)

# Preview the first reference rows as a grid-style markdown table.
reference_head = reference.head()
print(reference_head.to_markdown(tablefmt="grid"))

In [None]:
# Preview the last analysis rows as a grid-style markdown table.
analysis_tail = analysis.tail()
print(analysis_tail.to_markdown(tablefmt="grid"))

In [None]:
# Chunk size shared by the estimator and the drift calculators created below.
chunk_size = 5_000

In [None]:
# Build the model metadata from the reference data, set the target column
# explicitly, then print the resulting metadata for inspection.
metadata = nml.extract_metadata(
    data=reference,
    model_name='wfh_predictor',
)
metadata.target_column_name = 'work_home_actual'
metadata.print()

In [None]:
# Create the CBPE performance estimator, fit it on the reference data and
# estimate performance over the combined reference + analysis data.
estimator = nml.CBPE(
    model_metadata=metadata,
    chunk_size=chunk_size,
)
estimator.fit(reference_data=reference)
estimated_performance = estimator.estimate(data=data)

In [None]:
# Plot the estimated performance and display the figure.
figure = estimated_performance.plot(kind='performance')
figure.show()
# Uncomment to also save the figure as an SVG file:
# figure.write_image(file="perf-est-guide-syth-example.svg")

In [None]:
# Create the univariate statistical drift calculator, reusing the model
# metadata and the shared chunk size.
univariate_calculator = nml.UnivariateStatisticalDriftCalculator(model_metadata=metadata, chunk_size=chunk_size)

In [None]:
# Fit the univariate drift calculator on the reference data.
univariate_calculator.fit(reference_data=reference)

In [None]:
# Calculate univariate drift over the combined reference + analysis data.
univariate_results = univariate_calculator.calculate(data=data)

In [None]:
# Show a feature-drift plot (statistic metric) for every feature listed in
# the model metadata.
for feature in metadata.features:
    figure = univariate_results.plot(
        kind='feature_drift',
        metric='statistic',
        feature_label=feature.label,
    )
    figure.show()
    # Uncomment to also save each figure as an SVG file:
    # figure.write_image(file=f"drift-guide-{feature.label}.svg")

In [None]:
# Show a feature-distribution plot for every feature listed in the model
# metadata. The plot kind is passed by keyword, matching every other
# `.plot(...)` call in this notebook.
for feature in metadata.features:
    figure = univariate_results.plot(
        kind='feature_distribution',
        feature_label=feature.label,
    )
    figure.show()
    # Uncomment to also save each figure as an SVG file:
    # figure.write_image(file=f"drift-guide-joyplot-{feature.label}.svg")

In [None]:
# Plot drift of the model output (predictions) using the statistic metric
# and display the figure.
figure = univariate_results.plot(
    kind='prediction_drift',
    metric='statistic',
)
figure.show()

In [None]:
# Rank the features from the univariate drift results by alert count.
# only_drifting=False presumably keeps non-drifting features in the ranking
# as well - confirm against the NannyML Ranker documentation.
ranker = nml.Ranker.by('alert_count')
ranked_features = ranker.rank(
    univariate_results,
    model_metadata=metadata,
    only_drifting=False,
)
# Bare expression so the notebook renders the ranking as the cell output.
ranked_features

In [None]:
# Print the feature ranking as a grid-style markdown table.
print(ranked_features.to_markdown(tablefmt="grid"))

In [None]:
# Re-create the prediction drift plot, display it and save it as an SVG file.
figure = univariate_results.plot(
    kind='prediction_drift',
    metric='statistic',
)
figure.show()
figure.write_image(file='drift-guide-predictions.svg')

In [None]:
# Initialize the object that will perform Data Reconstruction with PCA.
# It reuses the model metadata and the shared chunk_size defined earlier
# to create the drift statistics.
rcerror_calculator = nml.DataReconstructionDriftCalculator(model_metadata=metadata, chunk_size=chunk_size)

In [None]:
# NannyML compares drift versus the full reference dataset, so the
# calculator is fitted on the reference data first.
rcerror_calculator.fit(reference_data=reference)

In [None]:
# Calculate the reconstruction (RC) error statistics for all available data
# (reference and analysis combined).
rcerror_results = rcerror_calculator.calculate(data=data)

In [None]:
# Print the reconstruction error results as a grid-style markdown table.
print(rcerror_results.data.to_markdown(tablefmt="grid"))

In [None]:
# Plot the multivariate (reconstruction error) drift, display it and save it
# as an SVG file. The filename is a plain string literal: it has no
# placeholders, so an f-string prefix is unnecessary.
figure = rcerror_results.plot(kind='drift')
figure.show()
figure.write_image(file="drift-guide-multivariate.svg")