In [None]:
import pandas as pd
import nannyml as nml
# Load the synthetic sample via NannyML; the call returns three objects,
# unpacked here as `reference`, `analysis` and `analysis_target`.
# NOTE(review): `analysis_target` is not used by any cell visible here.
reference, analysis, analysis_target = nml.load_synthetic_sample()
# Preview the first rows of the reference data (rendered as the cell output).
reference.head()

In [None]:
# Derive model metadata from the reference data; the 'identifier' column is
# passed as an excluded column.
metadata = nml.extract_metadata(
    data=reference,
    model_name='wfh_predictor',
    exclude_columns=['identifier'],
)
# Set the target column name explicitly on the extracted metadata.
metadata.target_column_name = 'work_home_actual'

# Stack the reference and analysis rows into one frame with a fresh index.
data = pd.concat([reference, analysis], ignore_index=True)

# Chunk size of 5000 data points, used when creating the drift statistics.
chunk_size = 5000

In [None]:
# Preview the first rows of the analysis data (rendered as the cell output).
analysis.head()

In [None]:
# Fit the CBPE estimator on the reference data, then estimate performance
# over the combined reference + analysis frame.
estimator = nml.CBPE(model_metadata=metadata, chunk_size=chunk_size)
estimator.fit(reference)
estimated_performance = estimator.estimate(data=data)

# Plot the estimated performance and render it in the notebook.
figure = estimated_performance.plot(kind='performance')
figure.show()

# Save the figure to disk - this step is not shown in the guide.
figure.write_image(file="perf-est-guide-syth-example.svg")

In [None]:
# Set up the univariate statistical drift calculator and fit it on the
# reference data.
univariate_calculator = nml.UnivariateStatisticalDriftCalculator(
    model_metadata=metadata,
    chunk_size=chunk_size,
)
univariate_calculator.fit(reference_data=reference)
univariate_results = univariate_calculator.calculate(data=data)

# Render one 'feature_drift' plot of the drift statistic for every model input
# listed in the metadata.
for feature in metadata.features:
    figure = univariate_results.plot(
        kind='feature_drift',
        metric='statistic',
        feature_label=feature.label,
    )
    figure.show()

In [None]:
# Build a ranker keyed on 'alert_count' and rank the univariate drift results;
# only_drifting=False is passed through to rank().
ranker = nml.Ranker.by('alert_count')
ranked_features = ranker.rank(
    univariate_results,
    model_metadata=metadata,
    only_drifting=False,
)
# Show the ranking as the cell output.
ranked_features

In [None]:
# Plot the 'prediction_drift' view of the univariate results using the
# 'statistic' metric, then render it in the notebook.
figure = univariate_results.plot(kind='prediction_drift', metric='statistic')
figure.show()

In [None]:
# Set up the calculator that performs data reconstruction with PCA.
rcerror_calculator = nml.DataReconstructionDriftCalculator(
    model_metadata=metadata,
    chunk_size=chunk_size,
)
# Fit on the full reference dataset, which NannyML compares drift against.
rcerror_calculator.fit(reference_data=reference)

# Compute reconstruction error statistics for all available data, then plot
# the drift view.
rcerror_results = rcerror_calculator.calculate(data=data)
figure = rcerror_results.plot(kind='drift')
figure.show()