In [None]:
import nannyml as nml
import pandas as pd

# Load the synthetic sample data shipped with NannyML. Three frames come back:
# the reference set, the analysis set and `analysis_gt` (presumably the
# ground truth for the analysis period - TODO confirm).
reference, analysis, analysis_gt = nml.load_synthetic_sample()

# Extract the model metadata from the reference data, then set the target
# column name explicitly on the extracted metadata.
metadata = nml.extract_metadata(
    data=reference,
    model_name='wfh_predictor',
)
metadata.target_column_name = 'work_home_actual'

# Preview the first rows of the reference set.
reference.head()

In [None]:
# Create the object that performs the Univariate Drift calculations;
# drift statistics are computed per chunk of 5000 data points.
univariate_calculator = nml.UnivariateStatisticalDriftCalculator(
    model_metadata=metadata,
    chunk_size=5000,
)

# Fit on the reference data: NannyML compares drift versus the full reference dataset.
univariate_calculator.fit(reference_data=reference)

# Compute drift statistics for all available data (reference rows followed by
# analysis rows, re-indexed from 0).
data = pd.concat([reference, analysis], ignore_index=True)
univariate_results = univariate_calculator.calculate(data=data)

# Show a small subset of the results: first 5 rows, first 9 columns.
univariate_results.data.iloc[:, :9].head(5)

In [None]:
# Show the other end of the results table: last 5 rows, first 9 columns.
univariate_results.data.iloc[:, :9].tail(5)

In [None]:
# Plot the drift results for all model inputs, one figure per feature.
for feature in metadata.features:
    figure = univariate_results.plot(
        kind='feature_drift',
        metric='statistic',
        feature_label=feature.label,
    )
    figure.show()
    # Optional export of each figure (not shown on guide):
    # figure.write_image(file=f"drift-guide-{feature.label}.svg")
    # figure.write_image(file=f"drift-guide-{feature.label}.svg", engine="orca")

In [None]:
# Plot the distribution drift results for continuous model inputs,
# one figure per feature.
for feature in metadata.continuous_features:
    figure = univariate_results.plot(kind='feature_distribution', feature_label=feature.label)
    figure.show()
    # Optional export of each figure (not shown on guide):
    # figure.write_image(file=f"drift-guide-joyplot-{feature.label}.svg")
    # figure.write_image(file=f"drift-guide-joyplot-{feature.label}.svg", engine="orca")

In [None]:
# Plot the distribution drift results for categorical model inputs,
# one figure per feature.
for feature in metadata.categorical_features:
    figure = univariate_results.plot(kind='feature_distribution', feature_label=feature.label)
    figure.show()
    # Optional export of each figure (not shown on guide):
    # figure.write_image(file=f"drift-guide-stacked-{feature.label}.svg")
    # figure.write_image(file=f"drift-guide-stacked-{feature.label}.svg", engine="orca")

In [None]:
# Rank the features based on the univariate drift results, using the
# 'alert_count' ranker. With only_drifting=False the ranking presumably also
# keeps features without drift alerts - TODO confirm.
ranker = nml.Ranker.by('alert_count')
ranked_features = ranker.rank(
    univariate_results,
    model_metadata=metadata,
    only_drifting=False,
)
ranked_features

In [None]:
# Plot the drift statistic for the model predictions.
figure = univariate_results.plot(
    kind='prediction_drift',
    metric='statistic',
)
figure.show()

In [None]:
# Plot the distribution of the model predictions.
# NOTE(review): `metric` is passed here, while the 'feature_distribution'
# plots in this notebook omit it - confirm whether it is needed.
figure = univariate_results.plot(
    kind='prediction_distribution',
    metric='statistic',
)
figure.show()
# Optional export of the figure (not shown on guide):
# figure.write_image(file=f"drift-guide-predictions-joyplot.svg", engine="orca")

In [None]:
# Create the object that performs Data Reconstruction with PCA;
# drift statistics are computed per chunk of 5000 data points.
rcerror_calculator = nml.DataReconstructionDriftCalculator(
    model_metadata=metadata,
    chunk_size=5000,
)

# Fit on the reference data: NannyML compares drift versus the full reference dataset.
rcerror_calculator.fit(reference_data=reference)

# Compute the RC error statistics for all available data. `data` is the
# concatenated reference + analysis frame built in the univariate drift cell.
rcerror_results = rcerror_calculator.calculate(data=data)

In [None]:
from sklearn.impute import SimpleImputer

# Create the Data Reconstruction (PCA) calculator again, this time passing
# explicit imputers: categorical columns are filled with the constant
# 'missing', continuous columns with the median.
# Note: this rebinds `rcerror_calculator` (and, below, `rcerror_results`)
# from the previous cell.
rcerror_calculator = nml.DataReconstructionDriftCalculator(
    imputer_continuous=SimpleImputer(strategy='median'),
    imputer_categorical=SimpleImputer(strategy='constant', fill_value='missing'),
    chunk_size=5000,
    model_metadata=metadata,
)

# Fit on the reference data: NannyML compares drift versus the full reference dataset.
rcerror_calculator.fit(reference_data=reference)

# Compute the RC error statistics for all available data.
rcerror_results = rcerror_calculator.calculate(data=data)

In [None]:
# Display the full table of data reconstruction results.
rcerror_results.data

In [None]:
# Print the same results table as grid-formatted text (presumably for pasting
# into the written guide). pandas' `to_markdown` relies on the optional
# `tabulate` package being installed.
print(rcerror_results.data.to_markdown(tablefmt="grid"))

In [None]:
# Plot the drift results of the data reconstruction calculator.
figure = rcerror_results.plot(kind='drift')
figure.show()
# Optional export of the figure (not shown on guide):
# figure.write_image(file="drift-guide-multivariate.svg")