In [None]:
import nannyml as nml

import pandas as pd

from IPython.display import display

# Load the synthetic binary classification dataset bundled with NannyML;
# the loader hands back three objects, unpacked here.
reference, analysis, analysis_target = nml.load_synthetic_binary_classification_dataset()

# Extract model metadata from the reference data, leaving the 'identifier'
# column out of the extraction.
metadata = nml.extract_metadata(
    data=reference,
    model_name='wfh_predictor',
    model_type='classification_binary',
    exclude_columns=['identifier'],
)
# Set the target column name on the metadata explicitly.
metadata.target_column_name = 'work_home_actual'

# Preview the first rows of the reference data.
display(reference.head())

# Create the object that performs the Univariate Drift calculations.
# A chunk size of 5000 data points is used to create the drift statistics.
univariate_calculator = nml.UnivariateStatisticalDriftCalculator(
    model_metadata=metadata,
    chunk_size=5000,
)
# Fit the calculator on the reference data.
univariate_calculator = univariate_calculator.fit(reference_data=reference)

# Compute drift statistics for all available data: reference and analysis
# rows stacked together under a fresh index.
data = pd.concat([reference, analysis], ignore_index=True)
univariate_results = univariate_calculator.calculate(data=data)

# View a small subset of the results through the `data` property of the
# results object: the first 9 columns of the first 5 rows ...
display(univariate_results.data.iloc[:, :9].head(5))

# ... and the same columns for the last 5 rows.
display(univariate_results.data.iloc[:, :9].tail(5))

# Plot the drift statistic for every model input, one figure per feature.
for feature in metadata.features:
    drift_plot = univariate_results.plot(
        kind='feature_drift',
        metric='statistic',
        feature_label=feature.label,
    )
    drift_plot.show()

# Plot the distribution drift results for each continuous model input.
for feature in metadata.continuous_features:
    distribution_plot = univariate_results.plot(kind='feature_distribution', feature_label=feature.label)
    distribution_plot.show()

# Plot the distribution drift results for each categorical model input.
for feature in metadata.categorical_features:
    distribution_plot = univariate_results.plot(kind='feature_distribution', feature_label=feature.label)
    distribution_plot.show()

# Rank the features with the 'alert_count' ranker, based on the univariate
# drift results. only_drifting=False is passed through as in the original;
# presumably it keeps non-drifting features in the ranking (confirm in the
# NannyML docs).
ranker = nml.Ranker.by('alert_count')
ranked_features = ranker.rank(
    univariate_results,
    model_metadata=metadata,
    only_drifting=False,
)

display(ranked_features)

In [None]:
import nannyml as nml

import pandas as pd

from IPython.display import display

# Load the synthetic binary classification dataset bundled with NannyML;
# the loader hands back three objects, unpacked here.
reference, analysis, analysis_target = nml.load_synthetic_binary_classification_dataset()

# Extract model metadata from the reference data, leaving the 'identifier'
# column out of the extraction.
metadata = nml.extract_metadata(
    data=reference,
    model_name='wfh_predictor',
    model_type='classification_binary',
    exclude_columns=['identifier'],
)
# Set the target column name on the metadata explicitly.
metadata.target_column_name = 'work_home_actual'

# Preview the first rows of the reference data.
display(reference.head())

In [None]:
# Print the same reference preview as a plain-text, grid-formatted table.
print(
    reference.head().to_markdown(tablefmt="grid")
)

In [None]:
# Create the object that performs the Univariate Drift calculations.
# A chunk size of 5000 data points is used to create the drift statistics.
univariate_calculator = nml.UnivariateStatisticalDriftCalculator(
    model_metadata=metadata,
    chunk_size=5000,
)
# Fit the calculator on the reference data.
univariate_calculator = univariate_calculator.fit(reference_data=reference)

# Compute drift statistics for all available data: reference and analysis
# rows stacked together under a fresh index.
data = pd.concat([reference, analysis], ignore_index=True)
univariate_results = univariate_calculator.calculate(data=data)

# View a small subset of the results through the `data` property of the
# results object: the first 9 columns of the first 5 rows.
display(univariate_results.data.iloc[:, :9].head(5))

In [None]:
# Print the first 5 rows (first 9 columns) of the drift results as a
# plain-text, grid-formatted table.
print(
    univariate_results.data.iloc[:, :9].head(5).to_markdown(tablefmt="grid")
)

In [None]:
# Show the last 5 rows of the drift results, limited to the first 9 columns.
display(univariate_results.data.iloc[:, :9].tail(5))

In [None]:
# Print the last 5 rows (first 9 columns) of the drift results as a
# plain-text, grid-formatted table.
print(
    univariate_results.data.iloc[:, :9].tail(5).to_markdown(tablefmt="grid")
)

In [None]:
# Plot the drift statistic for every model input, one figure per feature.
for feature in metadata.features:
    drift_plot = univariate_results.plot(
        kind='feature_drift',
        metric='statistic',
        feature_label=feature.label,
    )
    drift_plot.show()
    # Also write the figure to an SVG file named after the feature label
    # (this step is not shown on the guide).
    drift_plot.write_image(file=f"../_static/drift-guide-{feature.label}.svg")

In [None]:
# Plot the distribution drift results for each continuous model input.
for feature in metadata.continuous_features:
    distribution_plot = univariate_results.plot(kind='feature_distribution', feature_label=feature.label)
    distribution_plot.show()
    # Also write the figure to an SVG file named after the feature label
    # (this step is not shown on the guide).
    distribution_plot.write_image(file=f"../_static/drift-guide-joyplot-{feature.label}.svg")

In [None]:
# Plot the distribution drift results for each categorical model input.
for feature in metadata.categorical_features:
    distribution_plot = univariate_results.plot(kind='feature_distribution', feature_label=feature.label)
    distribution_plot.show()
    # Also write the figure to an SVG file named after the feature label
    # (this step is not shown on the guide).
    distribution_plot.write_image(file=f"../_static/drift-guide-stacked-{feature.label}.svg")

In [None]:
# Rank the features with the 'alert_count' ranker, based on the univariate
# drift results. only_drifting=False is passed through as in the original;
# presumably it keeps non-drifting features in the ranking (confirm in the
# NannyML docs).
ranker = nml.Ranker.by('alert_count')
ranked_features = ranker.rank(
    univariate_results,
    model_metadata=metadata,
    only_drifting=False,
)

# Leave the ranking as the cell's last expression so the notebook renders it.
ranked_features

In [None]:
# Print the feature ranking as a plain-text, grid-formatted table.
print(
    ranked_features.to_markdown(tablefmt="grid")
)