In [None]:
import nannyml as nml
from IPython.display import display

# Load the synthetic binary classification dataset once and unpack the first
# two elements of the returned tuple (reference, analysis). Any remaining
# elements are not needed by this notebook and are discarded.
reference_df, analysis_df, *_ = nml.load_synthetic_binary_classification_dataset()

# Preview the first rows of the reference data.
display(reference_df.head())

In [None]:
# Print the head of the reference data as a grid-formatted Markdown table.
print(
    reference_df.head().to_markdown(tablefmt="grid")
)

In [None]:
# Columns of the reference data that are left out of the drift calculation.
excluded_columns = ('timestamp', 'period', 'work_home_actual', 'identifier')

# Every remaining column of the reference data is evaluated for drift.
column_names = [
    col
    for col in reference_df.columns
    if col not in excluded_columns
]

# Univariate drift calculator using the Jensen-Shannon method for both
# continuous and categorical columns.
calc = nml.UnivariateDriftCalculator(
    column_names=column_names,
    timestamp_column_name='timestamp',
    continuous_methods=['jensen_shannon'],
    categorical_methods=['jensen_shannon'],
)

In [None]:
# Fit the calculator on the reference data, then calculate on the analysis data.
calc.fit(reference_df)
results = calc.calculate(analysis_df)

# Show the first nine columns of the analysis-period results.
display(
    results
    .filter(period='analysis')
    .to_df()
    .iloc[:, :9]
)

In [None]:
# Print the first nine columns of the analysis-period results as a
# grid-formatted Markdown table.
print(
    results
    .filter(period='analysis')
    .to_df()
    .iloc[:, :9]
    .to_markdown(tablefmt="grid")
)

In [None]:
# Show the first nine columns of the reference-period results.
display(
    results
    .filter(period='reference')
    .to_df()
    .iloc[:, :9]
)

In [None]:
# Print the first nine columns of the reference-period results as a
# grid-formatted Markdown table.
print(
    results
    .filter(period='reference')
    .to_df()
    .iloc[:, :9]
    .to_markdown(tablefmt="grid")
)

In [None]:
# Render one drift plot per column handled by the calculator, with the
# reference period included, and show each figure inline.
for feature in calc.column_names:
    drift_fig = results.plot(kind='feature_drift', column_name=feature, plot_reference=True)
    drift_fig.show()

In [None]:
# Re-create the drift plot of every column and write each one to an SVG file
# under ../_static, named after the column.
for feature in calc.column_names:
    drift_fig = results.plot(plot_reference=True, column_name=feature, kind='feature_drift')
    drift_fig.write_image(f'../_static/drift-guide-{feature}.svg')

In [None]:
# Render one distribution plot per continuous column, with the reference
# period included, and show each figure inline.
for cont_feat in calc.continuous_column_names:
    figure = results.plot(
        kind='feature_distribution',
        # `column_name` is the keyword the `feature_drift` plots in this
        # notebook pass to `results.plot`.
        # NOTE(review): confirm against the installed nannyml release.
        column_name=cont_feat,
        plot_reference=True
    )
    figure.show()

In [None]:
# Re-create the distribution plot of every continuous column and write each
# one to an SVG file under ../_static, named after the column.
for cont_feat in calc.continuous_column_names:
    figure = results.plot(
        kind='feature_distribution',
        # `column_name` is the keyword the `feature_drift` plots in this
        # notebook pass to `results.plot`.
        # NOTE(review): confirm against the installed nannyml release.
        column_name=cont_feat,
        plot_reference=True
    )
    figure.write_image(f'../_static/drift-guide-joyplot-{cont_feat}.svg')

In [None]:
# Render one distribution plot per categorical column, with the reference
# period included, and show each figure inline.
for cat_feat in calc.categorical_column_names:
    figure = results.plot(
        kind='feature_distribution',
        # `column_name` is the keyword the `feature_drift` plots in this
        # notebook pass to `results.plot`.
        # NOTE(review): confirm against the installed nannyml release.
        column_name=cat_feat,
        plot_reference=True
    )
    figure.show()

In [None]:
# Re-create the distribution plot of every categorical column and write each
# one to an SVG file under ../_static, named after the column.
for cat_feat in calc.categorical_column_names:
    figure = results.plot(
        kind='feature_distribution',
        # `column_name` is the keyword the `feature_drift` plots in this
        # notebook pass to `results.plot`.
        # NOTE(review): confirm against the installed nannyml release.
        column_name=cat_feat,
        plot_reference=True
    )
    figure.write_image(f'../_static/drift-guide-stacked-{cat_feat}.svg')

In [None]:
# Build a ranker keyed on 'alert_count' and rank the drift results;
# only_drifting=False is passed so the ranking is not restricted to
# drifting columns.
ranker = nml.Ranker.by('alert_count')
ranked_features = ranker.rank(
    results,
    only_drifting=False,
)
display(ranked_features)

In [None]:
# Print the ranking as a grid-formatted Markdown table.
print(
    ranked_features.to_markdown(tablefmt="grid")
)
