# NannyML Workflow

In [1]:
import nannyml as nml
import pandas as pd

In [2]:
def show_df_rst(df):
    """Print the head of ``df`` as a grid-format text table.

    Only the rows returned by ``df.head()`` are rendered, using
    ``to_markdown(tablefmt="grid")``. Nothing is returned.
    """
    preview = df.head()
    rendered = preview.to_markdown(tablefmt="grid")
    print(rendered)

In [3]:
# Unpack the three objects returned by the synthetic sample loader.
(reference, analysis, analysis_gt) = nml.load_synthetic_sample()
# Preview the first rows of the reference set as a grid table.
show_df_rst(df=reference)

+----+------------------------+----------------+-----------------------+------------------------------+--------------------+-----------+----------+--------------+--------------------+---------------------+----------------+-------------+
|    |   distance_from_office | salary_range   |   gas_price_per_litre |   public_transportation_cost | wfh_prev_workday   | workday   |   tenure |   identifier |   work_home_actual | timestamp           |   y_pred_proba | partition   |
|  0 |               5.96225  | 40K - 60K €    |               2.11948 |                      8.56806 | False              | Friday    | 0.212653 |            0 |                  1 | 2014-05-09 22:27:20 |           0.99 | reference   |
+----+------------------------+----------------+-----------------------+------------------------------+--------------------+-----------+----------+--------------+--------------------+---------------------+----------------+-------------+
|  1 |               0.535872 | 40K - 60K €    |    

In [None]:
# Build the metadata object from the reference data for the named model.
md = nml.extract_metadata(
    data=reference,
    model_name='wfh_predictor',
)

In [None]:
# Print whatever md.print() returns.
metadata_report = md.print()
print(metadata_report)

In [None]:
# Explicitly set the timestamp and ground-truth column names on the metadata.
md.timestamp_column_name = 'timestamp'
# The prediction column assignment is left commented out.
# md.prediction_column_name = 'y_pred_proba'
md.ground_truth_column_name = 'work_home_actual'

In [None]:
# Print the metadata again, after the column-name assignments.
print(md.print())

In [None]:
# Univariate drift calculator configured with the metadata and chunk_size=5000.
univariate_calculator = nml.UnivariateStatisticalDriftCalculator(
    model_metadata=md,
    chunk_size=5000,
)

In [None]:
# Fit the univariate calculator on the reference data.
univariate_calculator.fit(reference_data=reference)

In [None]:
# Stack reference and analysis into one frame with a freshly numbered index,
# then display it as the cell output.
fdata = pd.concat(
    objs=[reference, analysis],
    ignore_index=True,
)
fdata

In [None]:
# Run the fitted univariate calculator over the combined data.
univariate_results = univariate_calculator.calculate(data=fdata)

In [None]:
# Collect the result column labels into a plain list and display it.
univariate_results_columns = [*univariate_results.columns]
univariate_results_columns

In [None]:
# Skip the first five rows and keep the first nine columns; show_df_rst then
# prints only the head of that slice.
results_slice = univariate_results.iloc[5:, :9]
show_df_rst(results_slice)

In [None]:
# Rank the features by alert count with only_drifted switched off.
ranking = nml.Ranker(by='alert_count')
ranked_features = ranking.rank(
    univariate_results,
    only_drifted=False,
)

In [None]:
# Rank features with nml.rank_drifted_features and print the result as a grid table.
a1 = nml.rank_drifted_features(univariate_results)
print(a1.to_markdown(tablefmt="grid"))

In [None]:
# Data reconstruction drift calculator configured with the metadata and chunk_size=5000.
rcerror_calculator = nml.DataReconstructionDriftCalculator(
    model_metadata=md,
    chunk_size=5000,
)

In [None]:
# Fit the reconstruction drift calculator on the reference data.
rcerror_calculator.fit(reference_data=reference)

In [None]:
# Run the fitted reconstruction drift calculator over the combined data and
# display the result as the cell output.
rcerror_results = rcerror_calculator.calculate(data=fdata)
rcerror_results

In [None]:
# Plot helper that reuses the univariate calculator's metadata and chunker.
plots = nml.DriftPlots(
    model_metadata=univariate_calculator.model_metadata,
    chunker=univariate_calculator.chunker,
)

In [None]:
# One 'statistic' drift plot per feature: display it, then save it as an SVG
# named after the feature label.
for itm in md.features:
    fig = plots.plot_univariate_statistical_drift(
        univariate_results,
        metric='statistic',
        feature_label=itm.label,
    )
    fig.show()
    fig.write_image(file=f"drift-guide-{itm.label}.svg")

In [None]:
# Build the combined frame once: it is the same for every feature, so
# concatenating inside the loop only repeated the copy on each iteration.
combined_data = pd.concat([reference, analysis], ignore_index=True)

# One distribution-over-time plot per continuous feature: display it, then
# save it as an SVG named after the feature label.
for itm in md.continuous_features:
    fig = plots.plot_continuous_feature_distribution_over_time(
        data=combined_data,
        drift_results=univariate_results,
        feature_label=itm.label,
    )
    fig.show()
    fig.write_image(file=f"drift-guide-joyplot-{itm.label}.svg")

In [None]:
# Build the combined frame once: it is the same for every feature, so
# concatenating inside the loop only repeated the copy on each iteration.
combined_data = pd.concat([reference, analysis], ignore_index=True)

# One distribution-over-time plot per categorical feature: display it, then
# save it as an SVG named after the feature label.
for itm in md.categorical_features:
    fig = plots.plot_categorical_feature_distribution_over_time(
        data=combined_data,
        drift_results=univariate_results,
        feature_label=itm.label,
    )
    fig.show()
    fig.write_image(file=f"drift-guide-stacked-{itm.label}.svg")

In [None]:
# Plot the data reconstruction drift results, display the figure, and save it
# as an SVG. The filename has no placeholders, so a plain string is used.
fig = plots.plot_data_reconstruction_drift(rcerror_results)
fig.show()
fig.write_image(file="drift-guide-multivariate.svg")

In [None]:
# Create a CBPE estimator (chunk_size=5000), fit it on the reference data,
# then run the estimate over the combined data.
cbpe = nml.CBPE(model_metadata=md, chunk_size=5000)
cbpe.fit(reference_data=reference)
est_perf = cbpe.estimate(data=fdata)

In [None]:
# Plot the CBPE performance estimation, display the figure, and save it as an SVG.
# NOTE: this rebinds `plots`, which an earlier cell bound to nml.DriftPlots;
# re-run that cell before re-running any drift plot cells.
plots = nml.PerformancePlots(model_metadata=md, chunker=cbpe.chunker)
fig = plots.plot_cbpe_performance_estimation(est_perf)
fig.show()
# The filename has no placeholders, so a plain string is used.
fig.write_image(file="perf-est-guide-syth-example.svg")