In [None]:
import pandas as pd
import nannyml as nml

# Probability cut-off used to turn 'y_pred_proba' into a hard 0/1 prediction.
# Defined once so that both frames are guaranteed to use the same threshold.
PREDICTION_THRESHOLD = 0.8

# load_synthetic_sample() returns three objects, unpacked here as the reference
# frame, the analysis frame and the analysis targets.
reference, analysis, analysis_target = nml.datasets.load_synthetic_sample()

# Vectorized comparison instead of a per-element lambda: values at or above the
# threshold become 1, everything else 0 (int64, matching what the lambda produced).
reference['y_pred'] = reference['y_pred_proba'].ge(PREDICTION_THRESHOLD).astype('int64')
analysis['y_pred'] = analysis['y_pred_proba'].ge(PREDICTION_THRESHOLD).astype('int64')

# Last expression of the cell: preview the first three reference rows.
reference.head(3)

In [None]:
def showrst(df):
    """Print ``df`` as a grid-formatted text table.

    Calls ``df.to_markdown(tablefmt="grid")`` and writes the resulting string
    to stdout. Presumably used to paste tables into reStructuredText docs
    (the name suggests so) -- confirm with the docs workflow.

    Args:
        df: Any object exposing ``to_markdown(tablefmt=...)``; in this
            notebook it is called with pandas DataFrames.

    Returns:
        None. The table is only printed.
    """
    grid_table = df.to_markdown(tablefmt="grid")
    print(grid_table)

In [None]:
# Attach the analysis targets to the analysis rows with a column-based left merge.
# Joining on an index built from 'identifier' keeps the identifiers in the index,
# and the subsequent concat with ignore_index=True would then throw them away,
# leaving NaN identifiers for every analysis row in `data`.
analysis_with_targets = analysis.merge(
    analysis_target,
    on='identifier',
    how='left',  # keep every analysis row, even when no target is available for it
    suffixes=('', '_r'),  # overlapping right-hand columns still get the '_r' suffix
)

# ignore_index=True already produces a fresh 0..n-1 index, so no reset_index is needed.
data = pd.concat([reference, analysis_with_targets], ignore_index=True)

# Print the first three rows whose 'partition' value is 'analysis'.
showrst(data.loc[data['partition'] == 'analysis'].head(3))

In [None]:
# Build the model metadata object from the reference frame.
metadata = nml.extract_metadata(reference)
# Set the target column explicitly to 'work_home_actual'
# (presumably extract_metadata does not pick it up on its own -- TODO confirm).
metadata.target_column_name = 'work_home_actual'
# Last expression of the cell: display the result of metadata.to_df().
metadata.to_df()

In [None]:
# Configure the target distribution calculator with the model metadata and a
# chunk size of 5000, then fit it on the reference frame.
target_distribution_calculator = nml.TargetDistributionCalculator(
    model_metadata=metadata,
    chunk_size=5000,
)
target_distribution_calculator.fit(
    reference_data=reference,
)

In [None]:
# Run the fitted calculator on the combined frame; the result object is
# inspected via `.data` and `.plot(...)` in the following cells.
target_distribution = target_distribution_calculator.calculate(data)

In [None]:
# Print the first three rows of the calculator result as a grid table.
showrst(target_distribution.data.head(3))

In [None]:
# Plot the target distribution with distribution='metric', export it as an SVG
# under ../_static using the kaleido engine, then display the figure inline.
fig = target_distribution.plot(
    kind='distribution',
    distribution='metric',
)
fig.write_image(
    file="../_static/target_distribution_metric.svg",
    engine="kaleido",
)
fig.show()

In [None]:
# Plot the target distribution with distribution='statistical', export it as an
# SVG under ../_static using the kaleido engine, then display the figure inline.
fig = target_distribution.plot(
    kind='distribution',
    distribution='statistical',
)
fig.write_image(
    file="../_static/target_distribution_statistical.svg",
    engine="kaleido",
)
fig.show()
