In [2]:
# Demo: Fairness Pipeline Development Toolkit (Measurement + Pipeline)

# --- standard library ---
import json
from pathlib import Path

# --- third-party ---
import pandas as pd

# --- toolkit under demonstration ---
from fairness_pipeline_dev_toolkit.pipeline.config import PipelineConfig, load_config
from fairness_pipeline_dev_toolkit.pipeline.orchestration.engine import (
    apply_pipeline,
    build_pipeline,
    run_detectors,
)

# Reaching this line means every import above resolved in the current kernel.
print("Toolkit imports OK.")

Toolkit imports OK.


In [3]:
# Toy dataset for the demo: ten rows, one sensitive column ("group"),
# two feature columns ("x1", "x2") and a binary column ("y").
group_labels = ["A", "A", "B", "B", "A", "B", "A", "B", "A", "B"]
x1_values = [0.1, 0.2, 0.3, 0.4, 0.15, 0.35, 0.12, 0.33, 0.18, 0.28]
x2_values = [1, 2, 3, 3, 2, 3, 1, 2, 2, 3]
y_values = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

# Column order in the dict fixes the column order of the frame.
df = pd.DataFrame(
    {
        "group": group_labels,
        "x1": x1_values,
        "x2": x2_values,
        "y": y_values,
    }
)

# Last expression of the cell: rich preview of the first five rows.
df.head()

Unnamed: 0,group,x1,x2,y
0,A,0.1,1,0
1,A,0.2,2,1
2,B,0.3,3,0
3,B,0.4,3,1
4,A,0.15,2,0


In [4]:
# Build a config in-memory (mirrors your YAML schema).
#
# The YAML below is passed to `load_config(text=...)` instead of being read
# from a file, so the notebook needs no config file on disk.
# Keys defined in the text:
#   sensitive        - list of column names treated as sensitive (here "group")
#   alpha            - presumably the significance level for the detector
#                      tests - TODO confirm against the toolkit docs
#   proxy_threshold  - presumably the cut-off used when flagging proxy
#                      features - TODO confirm against the toolkit docs
#   report_out       - relative path for the report artifact
#   benchmarks       - per-attribute reference proportions for each group value
#   pipeline         - ordered list of steps, each with a name, a transformer
#                      identifier and that transformer's params
# Do not add comments inside the triple-quoted string: they would become part
# of the text handed to the loader.
cfg_text = '''
sensitive: ["group"]
alpha: 0.05
proxy_threshold: 0.30
report_out: "artifacts_demo_report.json"
benchmarks:
  group: {A: 0.5, B: 0.5}
pipeline:
  - name: reweigh
    transformer: "InstanceReweighting"
    params: {}
  - name: di
    transformer: "DisparateImpactRemover"
    params:
      features: ["x1"]
      sensitive: "group"
      repair_level: 0.8
'''
# Parse the text into the toolkit's config object; `cfg` is reused by the
# detector and pipeline cells further down.
cfg = load_config(text=cfg_text)
# Last expression of the cell: show the parsed config's repr.
cfg

PipelineConfig(sensitive=['group'], benchmarks={'group': {'A': 0.5, 'B': 0.5}}, alpha=0.05, proxy_threshold=0.3, report_out='artifacts_demo_report.json', pipeline=[PipelineStep(name='reweigh', transformer='InstanceReweighting', params={}), PipelineStep(name='di', transformer='DisparateImpactRemover', params={'features': ['x1'], 'sensitive': 'group', 'repair_level': 0.8})])

In [5]:
# Run detectors (representation / disparities / proxies)
det_report = run_detectors(df, cfg)

# Serialize the report once, then print only a bounded preview so the cell
# output stays short. The preview is a raw character slice, so it may end in
# the middle of a JSON entry and is not valid JSON on its own.
PREVIEW_CHARS = 1000
report_json = json.dumps(det_report.to_dict(), indent=2)

# Append the ellipsis marker only when something was actually cut off;
# a report that fits within the limit is printed complete and unmarked.
truncation_marker = "\n..." if len(report_json) > PREVIEW_CHARS else ""
print(report_json[:PREVIEW_CHARS] + truncation_marker)

{
  "meta": {
    "phase": "0",
    "alpha": 0.05,
    "proxy_threshold": 0.3
  },
  "summary": {
    "sensitive": [
      "group"
    ],
    "alpha": 0.05,
    "proxy_threshold": 0.3,
    "representation_flags": 0,
    "disparity_flags": 2,
    "proxy_flags": 3
  },
  "representation": [
    {
      "attribute": "group",
      "counts": {
        "A": 5,
        "B": 5
      },
      "proportions": {
        "A": 0.5,
        "B": 0.5
      },
      "benchmark": {
        "A": 0.5,
        "B": 0.5
      },
      "chi2_pvalue": 1.0,
      "flagged": false
    }
  ],
  "disparities": [
    {
      "feature": "x1",
      "attribute": "group",
      "test": "anova",
      "pvalue": 0.00018000524340134393,
      "flagged": true
    },
    {
      "feature": "x2",
      "attribute": "group",
      "test": "chi2",
      "pvalue": 0.0301973834223185,
      "flagged": true
    },
    {
      "feature": "y",
      "attribute": "group",
      "test": "chi2",
      "pvalue": 0.05777957112359719,

In [6]:
# Build and apply pipeline
# Step 1: materialize the pipeline object described by the config.
pipe = build_pipeline(cfg)

# Step 2: run it on the demo frame; the call returns the transformed frame
# together with a dict of side artifacts.
df_out, artifacts = apply_pipeline(pipe, df)

# NOTE(review): in the recorded output every sample weight is 1.0 and the
# first rows of df_out equal the input rows - confirm that the configured
# steps are actually taking effect on this data.
print(f"Transformed shape: {df_out.shape}")
print(f"Artifacts: {artifacts}")

# Last expression of the cell: rich preview of the transformed frame.
df_out.head()

Transformed shape: (10, 4)
Artifacts: {'sample_weight': array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])}


Unnamed: 0,group,x1,x2,y
0,A,0.1,1,0
1,A,0.2,2,1
2,B,0.3,3,0
3,B,0.4,3,1
4,A,0.15,2,0


In [7]:
# Optional: log to MLflow if available
# Best-effort step: any failure (missing package, missing helper, tracking
# error) is reported and skipped so the rest of the notebook still runs.
# NOTE(review): the recorded output shows the `log_fairness` import failing
# with "cannot import name" - confirm the helper name exported by
# integration/mlflow_logger.py.
try:
    # Imported lazily because MLflow is an optional dependency of this demo.
    import mlflow
    from fairness_pipeline_dev_toolkit.integration.mlflow_logger import log_fairness

    experiment_name = "fairness_demo"
    run_label = "phase5_demo"

    mlflow.set_experiment(experiment_name)
    with mlflow.start_run(run_name=run_label):
        # The report is converted to a plain dict inside the active run.
        log_fairness(det_report.to_dict())
    print("Logged fairness report to MLflow run.")
except Exception as exc:
    print("MLflow not available or logging failed (skipped):", exc)

MLflow not available or logging failed (skipped): cannot import name 'log_fairness' from 'fairness_pipeline_dev_toolkit.integration.mlflow_logger' (/Users/jobdulo/Documents/Turing/Sprint 4/fairness_pipeline_dev_toolkit/fairness_pipeline_dev_toolkit/integration/mlflow_logger.py)
