# Assignment 5.1 — ML System Observability: Model Bias Monitor

# Nitin Kumar Mishra 
# 07.10.2025

Purpose:
Set up a SageMaker Model Bias Monitor for a deployed model (e.g., XGBoost churn model) to identify bias, monitor feature drift, and produce a bias report for submission.

This notebook is targeted to sagemaker==2.245.0.

# Step 1 — Setup Environment

Initialize the SageMaker session, execution role, and default S3 bucket that will store baseline data and reports.

In [8]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.s3 import S3Uploader
import pandas as pd
import boto3
from datetime import datetime
from time import sleep

# --- Basic SageMaker and AWS Setup ---
session = sagemaker.Session()
region  = session.boto_region_name
role    = get_execution_role()
s3_client = boto3.client("s3")

# --- S3 Bucket and Prefixes ---
bucket  = session.default_bucket()
prefix  = "assignment5-bias-monitor" # Using a fresh prefix

# Paths for all artifacts
model_s3_key = f"s3://{bucket}/{prefix}/model"
data_capture_s3_key = f"s3://{bucket}/{prefix}/datacapture"
baseline_data_s3_key = f"s3://{bucket}/{prefix}/baseline"
reports_s3_key = f"s3://{bucket}/{prefix}/reports"

print(f"SageMaker SDK Version: {sagemaker.__version__}")
print(f"Bucket: {bucket}")
print(f"Role: {role}")

SageMaker SDK Version: 2.245.0
Bucket: sagemaker-us-east-1-533267190630
Role: arn:aws:iam::533267190630:role/LabRole


# Step 2: Deploy the Pre-trained Model

Upload the pre-trained XGBoost model to S3 and deploy it to a real-time SageMaker endpoint. A deployed model is required to perform post-training bias analysis.

In [9]:
from sagemaker.model import Model
from sagemaker import image_uris

# Upload the pre-trained model artifact from the lab files to S3
model_url = S3Uploader.upload("model/xgb-churn-prediction-model.tar.gz", model_s3_key)
print(f"Model artifact uploaded to: {model_url}")

# Define the model name and endpoint name
model_name = f"churn-bias-model-{datetime.utcnow():%Y-%m-%d-%H%M}"
endpoint_name = f"churn-bias-endpoint-{datetime.utcnow():%Y-%m-%d-%H%M}"

# Get the container image URI for XGBoost
image_uri = image_uris.retrieve(
    framework="xgboost",
    version="0.90-1", # As used in the lab notebook
    region=region
)

# Create the SageMaker Model object
model = Model(
    image_uri=image_uri,
    model_data=model_url,
    role=role,
    sagemaker_session=session,
    name=model_name
)

# Deploy the model to an endpoint
print(f"\nDeploying model '{model_name}' to endpoint '{endpoint_name}'...")
model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
    endpoint_name=endpoint_name,
)
print("Deployment complete!")

Model artifact uploaded to: s3://sagemaker-us-east-1-533267190630/assignment5-bias-monitor/model/xgb-churn-prediction-model.tar.gz

Deploying model 'churn-bias-model-2025-10-07-1944' to endpoint 'churn-bias-endpoint-2025-10-07-1944'...


  model_name = f"churn-bias-model-{datetime.utcnow():%Y-%m-%d-%H%M}"
  endpoint_name = f"churn-bias-endpoint-{datetime.utcnow():%Y-%m-%d-%H%M}"


-----!Deployment complete!


# Step 3: Prepare the Baseline Dataset

The Clarify job requires a dataset containing only the features to send to the model for predictions. We will load the `validation.csv` file, separate the features from the first column (the ground truth label), and upload the features-only dataset to S3.

### Inspect the Baseline Dataset

Before running the final analysis, let's load the `validation.csv` file into a pandas DataFrame and display the first few rows. This will allow us to visually inspect the columns and confirm that `col_1` is continuous (many different numbers) and `col_11` is categorical (only 0s and 1s).

In [13]:
import pandas as pd

# Load the validation dataset, which does not have a header
df_inspect = pd.read_csv("test_data/validation.csv", header=None)

# Assign generic column names for clarity
df_inspect.columns = [f"col_{i}" for i in range(len(df_inspect.columns))]

# Display the first 5 rows
print("Displaying the first 5 rows of the dataset:")
display(df_inspect.head())

Displaying the first 5 rows of the dataset:


Unnamed: 0,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,...,col_60,col_61,col_62,col_63,col_64,col_65,col_66,col_67,col_68,col_69
0,0,47,28,141.3,94,168.0,108,113.5,84,7.8,...,0,0,0,1,0,0,1,0,0,1
1,0,30,0,247.4,107,175.9,76,287.4,90,11.3,...,0,0,0,0,1,0,1,0,1,0
2,0,106,32,165.9,126,216.5,93,173.1,86,14.1,...,0,0,0,1,0,0,1,0,0,1
3,0,131,0,240.9,108,167.4,91,322.2,109,14.7,...,0,0,0,1,0,0,1,0,1,0
4,0,83,37,78.5,109,210.5,101,179.7,102,11.8,...,0,1,0,0,1,0,1,0,0,1


### Inspect Dataset with Actual Headers

To better understand our data, let's apply the original, descriptive column headers to our dataset. This will show us the real-world meaning of columns like `col_1` (Account Length) and `col_11` (International Plan).

In [15]:
import pandas as pd

# This is the complete and correct list of 70 column headers.
# It includes 'Area Code' and all 51 one-hot encoded state columns.
column_names = [
    'Churn', 'Account Length', 'Area Code', 'VMail Message', 'Day Mins',
    'Eve Mins', 'Night Mins', 'Intl Mins', 'CustServ Calls', 'Intl Plan',
    'VMail Plan', 'Day Calls', 'Day Charge', 'Eve Calls', 'Eve Charge',
    'Night Calls', 'Night Charge', 'Intl Calls', 'Intl Charge', 'State_AK',
    'State_AL', 'State_AR', 'State_AZ', 'State_CA', 'State_CO', 'State_CT',
    'State_DC', 'State_DE', 'State_FL', 'State_GA', 'State_HI', 'State_IA',
    'State_ID', 'State_IL', 'State_IN', 'State_KS', 'State_KY', 'State_LA',
    'State_MA', 'State_MD', 'State_ME', 'State_MI', 'State_MN', 'State_MO',
    'State_MS', 'State_MT', 'State_NC', 'State_ND', 'State_NE', 'State_NH',
    'State_NJ', 'State_NM', 'State_NV', 'State_NY', 'State_OH', 'State_OK',
    'State_OR', 'State_PA', 'State_RI', 'State_SC', 'State_SD', 'State_TN',
    'State_TX', 'State_UT', 'State_VA', 'State_VT', 'State_WA', 'State_WI',
    'State_WV', 'State_WY'
]


# Load the validation dataset, which does not have a header
df_inspect_real_names = pd.read_csv("test_data/validation.csv", header=None)

# Check the number of columns to be sure
print(f"DataFrame has {df_inspect_real_names.shape[1]} columns.")
print(f"Header list has {len(column_names)} names.")

# Assign the descriptive column names
df_inspect_real_names.columns = column_names

# Display the first 5 rows with the real headers
print("\nDisplaying the first 5 rows with actual column headers:")
display(df_inspect_real_names.head())

DataFrame has 70 columns.
Header list has 70 names.

Displaying the first 5 rows with actual column headers:


Unnamed: 0,Churn,Account Length,Area Code,VMail Message,Day Mins,Eve Mins,Night Mins,Intl Mins,CustServ Calls,Intl Plan,...,State_SD,State_TN,State_TX,State_UT,State_VA,State_VT,State_WA,State_WI,State_WV,State_WY
0,0,47,28,141.3,94,168.0,108,113.5,84,7.8,...,0,0,0,1,0,0,1,0,0,1
1,0,30,0,247.4,107,175.9,76,287.4,90,11.3,...,0,0,0,0,1,0,1,0,1,0
2,0,106,32,165.9,126,216.5,93,173.1,86,14.1,...,0,0,0,1,0,0,1,0,0,1
3,0,131,0,240.9,108,167.4,91,322.2,109,14.7,...,0,0,0,1,0,0,1,0,1,0
4,0,83,37,78.5,109,210.5,101,179.7,102,11.8,...,0,1,0,0,1,0,1,0,0,1


# Step 4: Configure and Run the Bias Analysis Job

Using the `sagemaker.clarify.SageMakerClarifyProcessor`, we will configure and launch the post-training bias analysis. This involves defining:
1.  `ModelConfig`: Specifies the deployed endpoint to use for getting predictions.
2.  `DataConfig`: Describes the input data, output location, and column headers/indices.
3.  `BiasConfig`: Defines what we are measuring bias for (the positive label and the facet/protected attribute).

In [None]:
import sagemaker
from sagemaker import clarify
import pandas as pd
from sagemaker.s3 import S3Uploader

# --- Step 3 (from before, no changes needed) ---
df_baseline = pd.read_csv("test_data/validation.csv", header=None)
all_headers = [f"col_{i}" for i in range(len(df_baseline.columns))]
df_baseline.columns = all_headers
df_baseline.to_csv("baseline_with_header.csv", header=True, index=False)
baseline_full_uri = S3Uploader.upload("baseline_with_header.csv", baseline_data_s3_key)
print(f"Full baseline data with header uploaded to: {baseline_full_uri}")
# --- End of Step 3 ---


# --- Step 4: Configure and Run the Bias Analysis Job ---

# 1. Create the SageMakerClarifyProcessor (no changes)
clarify_processor = clarify.SageMakerClarifyProcessor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    sagemaker_session=session,
)

# 2. Create the ModelConfig (no changes)
model_config = clarify.ModelConfig(
    model_name=model_name,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    accept_type="text/csv",
    content_type="text/csv",
    content_template="$features"
)

# 3. Create the DataConfig (no changes)
data_config = clarify.DataConfig(
    s3_data_input_path=baseline_full_uri,
    s3_output_path=reports_s3_key,
    label="col_0",
    headers=all_headers,
    dataset_type="text/csv",
)

# 4. Create the BiasConfig (***THIS IS THE CORRECTED PART***)
# We are now using 'col_11' which is a categorical feature (0 or 1).
bias_config = clarify.BiasConfig(
    label_values_or_threshold=[1],  # Positive outcome is churn='1'
    facet_name="col_11",            # **FIX:** Use a categorical column for the facet.
)

print("\nStarting Clarify job for post-training bias analysis...")

# 5. Run the job (no changes)
clarify_processor.run_bias(
    data_config=data_config,
    bias_config=bias_config,
    model_config=model_config,
    post_training_methods="all",
    wait=True,
    logs=True,
)

print(f"\nJob completed! You can find the bias report in: {reports_s3_key}")

In [16]:
import sagemaker
from sagemaker import clarify
import pandas as pd
from sagemaker.s3 import S3Uploader

# --- Step 3: Prepare the Baseline Dataset with Headers ---
# The Clarify job needs the full dataset (features + label) with a header.

# 1. Load the original validation data (which has no header).
df_baseline = pd.read_csv("test_data/validation.csv", header=None)

# 2. Create a full list of 70 headers ('col_0' through 'col_69').
all_headers = [f"col_{i}" for i in range(len(df_baseline.columns))]
df_baseline.columns = all_headers

# 3. Save the full dataframe with headers to a new CSV.
df_baseline.to_csv("baseline_with_header.csv", header=True, index=False)

# 4. Upload this complete, 70-column file to S3. This is our true job input.
baseline_full_uri = S3Uploader.upload("baseline_with_header.csv", baseline_data_s3_key)
print(f"Full baseline data with header uploaded to: {baseline_full_uri}")
# --- End of Step 3 ---


# --- Step 4: Configure and Run the Bias Analysis Job ---

# 1. Create the SageMakerClarifyProcessor (no changes)
clarify_processor = clarify.SageMakerClarifyProcessor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    sagemaker_session=session,
)

# 2. Create the ModelConfig (no changes)
model_config = clarify.ModelConfig(
    model_name=model_name,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    accept_type="text/csv",
    content_type="text/csv",
    content_template="$features"
)

# 3. Create the DataConfig (no changes)
data_config = clarify.DataConfig(
    s3_data_input_path=baseline_full_uri,
    s3_output_path=reports_s3_key,
    label="col_0",
    headers=all_headers,
    dataset_type="text/csv",
)

# 4. Create the BiasConfig (***THIS IS THE CORRECTED PART***)
# We are now using 'col_11' which is a categorical feature (0 or 1).
bias_config = clarify.BiasConfig(
    label_values_or_threshold=[1],  # Positive outcome is churn='1'
    facet_name="col_11",            # **FIX:** Use a categorical column for the facet.
)

print("\nStarting Clarify job for post-training bias analysis...")

# 5. Run the job (no changes)
clarify_processor.run_bias(
    data_config=data_config,
    bias_config=bias_config,
    model_config=model_config,
    post_training_methods="all",
    wait=True,
    logs=True,
)

print(f"\nJob completed! You can find the bias report in: {reports_s3_key}")

INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker.clarify:Analysis Config: {'dataset_type': 'text/csv', 'headers': ['col_0', 'col_1', 'col_2', 'col_3', 'col_4', 'col_5', 'col_6', 'col_7', 'col_8', 'col_9', 'col_10', 'col_11', 'col_12', 'col_13', 'col_14', 'col_15', 'col_16', 'col_17', 'col_18', 'col_19', 'col_20', 'col_21', 'col_22', 'col_23', 'col_24', 'col_25', 'col_26', 'col_27', 'col_28', 'col_29', 'col_30', 'col_31', 'col_32', 'col_33', 'col_34', 'col_35', 'col_36', 'col_37', 'col_38', 'col_39', 'col_40', 'col_41', 'col_42', 'col_43', 'col_44', 'col_45', 'col_46', 'col_47', 'col_48', 'col_49', 'col_50', 'col_51', 'col_52', 'col_53', 'col_54', 'col_55', 'col_56', 'col_57', 'col_58', 'col_59', 'col_60', 'col_61', 'col_62', 'col_63', 'col_64', 'col_65', 'col_66', 'col_67', 'col_68', 'col_69'], 'label': 'col_0', 'label_values_or_threshold': [1], 'facet': [{'name_or_index': 'col_11'}], 'methods': {'report': {'name': 'report', 'title': 'Analysis Report'}

Full baseline data with header uploaded to: s3://sagemaker-us-east-1-533267190630/assignment5-bias-monitor/baseline/baseline_with_header.csv

Starting Clarify job for post-training bias analysis...
....................[34msagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml[0m
[34msagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml[0m
[34mWe are not in a supported iso region, /bin/sh exiting gracefully with no changes.[0m
[34mINFO:sagemaker-clarify-processing:Starting SageMaker Clarify Processing job[0m
[34mINFO:analyzer.data_loading.data_loader_util:Analysis config path: /opt/ml/processing/input/config/analysis_config.json[0m
[34mINFO:analyzer.data_loading.data_loader_util:Analysis result path: /opt/ml/processing/output[0m
[34mINFO:analyzer.data_loading.data_loader_util:This host is algo-1.[0m
[34mINFO:analyzer.data_loading.data_loader_util:This host is the leader.[0m
[34mINFO:

# Step 5: View and Download the Bias Report

The Clarify job generates a comprehensive report in PDF format and a machine-readable `analysis.json` file containing all the computed metrics. We will first inspect the key metrics within the notebook by parsing the JSON file, and then download the PDF report.

In [18]:
import json
from sagemaker.s3 import S3Downloader
import pandas as pd

# The S3 path where the report was saved
report_s3_path = reports_s3_key

print(f"Report files are located in: {report_s3_path}\n")

# --- Part 1: Download and Display Metrics in the Notebook ---
analysis_json_path = "analysis.json"
print(f"Downloading {analysis_json_path} for inspection...")
S3Downloader.download(f"{report_s3_path}/{analysis_json_path}", ".")

with open(analysis_json_path, "r") as f:
    analysis_results = json.load(f)

post_training_metrics = analysis_results.get("post_training_bias_metrics")

if post_training_metrics:
    print("\n--- Post-Training Bias Metrics ---")
    
    all_metrics = []
    # **FIX:** Instead of accessing by index [0], we loop through the dictionary of facets.
    # The keys '0.0' and '1.0' represent our groups (Intl Plan vs. No Intl Plan).
    facets = post_training_metrics.get('report', {}).get('facets', {})
    
    for facet_value, facet_data in facets.items():
        # Create a DataFrame for the metrics of this group
        df = pd.DataFrame(facet_data['metrics'])
        # Add a column to know which group these metrics belong to
        df['group'] = facet_value
        all_metrics.append(df)

    if all_metrics:
        # Combine the metrics from all groups into one table
        metrics_df = pd.concat(all_metrics).set_index('group')
        display(metrics_df)
    else:
        print("No facets found in the report.")
else:
    print("Could not find post-training bias metrics in the report.")


# --- Part 2: Download the PDF Report for Submission ---
report_pdf_path = "report.pdf"
print(f"\nDownloading the final {report_pdf_path} for submission...")
S3Downloader.download(f"{report_s3_path}/{report_pdf_path}", ".")

print(f"\nSUCCESS! '{report_pdf_path}' has been downloaded to your SageMaker Studio directory.")
print("You can find it in the file browser on the left. Right-click it and select 'Download' to save it to your local computer.")

Report files are located in: s3://sagemaker-us-east-1-533267190630/assignment5-bias-monitor/reports

Downloading analysis.json for inspection...

--- Post-Training Bias Metrics ---
No facets found in the report.

Downloading the final report.pdf for submission...

SUCCESS! 'report.pdf' has been downloaded to your SageMaker Studio directory.
You can find it in the file browser on the left. Right-click it and select 'Download' to save it to your local computer.


# Step 5: Display Bias Report Metrics from `analysis.json`

To confirm the successful completion of the job, we will now load the `analysis.json` file from the S3 report path and display the key `post_training_bias_metrics` section. This provides direct proof that the bias analysis was performed and the results are available.

In [19]:
import json
from sagemaker.s3 import S3Downloader

# S3 path where the report was saved
report_s3_path = reports_s3_key

# Define the local filename for the downloaded analysis results
analysis_json_path = "analysis.json"

print(f"Downloading results file from: {report_s3_path}/{analysis_json_path}")
S3Downloader.download(f"{report_s3_path}/{analysis_json_path}", ".")

print("\n--- Raw JSON Output for Post-Training Bias Metrics ---")

# Open and load the downloaded JSON file
with open(analysis_json_path, "r") as f:
    analysis_results = json.load(f)

# Isolate the post-training bias metrics section
post_training_metrics = analysis_results.get("post_training_bias_metrics")

if post_training_metrics:
    # Pretty-print the JSON snippet directly in the output cell
    print(json.dumps(post_training_metrics, indent=2))
else:
    print("Could not find post-training bias metrics in the report.")

print("\n---")
print("Job completion confirmed. The full PDF report is also available for download.")

Downloading results file from: s3://sagemaker-us-east-1-533267190630/assignment5-bias-monitor/reports/analysis.json

--- Raw JSON Output for Post-Training Bias Metrics ---
{
  "label": "col_0",
  "facets": {
    "col_11": [
      {
        "value_or_threshold": "1",
        "metrics": [
          {
            "name": "AD",
            "description": "Accuracy Difference (AD)",
            "value": -0.001626016260162566
          },
          {
            "name": "CDDPL",
            "description": "Conditional Demographic Disparity in Predicted Labels (CDDPL)",
            "value": null,
            "error": "Group variable is empty or not provided"
          },
          {
            "name": "DAR",
            "description": "Difference in Acceptance Rates (DAR)",
            "value": 0.09827586206896555
          },
          {
            "name": "DCA",
            "description": "Difference in Conditional Acceptance (DCA)",
            "value": -0.1206896551724137
          },
 