In [24]:
# -------------------------------------------
# Notebook 2: Batch Inference & Deployment Simulation
# Simulating Azure ML Deployment Inference Pipeline
# -------------------------------------------

import pandas as pd
import numpy as np
import os
import joblib
from sklearn.metrics import accuracy_score, classification_report

# Restore the model artifact that the training notebook wrote to disk.
model_path = "models/random_forest_model.pkl"

# NOTE(review): joblib artifacts are pickle-based, so only load files from a
# trusted source (here: the output of Notebook 1).
if os.path.exists(model_path):
    model = joblib.load(model_path)
else:
    # Fail early with an actionable message instead of a low-level load error.
    raise FileNotFoundError("Trained model not found. Please run Notebook 1 first.")

print("Loaded trained model from disk.")


Loaded trained model from disk.


In [25]:
# Read the dataset file that Notebook 1 is expected to have saved.
data_path = "data/adult.csv"

if os.path.exists(data_path):
    df = pd.read_csv(data_path)
else:
    # Stop here with a clear hint rather than letting read_csv fail.
    raise FileNotFoundError("Dataset not found. Make sure 'adult.csv' is saved by Notebook 1.")

# Draw a reproducible 50-row sample (fixed seed) to stand in for an incoming batch.
# NOTE(review): the rows come from the same CSV saved by Notebook 1, so they may
# overlap with the training data and not be truly "unseen" — confirm.
new_data = (
    df
    .sample(n=50, random_state=42)
    .copy()
)
print("Sampled new batch of 50 records.")


Sampled new batch of 50 records.


In [26]:
# Preprocessing
def preprocess_batch(raw_batch, feature_columns):
    """Turn a raw batch into model-ready features and binary labels.

    Every step returns a new object, so ``raw_batch`` is never modified and
    this cell can be re-run without first re-running the sampling cell.

    Parameters
    ----------
    raw_batch : pd.DataFrame
        Raw rows including the 'income' column; '?' marks a missing value.
    feature_columns : sequence of str
        Feature names, in order, that the trained model expects.

    Returns
    -------
    tuple of (pd.DataFrame, pd.Series)
        One-hot encoded features reindexed to ``feature_columns``, and 0/1
        labels (1 where the income value contains '>50K').
    """
    # Treat '?' as missing and drop every row that has any missing value.
    cleaned = raw_batch.replace('?', np.nan).dropna()

    # Encode target variable: 1 if the income string contains '>50K', else 0.
    labels = cleaned['income'].str.contains('>50K', regex=False).astype(int)

    # One-hot encode the categorical (object-dtype) feature columns.
    features = cleaned.drop('income', axis=1)
    categorical_cols = features.select_dtypes(include='object').columns
    features = pd.get_dummies(features, columns=categorical_cols)

    # Align with the training layout: columns the batch lacks are filled with 0
    # and columns the model never saw are dropped.
    features = features.reindex(columns=feature_columns, fill_value=0)
    return features, labels


# Match feature columns with training data.
# NOTE(review): assumes the saved pipeline has a step named 'scaler' that
# exposes feature_names_in_ — confirm against Notebook 1.
trained_columns = model.named_steps['scaler'].feature_names_in_

# Separate features and labels; new_data itself is left untouched.
X_batch, y_true = preprocess_batch(new_data, trained_columns)

print("Batch preprocessed and aligned with training features.")


Batch preprocessed and aligned with training features.


In [27]:
# Run the loaded model on the aligned batch features.
y_pred = model.predict(X_batch)

# Score the predictions against the true labels.
accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
report = classification_report(y_true=y_true, y_pred=y_pred)

# Accuracy to four decimals, followed by the per-class text report.
print(" Batch Accuracy: {:.4f}".format(accuracy))
print("\n Classification Report:\n", report, sep="\n")


 Batch Accuracy: 0.8776

 Classification Report:

              precision    recall  f1-score   support

           0       0.85      1.00      0.92        35
           1       1.00      0.57      0.73        14

    accuracy                           0.88        49
   macro avg       0.93      0.79      0.82        49
weighted avg       0.90      0.88      0.87        49



In [28]:
import json

# Single source of truth for the output location (also used in the message below).
metrics_path = "metrics/batch_metrics.json"

# open(..., "w") does not create missing parent directories, so make sure the
# folder exists first; exist_ok=True makes re-running the cell safe.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)

# Collect the batch accuracy and the per-class report (as a nested dict) for export.
metrics_output = {
    "batch_accuracy": accuracy,
    "report": classification_report(y_true, y_pred, output_dict=True)
}

with open(metrics_path, "w") as f:
    json.dump(metrics_output, f, indent=4)

print(f"Batch metrics saved to '{metrics_path}'.")

Batch metrics saved to 'metrics/batch_metrics.json'.
