In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv("../data/clean_data.csv")
cluster_df = pd.read_csv("../data/clustered_time_patterns.csv")


📥 Loading data


In [3]:
if 'time_bucket' in df.columns and 'time_bucket' in cluster_df.columns:
    df = df.merge(cluster_df[['hour', 'dayofweek', 'time_bucket', 'cluster', 'cold_start_ratio', 'total_invocations']],
                  on=['hour', 'dayofweek', 'time_bucket'], how='left')
else:
    df = df.merge(cluster_df[['hour', 'dayofweek', 'cluster', 'cold_start_ratio', 'total_invocations']],
                  on=['hour', 'dayofweek'], how='left')

In [4]:
clf = joblib.load("../models/cold_start_classifier.pkl")
reg = joblib.load("../models/delay_predictor.pkl")


🔍 Loading trained models


In [5]:
print("\n⚙️ Running Inference")
clf_features = ['hour', 'dayofweek', 'is_weekend', 'cluster']
reg_features = ['cold_start_ratio', 'total_invocations', 'cluster', 'anomaly_flag', 'cold_start_prob_est']


⚙️ Running Inference


In [6]:
missing_clf = [f for f in clf_features if f not in df.columns]
missing_reg = [f for f in reg_features if f not in df.columns]
if missing_clf:
    raise ValueError(f"Missing classification features: {missing_clf}")
if missing_reg:
    raise ValueError(f"Missing regression features: {missing_reg}")

In [7]:
df['cold_start_proba'] = clf.predict_proba(df[clf_features])[:, 1]

In [8]:
if 'cold_start_prob_est' not in df.columns:
    df['cold_start_prob_est'] = df['cold_start_proba']

In [9]:
df['predicted_delay'] = reg.predict(df[reg_features])


In [10]:
def smart_prewarm(prob, delay, prob_thresh=0.6, delay_thresh=400):
    return int(prob > prob_thresh or delay > delay_thresh)
df['prewarm'] = df.apply(lambda row: smart_prewarm(row['cold_start_proba'], row['predicted_delay']), axis=1)


🔥 Smart Prewarming Decision Logic


In [11]:
cold_start_penalty = 0.005  # $/second of cold start delay
prewarm_cost = 0.002        # $ per prewarm action
df['cold_start_cost'] = df['Cold_Start'] * df['Delay (s)'] * cold_start_penalty
df['prewarm_cost'] = df['prewarm'] * prewarm_cost
df['net_savings'] = df['cold_start_cost'] - df['prewarm_cost']

In [12]:
total = len(df)
actual_cold_starts = df['Cold_Start'].sum()
predicted_cold_starts = (df['cold_start_proba'] > 0.6).sum()
prewarm_actions = df['prewarm'].sum()
total_cold_cost = df['cold_start_cost'].sum()
total_prewarm_cost = df['prewarm_cost'].sum()
total_savings = df['net_savings'].sum()

In [13]:
print("=== Summary ===")
print(f"Total Requests: {total}")
print(f"Actual Cold Starts: {actual_cold_starts}")
print(f"Predicted Cold Starts (>60% prob): {predicted_cold_starts}")
print(f"Prewarming Actions Taken: {prewarm_actions}")
print(f"Total Cold Start Cost: ${total_cold_cost:.2f}")
print(f"Total Prewarming Cost: ${total_prewarm_cost:.2f}")
print(f"Net Cost Savings: ${total_savings:.2f}")

=== Summary ===
Total Requests: 10630
Actual Cold Starts: 317
Predicted Cold Starts (>60% prob): 36
Prewarming Actions Taken: 360
Total Cold Start Cost: $1585.22
Total Prewarming Cost: $0.72
Net Cost Savings: $1584.51
