In [1]:
import joblib
import pandas as pd

model = joblib.load("models/churn-model.pkl")
scaler = joblib.load("models/scaler.pkl")

ref_data = pd.read_csv("data/train_data.csv")
prod_data = pd.read_csv("data/new_batch_drifted.csv")

In [2]:
ref_data.shape

(5274, 20)

In [3]:
prod_data.shape

(1758, 20)

In [4]:
y_ref = ref_data["Churn"]
X_ref = ref_data.drop(columns=["Churn"])

y_prod = prod_data["Churn"]
X_prod = prod_data.drop(columns=["Churn"])

In [5]:
X_ref_encoded = pd.get_dummies(X_ref,drop_first=True)
X_prod_encoded = pd.get_dummies(X_prod,drop_first=True)

X_ref_encoded, X_prod_encoded = X_ref_encoded.align(X_prod_encoded,join="left",axis=1,fill_value=0)

In [6]:
numerical_cols = X_ref.select_dtypes(include=["int64","float64"]).columns

X_ref_encoded[numerical_cols] = scaler.transform(X_ref_encoded[numerical_cols])
X_prod_encoded[numerical_cols] = scaler.transform(X_prod_encoded[numerical_cols])

In [7]:
ref_probs = model.predict_proba(X_ref_encoded)[:,1]
prod_probs = model.predict_proba(X_prod_encoded)[:,1]

ref_preds = model.predict(X_ref_encoded)
prod_preds = model.predict(X_prod_encoded)

In [8]:
print("Reference Churn Rate:",ref_preds.mean())
print("Production Churn Rate:",prod_preds.mean())

Reference Churn Rate: 0.22582480091012513
Production Churn Rate: 0.0022753128555176336


In [9]:
print("\nReference Probability Summary:\n")
print(pd.Series(ref_probs).describe())
print("\n\nProduction Probability Summary:\n")
print(pd.Series(prod_probs).describe())


Reference Probability Summary:

count    5274.000000
mean        0.268898
std         0.245366
min         0.001367
25%         0.043513
50%         0.197308
75%         0.467279
max         0.853505
dtype: float64


Production Probability Summary:

count    1758.000000
mean        0.073991
std         0.069520
min         0.001381
25%         0.020776
50%         0.056015
75%         0.106272
max         0.580794
dtype: float64


MAKING REUSABLE FUNCTIONS TO PREDICT DRIFTS AND SHIFTS

In [10]:
import pandas as pd
import joblib
from scipy.stats import ks_2samp, chi2_contingency

model = joblib.load("models/churn-model.pkl")
scaler = joblib.load("models/scaler.pkl")

ref_data = pd.read_csv("data/train_data.csv")
prod_data = pd.read_csv("data/new_batch_drifted.csv")

print("Artifacts loaded successfully.")


Artifacts loaded successfully.


In [11]:
#This ensures production inference uses the same logic as training.

def preprocess_for_inference (df,scaler,ref_cols):
    X = df.drop(columns=["Churn"])
    
    X_encoded = pd.get_dummies(X,drop_first=True)
    X_encoded = X_encoded.reindex(columns=ref_cols,fill_value=0)
    
    numerical_cols = X.select_dtypes(include=["int64","float64"]).columns
    X_encoded[numerical_cols] = scaler.transform(X_encoded[numerical_cols])
    
    return X_encoded

In [12]:
#function for data drift detection

def detect_data_drift (ref_df,prod_df,alpha=0.05):
    drift_report = {}
    
    numerical_cols = ref_df.select_dtypes(include=["int64","float64"]).columns
    categorical_cols = ref_df.select_dtypes(include=["object"]).columns

    for col in numerical_cols:
        stat,p_value = ks_2samp(ref_df[col],prod_df[col])
        drift_report[col]={
            "type" : "numerical",
            "p_value" : p_value,
            "drift_detected" : p_value < alpha
        }
        
    for col in categorical_cols:
        ref_counts = ref_df[col].value_counts()
        prod_counts = prod_df[col].value_counts()
        combined = pd.concat([ref_counts,prod_counts],axis=1).fillna(0)
        chi2,p_value,_,_ = chi2_contingency(combined)
        drift_report[col]={
            "type" : "categorical",
            "p_value" : p_value,
            "drift_detected" : p_value < alpha
        }

        return pd.DataFrame(drift_report).T

In [13]:
#function to realise shift in model prediction

def detect_prediction_shift (model,X_ref,X_prod):
    ref_probs = model.predict_proba(X_ref)[:,1]
    prod_probs = model.predict_proba(X_prod)[:,1]

    ref_churn_rate = (ref_probs>0.5).mean()
    prod_churn_rate = (prod_probs>0.5).mean()

    return {
        "reference_churn_rate" : ref_churn_rate,
        "production_churn_rate" : prod_churn_rate,
        "difference" : abs(ref_churn_rate - prod_churn_rate) 
    }

Detecting Data Drift

In [14]:
data_drift_report = detect_data_drift(ref_data,prod_data)
data_drift_report

Unnamed: 0,type,p_value,drift_detected
SeniorCitizen,numerical,1.0,False
tenure,numerical,0.715207,False
MonthlyCharges,numerical,0.0,True
TotalCharges,numerical,0.843844,False
Churn,numerical,0.950803,False
gender,categorical,0.879601,False


Detecting Prediction Shift

In [16]:
ref_cols = pd.get_dummies(ref_data.drop(columns=["Churn"]),drop_first=True).columns

X_ref = preprocess_for_inference(ref_data,scaler,ref_cols)
X_prod = preprocess_for_inference(prod_data,scaler,ref_cols)

prediction_shift = detect_prediction_shift(model,X_ref,X_prod)
prediction_shift

{'reference_churn_rate': np.float64(0.22582480091012513),
 'production_churn_rate': np.float64(0.0022753128555176336),
 'difference': np.float64(0.2235494880546075)}

Alert Rules & "Model at Risk" Decisions

In [18]:
DATA_DRIFT_THRESHOLD = 0.3
PREDICTION_SHIFT_THRESHOLD = 0.05

drifted_features = (data_drift_report["drift_detected"] == True).sum()
total_features = len(data_drift_report)
drift_ratio = drifted_features/total_features

print(f"Drifted Features = {drifted_features}/{total_features}")
print(f"Drift Ration = {drift_ratio:.2f}")

Drifted Features = 1/6
Drift Ration = 0.17


In [19]:
prediction_shift_value = prediction_shift["difference"]
print("Prediction Shitf:",prediction_shift_value)

Prediction Shitf: 0.2235494880546075


In [20]:
if (drift_ratio > DATA_DRIFT_THRESHOLD) or (prediction_shift_value > PREDICTION_SHIFT_THRESHOLD):
    model_status = "Model_at_Risk"
else:
    model_status = "Model_OK"

model_status

'Model_at_Risk'

In [22]:
summary = {
    "Total_Features" : len(data_drift_report),
    "Drifted_Features" : drifted_features,
    "Drift_Ratio" : round(drift_ratio, 3),
    "Reference_Churn_rate" : round(prediction_shift["reference_churn_rate"], 3),
    "Production_Churn_rate" : round(prediction_shift["production_churn_rate"], 3),
    "Prediction_Shift" : round(prediction_shift["difference"], 3),
    "Model_Status" : model_status
}
summary_df = pd.DataFrame([summary])
summary_df

Unnamed: 0,Total_Features,Drifted_Features,Drift_Ratio,Reference_Churn_rate,Production_Churn_rate,Prediction_Shift,Model_Status
0,6,1,0.167,0.226,0.002,0.224,Model_at_Risk


In [23]:
drift_report_with_status = data_drift_report.copy()
drift_report_with_status["model_status"] = model_status

drift_report_with_status.head()

Unnamed: 0,type,p_value,drift_detected,model_status
SeniorCitizen,numerical,1.0,False,Model_at_Risk
tenure,numerical,0.715207,False,Model_at_Risk
MonthlyCharges,numerical,0.0,True,Model_at_Risk
TotalCharges,numerical,0.843844,False,Model_at_Risk
Churn,numerical,0.950803,False,Model_at_Risk


In [25]:
summary_df.to_csv("reports/monitoring_summary.csv", index=False)
drift_report_with_status.to_csv("reports/feature_drift_report.csv")
print("Reports saved:")
print("- reports/monitoring_summary.csv")
print("- reports/feature_drift_report.csv")

Reports saved:
- reports/monitoring_summary.csv
- reports/feature_drift_report.csv
