In [2]:
# Dependency (pinned; run once in the kernel's environment if missing): %pip install evidently==0.3.2

In [1]:
# Dependency (pinned; run once in the kernel's environment if missing): %pip install pydantic==1.10.13

In [3]:
import pandas as pd
import numpy as np
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from sklearn.preprocessing import LabelEncoder
import mlflow
from evidently.test_suite import TestSuite
from evidently.test_preset import DataDriftTestPreset
import os

In [4]:
# Read the Telco churn CSV as-is; type coercion and missing-value handling
# are applied to `df` afterwards.
raw_csv_path = "data/Telco-Customer-Churn.csv"
df = pd.read_csv(raw_csv_path)

In [5]:
# Parse TotalCharges as a number (unparseable entries become NaN), then
# discard every row that contains a missing value in any column.
total_charges_numeric = pd.to_numeric(df["TotalCharges"], errors="coerce")
df = df.assign(TotalCharges=total_charges_numeric).dropna()

In [6]:
# Simulate drift: skew contract types, monthly charges, and tenure.
#
# A dedicated, seeded generator is used so the synthetic drift -- and hence the
# drift report and the logged drift score -- is identical on every
# "Restart Kernel -> Run All". Change DRIFT_SEED to draw a different sample.
DRIFT_SEED = 42
rng = np.random.default_rng(DRIFT_SEED)

df_drifted = df.copy()
n_rows = len(df_drifted)

# Roughly 70% of rows are forced onto a month-to-month contract.
contract_mask = rng.random(n_rows) < 0.7
df_drifted["Contract"] = df_drifted["Contract"].mask(contract_mask, "Month-to-month")

# Roughly 50% of rows get a short tenure drawn uniformly from 0..11
# (upper bound exclusive, matching the original randint(0, 12)).
tenure_mask = rng.random(n_rows) < 0.5
short_tenure = rng.integers(0, 12, size=n_rows)
df_drifted["tenure"] = df_drifted["tenure"].mask(tenure_mask, short_tenure)

# Shift monthly charges with Gaussian noise (mean 5, standard deviation 10).
df_drifted["MonthlyCharges"] = df_drifted["MonthlyCharges"] + rng.normal(5, 10, size=n_rows)

# Save drifted version (optional)
df_drifted.to_csv("data/processed_telco.csv", index=False)

In [7]:
# Build integer-encoded copies of the reference and current (drifted) frames.
ref = df.copy()
cur = df_drifted.copy()

# Every object-typed column is label-encoded. The encoder is fitted on the
# union of reference and current values so that a category present only in the
# current data gets its own code instead of raising ValueError in transform().
# When both frames share the same categories the codes are unchanged, because
# LabelEncoder assigns codes by sorted class order.
# NOTE(review): identifier-like text columns are encoded and scored for drift
# as well -- confirm that is intended.
le = LabelEncoder()
for col in ref.select_dtypes(include="object").columns:
    le.fit(pd.concat([ref[col], cur[col]], ignore_index=True))
    ref[col] = le.transform(ref[col])
    cur[col] = le.transform(cur[col])

In [8]:
# Run Evidently's data-drift preset on the encoded reference/current frames
# and write the resulting HTML report to the working directory.
drift_metrics = [DataDriftPreset()]
report = Report(metrics=drift_metrics)
report.run(current_data=cur, reference_data=ref)
report.save_html("drift_report.html")
print("Drift report saved as 'drift_report.html'")

Drift report saved as 'drift_report.html'


In [12]:
# Log the share of drifted columns and the HTML report to MLflow, and start
# retraining when the share exceeds DRIFT_RETRAIN_THRESHOLD.
import subprocess
import sys

mlflow.set_experiment("Churn_Prediction_AdaptiveML")

# Share of drifted columns above which retraining is triggered.
DRIFT_RETRAIN_THRESHOLD = 0.3

# Run the data-drift test suite in this cell so that `result` exists on a fresh
# kernel; nothing else in the notebook defines it.
suite = TestSuite(tests=[DataDriftTestPreset()])
suite.run(reference_data=ref, current_data=cur)
result = suite.as_dict()

# Extract per-feature drift flags from the test-suite result.
# NOTE(review): assumes the first test of the preset carries the per-feature
# results under parameters["features"] with a boolean "detected" flag --
# confirm against the installed Evidently version.
feature_stats = result["tests"][0]["parameters"]["features"]
n_total = len(feature_stats)
n_drifted = sum(1 for stats in feature_stats.values() if stats["detected"])
# Avoid ZeroDivisionError when no features were evaluated.
drift_score = n_drifted / n_total if n_total else 0.0

# Log drift score + artifact to MLflow
with mlflow.start_run(run_name="drift_monitoring_report"):
    mlflow.log_metric("drift_score", drift_score)
    mlflow.log_metric("drifted_columns", n_drifted)
    mlflow.log_metric("total_columns", n_total)
    mlflow.log_artifact("drift_report.html")
    print(f"Logged drift_score={drift_score:.3f} ({n_drifted}/{n_total}) and report to MLflow")

    # Optional: trigger retrain
    if drift_score > DRIFT_RETRAIN_THRESHOLD:
        print("Drift is significant. Triggering retraining...")
        # sys.executable runs the script with the kernel's own interpreter
        # (and therefore its installed packages) instead of whichever
        # "python" happens to be first on PATH.
        exit_code = subprocess.call([sys.executable, "retrain_on_drift.py"])
        if exit_code != 0:
            # Surface the failure without aborting the notebook run.
            print(f"Retraining script exited with non-zero status {exit_code}")
    else:
        print("Drift is within acceptable range.")

Logged drift_score=0.143 (3/21) and report to MLflow
Drift is within acceptable range.
