In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
import mlflow
import mlflow.sklearn

# Load the two snapshots of the churn data from the working directory.
df_clean = pd.read_csv('telco_churn_clean.csv')
df_drifted = pd.read_csv('telco_churn_drifted.csv')

# Stack both snapshots row-wise, shuffle with a fixed seed so the row order is
# reproducible, and renumber the index from 0 (simulates real-world adaptation).
df_combined = (
    pd.concat([df_clean, df_drifted])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

print("Combined dataset shape:", df_combined.shape)


Combined dataset shape: (14064, 31)


In [2]:
# Separate the target column from the features.
y = df_combined['Churn']
X = df_combined.drop(columns='Churn')

# Stratified 80/20 hold-out split with a fixed seed.
# NOTE(review): if the drifted file holds perturbed copies of the clean rows,
# a random split can place both versions of one record on opposite sides of
# the split and inflate the scores below. Confirm how the drifted set was built.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit a fresh forest on the training split; class_weight='balanced' reweights
# the classes inversely to their frequency.
model = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
).fit(X_train, y_train)

# Score the held-out split.
y_pred = model.predict(X_test)

acc, prec, rec, f1 = (
    score_fn(y_test, y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print("Retrained Model on Combined Data:\n")
print(classification_report(y_test, y_pred))

Retrained Model on Combined Data:

              precision    recall  f1-score   support

           0       0.79      0.90      0.84      1968
           1       0.65      0.44      0.52       845

    accuracy                           0.76      2813
   macro avg       0.72      0.67      0.68      2813
weighted avg       0.75      0.76      0.75      2813



In [3]:
# Record the retraining run under the "Telco Churn Monitoring" experiment:
# hyperparameters, hold-out metrics, and the fitted model artifact.
mlflow.set_experiment("Telco Churn Monitoring")

with mlflow.start_run(run_name="Retrained on Clean+Drifted"):
    # Read the hyperparameters off the fitted estimator instead of repeating
    # literals, so the logged values cannot go stale if the model definition
    # is edited.
    mlflow.log_params({
        "model_type": "RandomForest",
        "training_data": "clean + drifted",
        "n_estimators": model.n_estimators,
        "class_weight": model.class_weight,
    })

    # Hold-out scores computed when the model was evaluated.
    mlflow.log_metrics({
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1_score": f1,
    })

    # Persist the fitted estimator as a run artifact.
    mlflow.sklearn.log_model(model, "rf_retrained_mixed")

    print("Retrained model logged to MLflow.")



Retrained model logged to MLflow.
