In [41]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.pipeline import Pipeline
import pandas as pd

In [43]:

class ModelTrainer:
    """Grid-search several classifiers and remember the best one by validation F1.

    Attributes:
        models: Maps each candidate name to its tuned estimator (filled by
            ``train_models``).
        best_model: The tuned estimator with the highest validation F1 seen so
            far, or ``None`` before any model has been trained.
        best_score: Validation F1 of ``best_model`` (0 before training).
        results: Maps each candidate name to a dict with keys ``"accuracy"``,
            ``"f1_score"``, ``"model"`` and ``"classification_report"``.
    """

    def __init__(self):
        self.models = {}
        self.best_model = None
        self.best_score = 0
        self.results = {}

    @staticmethod
    def _build_model_grid():
        """Return the candidate estimators and the hyper-parameter grid searched for each."""
        return {
            "LogisticRegression": {
                "model": LogisticRegression(max_iter=1000),
                "params": {
                    "C": [0.01, 0.1, 1, 10],
                    "penalty": ['l2'],
                    "solver": ['lbfgs']
                }
            },
            "RandomForest": {
                "model": RandomForestClassifier(random_state=42),
                "params": {
                    "n_estimators": [100, 200],
                    "max_depth": [None, 10, 20],
                    "min_samples_split": [2, 5]
                }
            },
            "GradientBoosting": {
                "model": GradientBoostingClassifier(random_state=42),
                "params": {
                    "n_estimators": [100, 200],
                    "learning_rate": [0.05, 0.1],
                    "max_depth": [3, 5]
                }
            },
            "XGBoost": {
                # NOTE(review): use_label_encoder is reported as deprecated in recent
                # XGBoost releases — confirm the installed version still accepts it.
                "model": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
                "params": {
                    "n_estimators": [100, 200],
                    "learning_rate": [0.05, 0.1],
                    "max_depth": [3, 5]
                }
            }
        }

    def train_models(self, X_train, y_train, X_val, y_val):
        """Tune every candidate on the training data and score it on the validation data.

        Each candidate is tuned with a 3-fold ``GridSearchCV`` scored by F1, then
        its best estimator is evaluated on ``(X_val, y_val)``. Outcomes are stored
        in ``self.results`` and ``self.models``; ``self.best_model`` and
        ``self.best_score`` are updated when a candidate beats the current best.

        Args:
            X_train: Training features passed to ``GridSearchCV.fit``.
            y_train: Training labels. F1 is used with its default settings, which
                presumably requires a binary target — confirm against the data.
            X_val: Validation features used for the final predictions.
            y_val: Validation labels the predictions are compared against.

        Returns:
            None. Results are stored on the instance.
        """
        for name, config in self._build_model_grid().items():
            print(f"\n🔍 Training and tuning {name}...")
            grid_search = GridSearchCV(config['model'], config['params'], cv=3, scoring='f1', n_jobs=-1)
            grid_search.fit(X_train, y_train)

            tuned_model = grid_search.best_estimator_
            y_pred = tuned_model.predict(X_val)

            acc = accuracy_score(y_val, y_pred)
            f1 = f1_score(y_val, y_pred)

            self.models[name] = tuned_model
            self.results[name] = {
                "accuracy": acc,
                "f1_score": f1,
                "model": tuned_model,
                "classification_report": classification_report(y_val, y_pred, output_dict=True)
            }

            # Always accept the first candidate: with a strict ">" against the
            # initial score of 0, a run where every model scores F1 == 0 would
            # otherwise leave best_model as None.
            if self.best_model is None or f1 > self.best_score:
                self.best_score = f1
                self.best_model = tuned_model

    def get_best_model(self):
        """Return the estimator with the highest validation F1, or None if nothing was trained."""
        return self.best_model

    def get_results(self):
        """Return a DataFrame with one row per trained model.

        Columns are ``"Accuracy"`` and ``"F1 Score"``, each rounded to 4 decimal
        places; rows follow the order in which the models were trained. The
        frame is empty (but still has both columns) before any training.
        """
        names = list(self.results)
        rows = [
            {
                "Accuracy": round(self.results[name]["accuracy"], 4),
                "F1 Score": round(self.results[name]["f1_score"], 4)
            }
            for name in names
        ]
        return pd.DataFrame(rows, index=names, columns=["Accuracy", "F1 Score"])


In [45]:
from sklearn.model_selection import train_test_split

# --- Load the cleaned dataset (swap in your own file name here) ---
df = pd.read_csv("../data/cleaned_churn_data.csv")

# --- The 'churn' column is the target; every other column is a feature ---
y = df['churn']
X = df.drop(columns=['churn'])

# --- Stratified 80/20 train/validation split with a fixed seed ---
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# --- Tune all candidate models and score them on the validation split ---
trainer = ModelTrainer()
trainer.train_models(X_train, y_train, X_val, y_val)

print("📊 Performance Summary:")
results_table = trainer.get_results()
print(results_table)

# --- Detailed validation report for the winning model ---
best_model = trainer.get_best_model()
y_pred = best_model.predict(X_val)
print("📄 Classification Report:")
val_report = classification_report(y_val, y_pred)
print(val_report)
print("🏆 Best Model:", type(best_model).__name__)




🔍 Training and tuning LogisticRegression...

🔍 Training and tuning RandomForest...

🔍 Training and tuning GradientBoosting...

🔍 Training and tuning XGBoost...
📊 Performance Summary:
                    Accuracy  F1 Score
LogisticRegression    0.8162    0.6266
RandomForest          0.8178    0.6411
GradientBoosting      0.8287    0.6605
XGBoost               0.8287    0.6615
📄 Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.91      0.89       937
           1       0.71      0.62      0.66       347

    accuracy                           0.83      1284
   macro avg       0.79      0.76      0.77      1284
weighted avg       0.82      0.83      0.82      1284

🏆 Best Model: XGBClassifier


In [46]:
import joblib
# Persist the selected estimator to disk with joblib.
# Fail loudly rather than pickling None and reporting success when no model
# has been selected (e.g. this cell was run before training).
if best_model is None:
    raise ValueError("No trained model to save; run the training cell first.")

MODEL_PATH = "best_churn_model.pkl"  # single source for the file name used below
joblib.dump(best_model, MODEL_PATH)
print(f"✅ Best model saved as {MODEL_PATH}")

✅ Best model saved as best_churn_model.pkl
