In [1]:
import sys
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Make the parent of the current working directory importable so that the
# `src` package resolves. The membership check keeps this cell idempotent:
# re-running it does not keep appending the same entry to sys.path.
_project_root = os.path.abspath('..')
if _project_root not in sys.path:
    sys.path.append(_project_root)

# Project-local helpers used by the cells below.
from src.data_loader import load_data, get_target
from src.preprocessing import get_preprocessing_pipeline
from src.trainer import train_and_evaluate, save_model

print("Project modules imported successfully.")

Project modules imported successfully.


In [None]:
# --- 1. Load -----------------------------------------------------------
# Read the churn CSV and let the project helper unpack it into the feature
# frame X, the target y and the target column name.
df = load_data('../data/Churn_Modelling.csv')
X, y, target_col = get_target(df)

# Group feature columns by dtype; both lists are handed to the
# preprocessing factory below.
numeric_cols = list(X.select_dtypes(include=[np.number]).columns)
categorical_cols = list(X.select_dtypes(include=['object', 'category']).columns)

# --- 2. Split ----------------------------------------------------------
# 80/20 hold-out with a fixed seed; stratifying on y keeps the class
# proportions the same in both partitions.
_split_kwargs = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **_split_kwargs)

# --- 3. Preprocessing --------------------------------------------------
# Built from the column groups only; nothing is fitted in this cell.
# NOTE(review): presumably returns an unfitted sklearn transformer — confirm
# against src.preprocessing.
preprocessor = get_preprocessing_pipeline(numeric_cols, categorical_cols)

print(f"Data loaded and split. Features: {X.shape[1]}")

In [None]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Gradient-boosting candidates, all seeded identically for reproducibility.
# `use_label_encoder` is intentionally not passed to XGBClassifier: the
# argument was deprecated in XGBoost 1.x and recent releases ignore it and
# only emit a "parameter not used" warning.
advanced_models = {
    'XGBoost': XGBClassifier(eval_metric='logloss', random_state=42),
    'LightGBM': LGBMClassifier(random_state=42),
    'CatBoost': CatBoostClassifier(verbose=0, random_state=42)
}

# Create the output directory up front so a missing folder cannot abort the
# run after a model has already been trained. Harmless if it already exists.
models_dir = '../models'
os.makedirs(models_dir, exist_ok=True)

# Metrics per model name, filled in by the loop and compared in the next cell.
advanced_results = {}

for name, model in advanced_models.items():
    # NOTE(review): the same `preprocessor` object is handed to every run;
    # confirm that train_and_evaluate clones or refits it per model.
    pipeline, metrics = train_and_evaluate(
        name, model, preprocessor, X_train, y_train, X_test, y_test
    )
    advanced_results[name] = metrics
    # Persist each fitted pipeline under a lower-cased, model-specific name.
    save_model(pipeline, f'{models_dir}/{name.lower()}_pipeline.joblib')

In [None]:
# Compare results: transpose so each model becomes a row, then show the
# table ordered from highest to lowest 'f1'.
# NOTE(review): assumes every metrics entry exposes an 'f1' key — confirm
# against src.trainer.train_and_evaluate.
res_df = pd.DataFrame(advanced_results).transpose()
res_df.sort_values('f1', ascending=False)