# Baseline Models: Logistic Regression & Decision Tree

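This notebook trains two simple baseline classifiers, a logistic regression and a decision tree, using our new modular pipeline. Data loading, preprocessing, and training/evaluation all come from the shared `src/` modules, which keeps the workflow consistent and modular.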

In [None]:
import sys
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
import warnings
warnings.filterwarnings('ignore')

# Add the parent directory to sys.path so that the `src` package can be imported
sys.path.append(os.path.abspath('..'))

from src.data_loader import load_data, get_target
from src.preprocessing import get_preprocessing_pipeline
from src.trainer import train_and_evaluate, save_model

print("Modules imported.")

In [None]:
# Step 1 - read the churn CSV and separate the features from the target.
df = load_data('../data/Churn_Modelling.csv')
X, y, target_col = get_target(df)

# Feature columns are grouped purely by dtype; both lists are handed to
# get_preprocessing_pipeline in step 3.
# NOTE(review): any identifier-like column still present in X would be picked
# up here as a feature - confirm that load_data/get_target drop such columns.
categorical_cols = list(X.select_dtypes(include=['object', 'category']).columns)
numeric_cols = list(X.select_dtypes(include=[np.number]).columns)

# Step 2 - hold out 20% of the rows, stratified on the target, with a fixed
# seed so the split is reproducible between runs.
split_options = {'test_size': 0.2, 'random_state': 42, 'stratify': y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Step 3 - build the preprocessing step from the two column lists.
preprocessor = get_preprocessing_pipeline(numeric_cols, categorical_cols)

# Candidate baselines. Both estimators use class_weight='balanced' and a fixed
# random_state; the decision tree is additionally capped at max_depth=5.
logistic_baseline = LogisticRegression(
    solver='liblinear',
    class_weight='balanced',
    random_state=42,
)
tree_baseline = DecisionTreeClassifier(
    class_weight='balanced',
    max_depth=5,
    random_state=42,
)
baseline_models = {
    'LogisticRegression': logistic_baseline,
    'DecisionTree': tree_baseline,
}

# Run train_and_evaluate for every baseline, keep the returned metrics under
# the model's name, and hand the returned pipeline to save_model.
# NOTE(review): the same `preprocessor` object is passed on every iteration -
# confirm that train_and_evaluate clones it or that re-fitting it is intended.
baseline_results = {}
for name in baseline_models:
    model = baseline_models[name]
    fitted = train_and_evaluate(
        name, model, preprocessor, X_train, y_train, X_test, y_test
    )
    pipeline, metrics = fitted
    baseline_results[name] = metrics
    # One artifact per model, named after the lower-cased dictionary key.
    artifact_path = f'../models/{name.lower()}_pipeline.joblib'
    save_model(pipeline, artifact_path)