# 02. Feature Engineering & Modeling

This notebook demonstrates the Phase 2 workflow: Feature Engineering, Model Training, and Evaluation.

In [None]:
import sys
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def find_project_root(start=None):
    """Return the directory that holds the `src` package.

    Checks `start` (default: the current working directory) and then its
    parent for a `src` sub-directory, so the answer is the same whether the
    kernel is sitting in `notebooks/` or has already been moved to the
    project root by a later `os.chdir('..')`. If neither directory contains
    `src`, the parent directory is returned, which matches the previous
    `'../'` behaviour.
    """
    start = os.path.abspath(start if start is not None else os.getcwd())
    parent = os.path.dirname(start)
    for candidate in (start, parent):
        if os.path.isdir(os.path.join(candidate, 'src')):
            return candidate
    return parent


# Add the project root to the import path so `src.*` can be imported.
# Guarded so that re-running this cell does not stack duplicate entries.
PROJECT_ROOT = find_project_root()
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.models.train import train_model
from src.models.model_selection import compare_models

%matplotlib inline

## 1. Run Training Pipeline

We will execute the `train_model` function, which handles:
1. Loading Raw Data
2. Creating new features (Engineering)
3. Splitting Data
4. Building Preprocessing Pipeline
5. Hyperparameter Tuning (GridSearch)
6. Evaluation (ROC, Confusion Matrix)

In [None]:
# Heads-up: the GridSearch step can keep this cell busy for a few minutes.
# train_model assumes 'data/raw' relative to the project root, so if the kernel
# was started from inside the 'notebooks' folder, step up one level before calling it.

started_in_notebooks = os.getcwd().endswith('notebooks')
if started_in_notebooks:
    os.chdir(os.pardir)
    print(f"Changed directory to: {os.getcwd()}")

train_model()

## 2. Compare Models

In [None]:
# Run the project's model comparison and keep the result; the next cell plots
# its 'model' and 'f1' columns (presumably a pandas DataFrame - confirm in
# src.models.model_selection).
comparison_df = compare_models()
# Bare trailing expression: the notebook renders it as this cell's output.
comparison_df

In [None]:
# Bar chart of the F1 score per model, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=comparison_df, x='model', y='f1', ax=ax)
ax.set_title("F1 Score Comparison")
# Pin the y-axis to the 0-1 range instead of letting it auto-scale.
ax.set_ylim(0, 1)
plt.show()