# 05 - Feature Engineering

**Purpose**: Create derived features for modeling and deeper analysis.

**Features to Create**:
1. **Matchup features**: Trophy diff, elixir diff, card level diff
2. **Deck complexity**: Weighted score based on elixir, spell count, legendary count
3. **Archetype indicators**: Beatdown, cycle, spell-heavy flags
4. **Card synergy scores**: Based on historical win rates of card pairs
5. **Trophy brackets**: Categorical variables for skill levels

**Output**: Clean feature matrix saved as Parquet for modeling

In [None]:
import sys, os, duckdb, pandas as pd, numpy as np

# The notebook is expected to run from a directory one level below the
# project root; make the project's `src` package directory importable.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.insert(0, os.path.join(PROJECT_ROOT, 'src'))

# Prefer the Parquet export when it exists (faster to scan); otherwise fall
# back to the CSV file.
_parquet_candidate = os.path.join(PROJECT_ROOT, 'battles.parquet')
_csv_candidate = os.path.join(PROJECT_ROOT, 'battles.csv')
DATA_PATH = _parquet_candidate if os.path.exists(_parquet_candidate) else _csv_candidate

from duckdb_utils import get_connection, create_battles_view, query_to_df, save_to_parquet, create_sample
from feature_engineering import (
    create_card_level_features,
    create_deck_archetype_features,
    create_trophy_bracket_features,
    create_matchup_features,
    create_tower_damage_features
)

# Obtain the connection object from the project's duckdb_utils helper
# (presumably a DuckDB connection -- confirm in duckdb_utils).
con = get_connection()
# Point the helper at the data file selected above (Parquet or CSV) so later
# cells can query it through `con`. NOTE(review): the name of the view it
# creates is defined inside duckdb_utils, not here.
create_battles_view(con, DATA_PATH)

## 1. Load Base Data

Work with a 10% sample of the battles while developing the feature engineering steps.

In [None]:
# Create the 10% sample if it does not exist yet; otherwise reuse the cached
# copy.
#
# The path is resolved against PROJECT_ROOT exactly once so that the existence
# check, the writer (`create_sample`) and the reader (`pd.read_parquet`) all
# refer to the same file. Previously `create_sample` received the bare
# relative path while the check/read used the PROJECT_ROOT-based path; since
# the working directory is one level below PROJECT_ROOT, the two could point
# at different locations and the sample would be regenerated on every run.
sample_path = os.path.join(PROJECT_ROOT, 'artifacts/sample_battles_10pct.parquet')
if not os.path.exists(sample_path):
    print("Creating 10% sample...")
    # Make sure the target directory exists on a fresh checkout.
    os.makedirs(os.path.dirname(sample_path), exist_ok=True)
    sample = create_sample(con, sample_pct=10, output_path=sample_path)
else:
    print("Loading existing sample...")
    sample = pd.read_parquet(sample_path)

print(f"Sample size: {len(sample):,} battles")

## 2. Create Matchup Features

In [None]:
# Derive the winner-vs-loser comparison columns from the sampled battles.
sample_features = create_matchup_features(sample)

# Report the features this step is expected to add.
print("Matchup features created:")
for matchup_feature in ('trophy_diff', 'elixir_diff', 'card_level_diff', 'spell_diff'):
    print(f"  - {matchup_feature}")

## 3. Create Deck Archetype Features

In [None]:
# Apply the archetype helper once per side of the battle, winner first and
# then loser, feeding each result into the next call.
for side in ('winner', 'loser'):
    sample_features = create_deck_archetype_features(sample_features, player=side)

print("Archetype features created for both players")

## 4. Create Trophy Bracket Features

In [None]:
# Add the categorical `trophy_bracket` column to the feature frame.
sample_features = create_trophy_bracket_features(sample_features)

# Show how many sampled battles fall into each bracket.
bracket_counts = sample_features['trophy_bracket'].value_counts()
print("Trophy bracket distribution:")
print(bracket_counts)

## 5. Create Tower Damage Features

In [None]:
# Extend the feature frame with the crown-based columns.
sample_features = create_tower_damage_features(sample_features)

# Report the features this step is expected to add.
print("Tower damage features created:")
for crown_feature in ('crown_diff', 'close_game', 'three_crown_win'):
    print(f"  - {crown_feature}")

## 6. Save Feature Matrix

In [None]:
# Persist the engineered feature matrix so modeling can load it later.
# NOTE(review): the path is relative; where it lands depends on how
# save_to_parquet resolves it -- confirm it targets the project's artifacts/.
save_to_parquet(sample_features, 'artifacts/model_features.parquet')

# The success marker was mojibake ("âœ“": a UTF-8 check mark decoded as
# cp1252); restored to the intended character.
print(f"\n✓ Feature matrix saved with {len(sample_features.columns)} columns")

## 7. Feature Summary

In [None]:
# List all engineered features by matching column names against the
# substrings the feature helpers are expected to produce.
# 'three_crown_win' is included because the tower-damage step reports
# creating it, and none of the other markers match that name, so it was
# previously missing from the summary.
feature_markers = (
    '_diff',
    '_heavy',
    '_beatdown',
    '_cycle',
    'bracket',
    'close_game',
    'three_crown_win',
)
engineered_cols = [
    col for col in sample_features.columns
    if any(marker in col for marker in feature_markers)
]

print(f"Engineered features ({len(engineered_cols)}):")
for col in sorted(engineered_cols):
    print(f"  - {col}")