# ⚙️ FPL-AI Feature Engineering Notebook

## Overview
This notebook creates advanced features for Fantasy Premier League prediction models:
- **Form Analysis**: Multi-window rolling performance metrics
- **Fixture Intelligence**: Dynamic difficulty beyond basic FDR
- **Team Dynamics**: Player roles, competition, set pieces
- **Position-Specific Features**: GK saves, DEF attacks, MID creativity, FWD threat

## Feature Categories:
- 📈 **Form Metrics**: 3, 5, 8, 10 gameweek windows
- ⚽ **Fixture Analysis**: Home/away, opponent strength, expected goals
- 👥 **Team Context**: Price rank, rotation risk, set piece roles
- 🎯 **Position Features**: Role-specific performance indicators

## Expected Output: 50+ features per player

In [None]:
# Cell 1: Environment Setup and Data Loading
# Installs dependencies, mounts Google Drive and switches the working
# directory to the project folder (Colab-only: relies on `!` shell magics and
# the google.colab package).
print("⚙️ Setting up FPL-AI Feature Engineering Environment...")

# Install required packages (-q keeps pip output quiet)
!pip install -q pandas numpy scipy scikit-learn
!pip install -q tqdm ipywidgets plotly

# Mount Google Drive so the project folder below is reachable
from google.colab import drive
drive.mount('/content/drive')

# Set project directory; after chdir the relative 'data/processed' path
# created below lives inside this folder
import os
project_dir = '/content/drive/MyDrive/FPL_AI_Project'
os.chdir(project_dir)

# Create processed data directory (exist_ok=True makes re-running the cell safe)
os.makedirs('data/processed', exist_ok=True)

print("✅ Environment setup complete!")
print(f"📁 Working directory: {os.getcwd()}")

# Import libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import glob
import json
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')

print("📚 Libraries imported successfully!")

In [None]:
# Cell 2: Load Collected Data

print("📂 Loading collected FPL data...")

# Load the most recent data files
def load_latest_data(pattern, exclude=()):
    """Load the most recently created CSV in data/raw whose name contains pattern.

    Args:
        pattern: Substring that must appear in the file name.
        exclude: Iterable of substrings; a file whose base name contains any
            of them is ignored. Lets a caller keep 'gameweeks' from also
            matching 'historical_gameweeks' files. Defaults to no exclusions.

    Returns:
        A ``(DataFrame, path)`` tuple for the newest matching file (by
        ``os.path.getctime``), or ``(empty DataFrame, None)`` when nothing
        matches.
    """
    files = [
        path for path in glob.glob(f'data/raw/*{pattern}*.csv')
        if not any(token in os.path.basename(path) for token in exclude)
    ]
    if files:
        latest_file = max(files, key=os.path.getctime)
        return pd.read_csv(latest_file), latest_file
    return pd.DataFrame(), None

# Load datasets
datasets = {}
file_paths = {}

data_types = ['players', 'teams', 'gameweeks', 'fixtures', 'historical_gameweeks', 'injuries']

for data_type in data_types:
    # A data type that is a substring of another one (e.g. 'gameweeks' inside
    # 'historical_gameweeks') would otherwise pick up the other type's files.
    longer_names = [
        other for other in data_types
        if other != data_type and data_type in other
    ]
    df, filepath = load_latest_data(data_type, exclude=longer_names)
    datasets[data_type] = df
    file_paths[data_type] = filepath

    if not df.empty:
        print(f"✅ {data_type}: {len(df)} records from {filepath}")
    else:
        print(f"⚠️ {data_type}: No data found")

# Validate required data
required_data = ['players', 'historical_gameweeks']
missing_data = [dt for dt in required_data if datasets[dt].empty]

if missing_data:
    print(f"❌ Missing required data: {missing_data}")
    print("Please run FPL_Data_Collection.ipynb first.")
else:
    print("\n✅ All required data loaded successfully!")
    print(f"📊 Total players: {len(datasets['players'])}")
    print(f"📈 Historical records: {len(datasets['historical_gameweeks'])}")

    # Show data overview
    if not datasets['players'].empty:
        print(f"\n👥 Players by position:")
        print(datasets['players']['position'].value_counts())

    if not datasets['historical_gameweeks'].empty:
        gw_range = datasets['historical_gameweeks']['gameweek']
        print(f"\n📅 Gameweek range: {gw_range.min()} - {gw_range.max()}")

In [None]:
# Cell 3: Form and Performance Features

class FormMetricsCalculator:
    """Calculate rolling form metrics for FPL players.

    For every window in ``form_windows`` the calculator summarises a player's
    rows from the ``window`` gameweeks that immediately precede the reference
    gameweek, and additionally produces one exponentially weighted score over
    the last five such rows.
    """

    # Metrics emitted for every window regardless of the available columns.
    _BASE_METRICS = ('points', 'minutes', 'goals', 'assists', 'bonus', 'consistency', 'trend')
    # Metrics emitted only when a column of the same name exists in the history.
    _OPTIONAL_METRICS = ('clean_sheets', 'saves')

    def __init__(self):
        self.form_windows = [3, 5, 8, 10]  # Different form windows

    def calculate_player_form(self, historical_df, player_id, current_gameweek):
        """Calculate form metrics for a specific player.

        Args:
            historical_df: DataFrame with at least ``player_id``, ``gameweek``,
                ``total_points``, ``minutes``, ``goals_scored``, ``assists``
                and ``bonus`` columns; ``clean_sheets`` and ``saves`` are used
                when present.
            player_id: Value matched against the ``player_id`` column.
            current_gameweek: Reference gameweek. Only rows with
                ``current_gameweek - window <= gameweek < current_gameweek``
                count towards a window, so the reference gameweek itself and
                anything after it never leak into the features.

        Returns:
            Dict of ``form_<metric>_<window>gw`` values plus
            ``weighted_form_5gw``. The key set depends only on the columns of
            ``historical_df``, never on how much data the player has.
        """
        optional_metrics = [
            metric for metric in self._OPTIONAL_METRICS
            if metric in historical_df.columns
        ]
        player_data = historical_df[historical_df['player_id'] == player_id]

        if player_data.empty:
            form_metrics = self._get_default_form_metrics()
            # Keep the key set identical to that of players with history.
            for window in self.form_windows:
                for metric in optional_metrics:
                    form_metrics[f'form_{metric}_{window}gw'] = 0.0
            return form_metrics

        # Restrict to completed gameweeks before the reference one.
        past_data = player_data[
            player_data['gameweek'] < current_gameweek
        ].sort_values('gameweek')
        form_metrics = {}

        # Calculate form for different windows
        for window in self.form_windows:
            recent_data = past_data[
                past_data['gameweek'] >= (current_gameweek - window)
            ]

            if recent_data.empty:
                # No data for this window
                for metric in (*self._BASE_METRICS, *optional_metrics):
                    form_metrics[f'form_{metric}_{window}gw'] = 0.0
                continue

            # Basic form metrics
            form_metrics[f'form_points_{window}gw'] = recent_data['total_points'].mean()
            form_metrics[f'form_minutes_{window}gw'] = recent_data['minutes'].mean()
            form_metrics[f'form_goals_{window}gw'] = recent_data['goals_scored'].sum()
            form_metrics[f'form_assists_{window}gw'] = recent_data['assists'].sum()
            form_metrics[f'form_bonus_{window}gw'] = recent_data['bonus'].mean()

            # Advanced form metrics (need at least two rows for std / slope)
            if len(recent_data) > 1:
                form_metrics[f'form_consistency_{window}gw'] = (
                    1.0 / (1.0 + recent_data['total_points'].std())
                )
                form_metrics[f'form_trend_{window}gw'] = self._calculate_trend(
                    recent_data['total_points'].values
                )
            else:
                form_metrics[f'form_consistency_{window}gw'] = 1.0
                form_metrics[f'form_trend_{window}gw'] = 0.0

            # Position-specific metrics
            for metric in optional_metrics:
                form_metrics[f'form_{metric}_{window}gw'] = recent_data[metric].sum()

        # Weighted recent form (exponential weighting, newest row weighs most)
        recent_5 = past_data.tail(5)
        if len(recent_5) == 5:
            weights = np.exp(np.linspace(-1, 0, 5))
            weights = weights / weights.sum()
            form_metrics['weighted_form_5gw'] = np.average(recent_5['total_points'], weights=weights)
        elif not recent_5.empty:
            # Fewer than five past rows: fall back to a plain mean.
            form_metrics['weighted_form_5gw'] = past_data['total_points'].mean()
        else:
            form_metrics['weighted_form_5gw'] = 0.0

        return form_metrics

    def _get_default_form_metrics(self):
        """Return zero-valued base form metrics for players with no history."""
        defaults = {}
        for window in self.form_windows:
            for metric in self._BASE_METRICS:
                defaults[f'form_{metric}_{window}gw'] = 0.0
        defaults['weighted_form_5gw'] = 0.0
        return defaults

    def _calculate_trend(self, points_series):
        """Return the slope of a least-squares line through the points.

        Fewer than two values yield 0.0 because no slope can be fitted.
        """
        if len(points_series) < 2:
            return 0.0

        x = np.arange(len(points_series))
        trend = np.polyfit(x, points_series, 1)[0]
        return float(trend)

# Build the shared form calculator instance
form_calculator = FormMetricsCalculator()
print("✅ Form metrics calculator initialized!")

# Smoke-test on the first listed player, using gameweek 10 as the reference
if not (datasets['historical_gameweeks'].empty or datasets['players'].empty):
    sample_player_id = datasets['players']['id'].iloc[0]
    sample_form = form_calculator.calculate_player_form(
        datasets['historical_gameweeks'],
        sample_player_id,
        10,
    )
    print(f"\n📊 Sample form metrics (Player {sample_player_id}):")
    # Only the first five metrics are printed; the rest are summarised
    preview = list(sample_form.items())[:5]
    for key, value in preview:
        print(f"  {key}: {value:.3f}")
    remaining = len(sample_form) - 5
    print(f"  ... and {remaining} more metrics")

In [None]:
# Cell 4: Fixture Difficulty and Team Strength Features

class FixtureAnalyzer:
    """Analyze fixture difficulty and team matchups.

    Team ratings are derived once from ``teams_df`` at construction time and
    then combined with ``fixtures_df`` rows to describe a team's fixture in a
    given gameweek and the difficulty of its upcoming run.
    """

    def __init__(self, teams_df, fixtures_df):
        """Store the input frames and precompute per-team strength ratings.

        Args:
            teams_df: DataFrame with an ``id`` column and, optionally, the
                ``strength_attack_*`` / ``strength_defence_*`` home/away columns.
            fixtures_df: DataFrame with ``event``, ``team_h`` and ``team_a``
                columns.
        """
        self.teams_df = teams_df
        self.fixtures_df = fixtures_df
        self.team_strength = self._calculate_team_strength()

    def _calculate_team_strength(self):
        """Calculate basic team strength ratings.

        Returns:
            Dict mapping team id to ``overall_strength`` (clamped to 1-5),
            ``attack_strength`` and ``defense_strength``.
        """
        team_strength = {}

        for _, team in self.teams_df.iterrows():
            team_id = team['id']

            # Home + away ratings; a missing column counts as 1000 each
            attack_strength = team.get('strength_attack_home', 1000) + team.get('strength_attack_away', 1000)
            defense_strength = team.get('strength_defence_home', 1000) + team.get('strength_defence_away', 1000)

            # Normalize to 1-5 scale. Four ratings of 1000 sum to 4000, so the
            # divisor must be 1600 for that baseline to land on 2.5; dividing
            # by 800 pushed every baseline-or-better team to 5.0 or more,
            # which the clamp below flattened to exactly 5.
            overall_strength = (attack_strength + defense_strength) / 1600

            team_strength[team_id] = {
                'overall_strength': max(1, min(5, overall_strength)),
                'attack_strength': attack_strength / 1000,
                'defense_strength': defense_strength / 1000
            }

        return team_strength

    def get_fixture_features(self, team_id, gameweek):
        """Get fixture features for a team in a specific gameweek.

        Args:
            team_id: Team whose fixture is described.
            gameweek: Value matched against the fixtures' ``event`` column.

        Returns:
            Dict with ``is_home``, ``opponent_id``, ``fixture_difficulty``,
            ``expected_goals_for``, ``expected_goals_against`` and
            ``next_5_fixtures_difficulty``. When the team has several fixtures
            in the gameweek, the per-fixture values of the last row win. When
            it has none, the neutral defaults are returned.
        """
        involves_team = (
            (self.fixtures_df['team_h'] == team_id) | (self.fixtures_df['team_a'] == team_id)
        )

        # Find fixtures for this team and gameweek
        team_fixtures = self.fixtures_df[(self.fixtures_df['event'] == gameweek) & involves_team]

        if team_fixtures.empty:
            return self._get_default_fixture_features()

        fixture_features = {}
        own_strength = self.team_strength.get(team_id)

        for _, fixture in team_fixtures.iterrows():
            is_home = fixture['team_h'] == team_id
            opponent_id = fixture['team_a'] if is_home else fixture['team_h']
            opponent = self.team_strength.get(opponent_id)

            # Basic fixture info
            fixture_features['is_home'] = float(is_home)
            fixture_features['opponent_id'] = opponent_id

            # Difficulty only needs the opponent's rating
            if opponent is not None:
                opp_strength = opponent['overall_strength']
                # Adjust for home advantage
                if is_home:
                    fixture_features['fixture_difficulty'] = max(1.0, opp_strength - 0.5)
                else:
                    fixture_features['fixture_difficulty'] = min(5.0, opp_strength + 0.5)
            else:
                fixture_features['fixture_difficulty'] = 3.0  # Neutral

            # Expected goals need both sides' ratings; previously an unrated
            # team facing a rated opponent raised KeyError here.
            if opponent is not None and own_strength is not None:
                base_xg = own_strength['attack_strength'] / opponent['defense_strength'] * 1.3  # League average adjustment
                fixture_features['expected_goals_for'] = base_xg * (1.15 if is_home else 1.0)

                base_xga = opponent['attack_strength'] / own_strength['defense_strength'] * 1.3
                fixture_features['expected_goals_against'] = base_xga * (1.0 if is_home else 1.15)
            else:
                fixture_features['expected_goals_for'] = 1.3
                fixture_features['expected_goals_against'] = 1.3

        # Next 5 fixtures difficulty (this gameweek and the four after it)
        next_fixtures = self.fixtures_df[
            self.fixtures_df['event'].between(gameweek, gameweek + 4) & involves_team
        ]

        difficulties = []
        for _, fix in next_fixtures.iterrows():
            home = fix['team_h'] == team_id
            opp = fix['team_a'] if home else fix['team_h']
            if opp in self.team_strength:
                opp_str = self.team_strength[opp]['overall_strength']
                difficulties.append(opp_str - 0.3 if home else opp_str + 0.3)

        fixture_features['next_5_fixtures_difficulty'] = np.mean(difficulties) if difficulties else 3.0

        return fixture_features

    def _get_default_fixture_features(self):
        """Return neutral fixture features for a team with no fixture."""
        return {
            'is_home': 0.5,
            'opponent_id': 0,
            'fixture_difficulty': 3.0,
            'expected_goals_for': 1.3,
            'expected_goals_against': 1.3,
            'next_5_fixtures_difficulty': 3.0
        }

# Build the fixture analyzer only when both of its inputs were loaded
if datasets['teams'].empty or datasets['fixtures'].empty:
    print("⚠️ Teams or fixtures data not available for fixture analysis")
    fixture_analyzer = None
else:
    fixture_analyzer = FixtureAnalyzer(datasets['teams'], datasets['fixtures'])
    print("✅ Fixture analyzer initialized!")

    # Rank teams by overall strength, strongest first
    print("\n💪 Team Strength Summary:")
    strengths = sorted(
        (
            (team_id, data['overall_strength'])
            for team_id, data in fixture_analyzer.team_strength.items()
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    print("Top 5 strongest teams:")
    for team_id, strength in strengths[:5]:
        name_matches = datasets['teams'].loc[datasets['teams']['id'] == team_id, 'name']
        team_name = name_matches.iloc[0]
        print(f"  {team_name}: {strength:.2f}")

    # Smoke-test: describe gameweek 1 for the first listed team
    sample_team_id = datasets['teams']['id'].iloc[0]
    sample_fixtures = fixture_analyzer.get_fixture_features(sample_team_id, 1)
    print(f"\n⚽ Sample fixture analysis (Team {sample_team_id}):")
    for key in sample_fixtures:
        value = sample_fixtures[key]
        print(f"  {key}: {value:.3f}")

In [None]:
# Cell 5: Position-Specific Features

class PositionSpecificFeatures:
    """Create position-specific features for FPL players.

    Every ``calculate_*`` method returns the same set of keys whether or not
    the player has any history rows, so assembling the results into a
    DataFrame does not introduce missing values for players without data.
    """

    def __init__(self, historical_df):
        """Keep the per-gameweek history frame (needs a ``player_id`` column)."""
        self.historical_df = historical_df

    def _player_history(self, player_id):
        """Return the history rows belonging to ``player_id``."""
        return self.historical_df[self.historical_df['player_id'] == player_id]

    def calculate_goalkeeper_features(self, player_id):
        """Calculate goalkeeper-specific features.

        Returns:
            Dict with ``clean_sheet_probability``, ``average_saves_per_game``,
            ``save_points_potential`` and ``penalty_save_rate``. Fixed
            estimates are returned when the player has no history.
        """
        player_history = self._player_history(player_id)

        if player_history.empty:
            return {
                'clean_sheet_probability': 0.3,
                'average_saves_per_game': 2.0,
                'penalty_save_rate': 0.1,
                'save_points_potential': 0.67
            }

        features = {}

        # Clean sheet rate (share of history rows with a clean sheet)
        features['clean_sheet_probability'] = player_history['clean_sheets'].sum() / len(player_history)

        # Save statistics
        features['average_saves_per_game'] = player_history['saves'].mean()
        features['save_points_potential'] = features['average_saves_per_game'] / 3  # 1 point per 3 saves

        # Penalty saves (if available)
        if 'penalties_saved' in player_history.columns:
            features['penalty_save_rate'] = player_history['penalties_saved'].sum() / max(1, len(player_history))
        else:
            features['penalty_save_rate'] = 0.1  # Default estimate

        return features

    def calculate_defender_features(self, player_id):
        """Calculate defender-specific features.

        Returns:
            Dict with ``clean_sheet_probability``, ``goal_scoring_rate``,
            ``assist_rate``, ``attacking_threat`` and
            ``defensive_attacking_balance``.
        """
        player_history = self._player_history(player_id)

        if player_history.empty:
            return {
                'clean_sheet_probability': 0.3,
                'goal_scoring_rate': 0.1,
                'assist_rate': 0.05,
                'attacking_threat': 10.0,
                # Same formula as the computed path: (10.0 + 0.3 * 50) / 2
                'defensive_attacking_balance': 12.5
            }

        features = {}

        # Defensive features
        features['clean_sheet_probability'] = player_history['clean_sheets'].sum() / len(player_history)

        # Attacking features
        features['goal_scoring_rate'] = player_history['goals_scored'].sum() / len(player_history)
        features['assist_rate'] = player_history['assists'].sum() / len(player_history)

        # Threat level (if available)
        if 'threat' in player_history.columns:
            features['attacking_threat'] = player_history['threat'].mean()
        else:
            features['attacking_threat'] = features['goal_scoring_rate'] * 50  # Estimate

        # Balance between defense and attack
        features['defensive_attacking_balance'] = (
            features['attacking_threat'] + features['clean_sheet_probability'] * 50
        ) / 2

        return features

    def calculate_midfielder_features(self, player_id):
        """Calculate midfielder-specific features.

        Returns:
            Dict with ``goal_scoring_rate``, ``assist_rate``,
            ``attacking_returns_rate``, ``creativity_index``,
            ``influence_index`` and ``clean_sheet_involvement``.
        """
        player_history = self._player_history(player_id)

        if player_history.empty:
            return {
                'goal_scoring_rate': 0.15,
                'assist_rate': 0.1,
                'attacking_returns_rate': 0.25,  # goal rate + assist rate
                'creativity_index': 30.0,
                'influence_index': 50.0,
                # Same prior as the clean-sheet defaults of the other positions
                'clean_sheet_involvement': 0.3
            }

        features = {}

        # Attacking output
        features['goal_scoring_rate'] = player_history['goals_scored'].sum() / len(player_history)
        features['assist_rate'] = player_history['assists'].sum() / len(player_history)
        features['attacking_returns_rate'] = features['goal_scoring_rate'] + features['assist_rate']

        # Creativity and influence (read from the history when present)
        if 'creativity' in player_history.columns:
            features['creativity_index'] = player_history['creativity'].mean()
        else:
            features['creativity_index'] = features['assist_rate'] * 200  # Estimate

        if 'influence' in player_history.columns:
            features['influence_index'] = player_history['influence'].mean()
        else:
            features['influence_index'] = (features['goal_scoring_rate'] + features['assist_rate']) * 250

        # Defensive contribution (for defensive midfielders)
        features['clean_sheet_involvement'] = player_history['clean_sheets'].sum() / len(player_history)

        return features

    def calculate_forward_features(self, player_id):
        """Calculate forward-specific features.

        Returns:
            Dict with ``goal_scoring_rate``, ``assist_rate``,
            ``attacking_returns_rate``, ``minutes_per_goal``,
            ``attacking_threat`` and ``ict_index``.
        """
        player_history = self._player_history(player_id)

        if player_history.empty:
            return {
                'goal_scoring_rate': 0.3,
                'assist_rate': 0.1,
                'attacking_returns_rate': 0.4,  # goal rate + assist rate
                'attacking_threat': 50.0,
                'minutes_per_goal': 300,
                # Same formula as the computed fallback: 50.0 + 0.3 * 50
                'ict_index': 65.0
            }

        features = {}

        # Scoring ability
        features['goal_scoring_rate'] = player_history['goals_scored'].sum() / len(player_history)
        features['assist_rate'] = player_history['assists'].sum() / len(player_history)
        features['attacking_returns_rate'] = features['goal_scoring_rate'] + features['assist_rate']

        # Efficiency metrics (max(1, ...) avoids dividing by zero goals)
        total_goals = player_history['goals_scored'].sum()
        total_minutes = player_history['minutes'].sum()
        features['minutes_per_goal'] = total_minutes / max(1, total_goals)

        # Threat and ICT index
        if 'threat' in player_history.columns:
            features['attacking_threat'] = player_history['threat'].mean()
        else:
            features['attacking_threat'] = features['goal_scoring_rate'] * 100

        if 'ict_index' in player_history.columns:
            features['ict_index'] = player_history['ict_index'].mean()
        else:
            features['ict_index'] = (features['attacking_threat'] + features['goal_scoring_rate'] * 50)

        return features

# Build the position-specific calculator when history data was loaded
if datasets['historical_gameweeks'].empty:
    print("⚠️ Historical data not available for position-specific features")
    position_features = None
else:
    position_features = PositionSpecificFeatures(datasets['historical_gameweeks'])
    print("✅ Position-specific feature calculator initialized!")

    # Smoke-test: run the matching calculator on the first player of each position
    if not datasets['players'].empty:
        calculators = {
            'Goalkeeper': position_features.calculate_goalkeeper_features,
            'Defender': position_features.calculate_defender_features,
            'Midfielder': position_features.calculate_midfielder_features,
            'Forward': position_features.calculate_forward_features,
        }
        positions = ['Goalkeeper', 'Defender', 'Midfielder', 'Forward']

        for position in positions:
            position_players = datasets['players'][datasets['players']['position'] == position]
            if position_players.empty:
                continue

            sample_player = position_players.iloc[0]
            player_id = sample_player['id']
            features = calculators[position](player_id)

            print(f"\n🎯 {position} features (Player {player_id}):")
            # Show only the first three features per position
            for key, value in list(features.items())[:3]:
                print(f"  {key}: {value:.3f}")

In [None]:
# Cell 6: Team Dynamics and Player Context Features

class TeamDynamicsAnalyzer:
    """Derive a player's standing and likely roles within their own team."""

    def __init__(self, players_df):
        self.players_df = players_df

    def calculate_team_dynamics(self, player_id):
        """Return team-context features for one player.

        Ranks are counted as "how many team-mates have at least this value",
        then divided by the group size. Unknown player ids get the defaults.
        """
        matches = self.players_df[self.players_df['id'] == player_id]
        if matches.empty:
            return self._get_default_team_dynamics()

        player = matches.iloc[0]
        squad = self.players_df[self.players_df['team'] == player['team']]
        squad_size = len(squad)

        # Standing inside the squad by price and by points
        price_rank = (squad['now_cost'] >= player['now_cost']).sum()
        points_rank = (squad['total_points'] >= player['total_points']).sum()

        # Standing among team-mates who share the same element_type
        same_role = squad[squad['element_type'] == player['element_type']]
        if len(same_role) > 1:
            role_rank = (same_role['total_points'] >= player['total_points']).sum()
            competition = role_rank / len(same_role)
        else:
            competition = 1.0  # No competition

        # Captain potential combines ownership with points scored so far
        owned_pct = player.get('selected_by_percent', 0)
        scored = player.get('total_points', 0)

        return {
            'price_rank_in_team': price_rank / squad_size,
            'points_rank_in_team': points_rank / squad_size,
            'is_key_player': float((price_rank <= 3) or (points_rank <= 3)),
            'position_competition': competition,
            'set_piece_likelihood': self._estimate_set_piece_likelihood(player, squad),
            'penalty_likelihood': self._estimate_penalty_likelihood(player, squad),
            'captain_potential': (owned_pct * scored) / 100,
        }

    def _estimate_set_piece_likelihood(self, player, team_players):
        """Heuristic chance of taking set pieces, keyed on element_type."""
        role = player.get('element_type', 3)
        creative_score = player.get('creativity', 0)

        if role == 1:  # Goalkeeper
            return 0.0
        if role == 2:  # Defender
            if player.get('total_points', 0) > 50:
                return 0.3
            return 0.1
        if role == 3:  # Midfielder
            if creative_score > 50:
                return 0.7
            return 0.4
        return 0.2  # Forward

    def _estimate_penalty_likelihood(self, player, team_players):
        """Heuristic chance of taking penalties, from role, price and goals."""
        role = player.get('element_type', 3)
        cost = player.get('now_cost', 50)
        goal_count = player.get('goals_scored', 0)

        if role == 1:  # Goalkeeper
            return 0.0
        if role not in [3, 4]:  # Defender
            return 0.05

        # Midfielder or Forward
        likelihood = 0.3 if cost > 80 else 0.1
        if goal_count > 5:
            likelihood += 0.2
        return min(0.8, likelihood)

    def _get_default_team_dynamics(self):
        """Fallback feature values for a player id that is not in players_df."""
        return dict(
            price_rank_in_team=0.5,
            points_rank_in_team=0.5,
            is_key_player=0.0,
            position_competition=0.5,
            set_piece_likelihood=0.2,
            penalty_likelihood=0.1,
            captain_potential=10.0,
        )

# Build the team dynamics analyzer when player data was loaded
if datasets['players'].empty:
    print("⚠️ Player data not available for team dynamics analysis")
    team_dynamics = None
else:
    team_dynamics = TeamDynamicsAnalyzer(datasets['players'])
    print("✅ Team dynamics analyzer initialized!")

    # Smoke-test on the first listed player
    sample_player_id = datasets['players']['id'].iloc[0]
    sample_dynamics = team_dynamics.calculate_team_dynamics(sample_player_id)
    print(f"\n👥 Sample team dynamics (Player {sample_player_id}):")
    for key in sample_dynamics:
        value = sample_dynamics[key]
        print(f"  {key}: {value:.3f}")

# Price and ownership features
def calculate_price_ownership_features(player, all_players):
    """Build price, value and ownership features for one player row.

    ``player`` is a single row supporting ``.get`` and item access;
    ``all_players`` is the full players frame used for the comparison with
    other players of the same ``element_type``.
    """
    # now_cost is divided by 10 to get the displayed price
    price = player.get('now_cost', 50) / 10.0
    points = player.get('total_points', 1)

    features = {
        'price': price,
        'price_per_point': price / max(1, points),
    }

    # Percentiles relative to players of the same element_type
    peers = all_players[all_players['element_type'] == player['element_type']]
    if peers.empty:
        price_percentile, points_percentile, value_score = 0.5, 0.5, 0.0
    else:
        price_percentile = (peers['now_cost'] <= player['now_cost']).mean()
        points_percentile = (peers['total_points'] <= player['total_points']).mean()
        value_score = points_percentile - price_percentile

    features['price_percentile_in_position'] = price_percentile
    features['points_percentile_in_position'] = points_percentile
    features['value_score'] = value_score

    # Ownership: the differential score grows as ownership drops below 50
    ownership = player.get('selected_by_percent', 5.0)
    features['ownership_percentage'] = ownership
    features['differential_score'] = max(0, 50 - ownership) / 50

    return features

print("✅ Price and ownership feature calculator defined!")

In [None]:
# Cell 7: Create Comprehensive Feature Dataset

print("🔧 Creating comprehensive feature dataset...")
print("⏱️ This may take 5-10 minutes depending on data size")

if datasets['players'].empty:
    print("❌ No player data available for feature creation")
else:
    # Determine current gameweek for feature creation
    if not datasets['gameweeks'].empty:
        current_gw_data = datasets['gameweeks'][datasets['gameweeks']['is_current'] == True]
        # Cast to a built-in int: the value read from pandas is a NumPy
        # integer, which json.dump (used for the metadata below) cannot
        # serialize.
        current_gameweek = int(current_gw_data['id'].iloc[0]) if not current_gw_data.empty else 15
    else:
        current_gameweek = 15  # Default

    print(f"📅 Using gameweek {current_gameweek} for feature creation")

    all_features = []

    # Process each player
    for idx, player in tqdm(datasets['players'].iterrows(),
                           total=len(datasets['players']),
                           desc="Creating features"):

        player_id = player['id']
        team_id = player['team']
        position = player['position']

        # Initialize feature dictionary with the four identifier columns
        features = {
            'player_id': player_id,
            'gameweek': current_gameweek,
            'position': position,
            'team_id': team_id,
        }

        # 1. Form metrics
        if datasets['historical_gameweeks'].empty:
            form_features = form_calculator._get_default_form_metrics()
        else:
            form_features = form_calculator.calculate_player_form(
                datasets['historical_gameweeks'], player_id, current_gameweek
            )
        features.update(form_features)

        # 2. Fixture features
        if fixture_analyzer:
            fixture_features = fixture_analyzer.get_fixture_features(team_id, current_gameweek)
            features.update(fixture_features)
        else:
            # Default fixture features; same keys as the analyzer's own
            # defaults (including opponent_id) so the columns stay consistent
            features.update({
                'is_home': 0.5,
                'opponent_id': 0,
                'fixture_difficulty': 3.0,
                'expected_goals_for': 1.3,
                'expected_goals_against': 1.3,
                'next_5_fixtures_difficulty': 3.0
            })

        # 3. Team dynamics (no fallback: without an analyzer object there is
        # nothing to take default values from)
        if team_dynamics:
            dynamics_features = team_dynamics.calculate_team_dynamics(player_id)
            features.update(dynamics_features)

        # 4. Position-specific features
        if position_features:
            if position == 'Goalkeeper':
                pos_features = position_features.calculate_goalkeeper_features(player_id)
            elif position == 'Defender':
                pos_features = position_features.calculate_defender_features(player_id)
            elif position == 'Midfielder':
                pos_features = position_features.calculate_midfielder_features(player_id)
            else:  # Forward
                pos_features = position_features.calculate_forward_features(player_id)

            features.update(pos_features)

        # 5. Price and ownership features
        price_features = calculate_price_ownership_features(player, datasets['players'])
        features.update(price_features)

        all_features.append(features)

    # Convert to DataFrame
    features_df = pd.DataFrame(all_features)

    # The four identifier columns are not counted as features
    feature_count = features_df.shape[1] - 4

    print(f"\n✅ Feature creation completed!")
    print(f"📊 Dataset shape: {features_df.shape}")
    print(f"🎯 Features per player: {feature_count}")

    # Show feature summary by position
    print("\n📋 Feature Summary by Position:")
    print(features_df.groupby('position').size())

    # Feature quality checks
    print("\n🔍 Feature Quality Checks:")
    print(f"  Missing values: {features_df.isnull().sum().sum()}")
    print(f"  Duplicate players: {features_df['player_id'].duplicated().sum()}")

    # Show sample of important features
    important_features = [
        'form_points_5gw', 'fixture_difficulty', 'is_home',
        'price', 'ownership_percentage', 'captain_potential'
    ]

    available_important = [f for f in important_features if f in features_df.columns]

    if available_important:
        print(f"\n📊 Sample of key features:")
        sample_data = features_df[['player_id', 'position'] + available_important].head()
        print(sample_data.to_string(index=False, float_format='%.3f'))

    # Save to file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    features_file = f"data/processed/{timestamp}_player_features.csv"
    features_df.to_csv(features_file, index=False)

    print(f"\n💾 Features saved to: {features_file}")

    # Create feature metadata; categories are counted by column-name substring
    feature_metadata = {
        'timestamp': timestamp,
        'total_players': len(features_df),
        'total_features': feature_count,  # Exclude ID columns
        'gameweek': current_gameweek,
        'feature_categories': {
            'form_metrics': len([f for f in features_df.columns if 'form_' in f]),
            'fixture_features': len([f for f in features_df.columns if any(x in f for x in ['fixture', 'expected', 'home'])]),
            'team_dynamics': len([f for f in features_df.columns if any(x in f for x in ['rank', 'competition', 'likelihood'])]),
            'position_specific': len([f for f in features_df.columns if any(x in f for x in ['clean_sheet', 'scoring', 'creativity', 'threat'])]),
            'price_ownership': len([f for f in features_df.columns if any(x in f for x in ['price', 'ownership', 'value'])])
        },
        'feature_file': features_file
    }

    metadata_file = f"data/processed/{timestamp}_feature_metadata.json"
    with open(metadata_file, 'w') as f:
        json.dump(feature_metadata, f, indent=2)

    print(f"📋 Metadata saved to: {metadata_file}")

    print("\n" + "="*60)
    print("🎉 FEATURE ENGINEERING COMPLETE!")
    print("🚀 Ready for model training!")
    print(f"📁 Features saved in: data/processed/")
    print("="*60)

    # Show feature category breakdown
    print("\n📊 Feature Categories:")
    for category, count in feature_metadata['feature_categories'].items():
        print(f"  {category}: {count} features")