# In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found,
# so the available datasets show up in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join directory and file name to get a path usable with pd.read_csv.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# In [None]:
import os
import sys
import warnings
import gc
import time
import pickle
from pathlib import Path
from dataclasses import dataclass, field
from typing import List, Tuple, Optional, Dict, Any
from datetime import datetime
from copy import deepcopy

import numpy as np
import pandas as pd
import polars as pl
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.feature_selection import SelectKBest, f_regression, VarianceThreshold
from sklearn.linear_model import ElasticNetCV, RidgeCV, LassoCV, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.impute import SimpleImputer
import xgboost as xgb
import lightgbm as lgb
from scipy import stats
import kaggle_evaluation.default_inference_server

# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# Seed NumPy's global random generator so repeated runs draw the same numbers.
np.random.seed(42)

# ============================================================================
# CONFIGURATION
# ============================================================================
@dataclass
class Config:
    """Settings shared by every stage of the pipeline.

    One instance is passed to the feature engineer, data processor, model
    factory, ensemble, grid search and the online prediction loop. Mutable
    defaults use ``field(default_factory=...)`` so instances never share them.
    """
    # Paths
    # Directory with the competition CSVs; main() reads data_path / train_file.
    data_path: Path = Path('/kaggle/input/hull-tactical-market-prediction/')
    # NOTE(review): output_path and test_file are not read in the visible code.
    output_path: Path = Path('/kaggle/working/')
    train_file: str = 'train.csv'
    test_file: str = 'test.csv'
    
    # Data parameters
    # Seed handed to PCA and to the estimators built by ModelFactory.
    random_seed: int = 42
    # Only the most recent max_train_rows rows are kept for (re)training.
    max_train_rows: int = 800
    # Columns whose fraction of missing values exceeds this are dropped.
    missing_threshold: float = 0.5
    # Rows with date_id below this are discarded before training.
    min_date_id: int = 37
    
    # Feature engineering
    # Rolling mean/std features, one pair per window length below.
    use_rolling_features: bool = True
    rolling_windows: List[int] = field(default_factory=lambda: [5, 10, 20, 50])
    # Lagged-value and lagged-difference features, one pair per period below.
    use_lag_features: bool = True
    lag_periods: List[int] = field(default_factory=lambda: [1, 2, 3, 5, 10])
    # RSI, Bollinger-band position and rate-of-change features.
    use_technical_indicators: bool = True
    # Pairwise products of a fixed list of column pairs.
    use_interaction_features: bool = True
    # NOTE(review): max_features_for_interactions is not read in the visible code.
    max_features_for_interactions: int = 10
    # The U1 / U2 features derived from I1, I2, I7, I9 and M11.
    use_derived_features: bool = True
    
    # Preprocessing
    # 'clip' enables z-score clipping; any other value disables it.
    handle_outliers: str = 'clip'
    # Number of standard deviations from the mean beyond which values are clipped.
    outlier_threshold: float = 3.0
    # 'standard', 'robust' or 'minmax'; any other value means no scaling.
    scaling_method: str = 'robust'
    # 'pca', 'select_k_best', 'variance' or 'none'.
    dim_reduction_method: str = 'select_k_best'
    # Upper bound on PCA components / SelectKBest features.
    n_components: int = 150
    # Threshold passed to VarianceThreshold when dim_reduction_method == 'variance'.
    variance_threshold: float = 0.001
    # When True a SimpleImputer with imputation_strategy is fitted.
    use_nan_imputation: bool = True
    imputation_strategy: str = 'median'
    
    # Model parameters
    # 'ensemble' or one of the names understood by ModelFactory.create_model.
    model_type: str = 'ensemble'
    # Fold count passed as cv to ElasticNetCV / RidgeCV.
    cv_folds: int = 3
    # Fraction of the newest rows held out as the validation split.
    validation_size: float = 0.1
    # Patience used for XGBoost / LightGBM early stopping.
    early_stopping_rounds: int = 50
    
    # Ensemble specific
    # Blend weight per model name; the ensemble renormalises them after fitting.
    ensemble_weights: Dict[str, float] = field(default_factory=lambda: {
        'elastic': 0.25, 'xgboost': 0.35, 'lightgbm': 0.30, 'ridge': 0.10
    })
    
    # Signal generation
    # NOTE(review): signal_multiplier is not read in the visible code; only the
    # low-vol / high-vol variants below are used.
    signal_multiplier: float = 100.0
    # Multiplier applied to the raw prediction when V1 is below its training median.
    signal_multiplier_low_vol: float = 600.0
    # Multiplier applied otherwise.
    signal_multiplier_high_vol: float = 400.0
    # The scaled signal is clipped to [min_signal, max_signal].
    min_signal: float = 0.0
    max_signal: float = 2.0
    # When True the signal is divided by volatility * vol_scaling.
    use_volatility_scaling: bool = True
    # Number of recent targets used for the fallback volatility estimate.
    volatility_window: int = 20
    # NOTE(review): target_volatility is not read in the visible code.
    target_volatility: float = 0.12
    vol_scaling: float = 1.2
    
    # Online learning
    # When True new observations are appended and the model is periodically refit.
    use_online_learning: bool = True
    # Refit every retrain_frequency predicted rows.
    retrain_frequency: int = 1
    # The allocation is multiplied by (1 - transaction_cost).
    transaction_cost: float = 0.00003
    # Weight of the new signal versus the previous allocation when smoothing.
    smoothing_weight: float = 0.75
    
    # Grid search
    enable_grid_search: bool = True
    max_configurations: int = 5  # Limited for speed
    # Wall-clock budget after which no further configuration is started.
    time_limit_minutes: int = 10
    # When True progress and failure messages are printed.
    verbose: bool = True

# ============================================================================
# DATA LOADING UTILITIES
# ============================================================================
def load_and_clean_data(file_path: Path, target_col: Optional[str] = None) -> pd.DataFrame:
    """Load a CSV and coerce every data column to a numeric dtype.

    Args:
        file_path: Location of the CSV file to read.
        target_col: Optional name of the column holding the prediction
            target. When given and present, it is renamed to ``'target'``.

    Returns:
        The loaded frame. Every column except ``date_id`` and ``is_scored``
        has been passed through ``pd.to_numeric(errors='coerce')``, so values
        that cannot be parsed become NaN.
    """
    df = pd.read_csv(file_path)

    # Identifier / flag columns keep whatever dtype pandas inferred.
    exclude_cols = {'date_id', 'is_scored'}

    for col in df.columns:
        if col not in exclude_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    if target_col and target_col in df.columns:
        df = df.rename(columns={target_col: 'target'})

    return df

# ============================================================================
# FEATURE ENGINEERING
# ============================================================================
class FeatureEngineer:
    """Builds the model feature table from the raw competition columns.

    ``fit`` chooses the usable base columns and remembers their medians;
    ``transform`` fills gaps with those medians and appends the optional
    derived, interaction, technical, rolling and lag feature groups, in that
    order.
    """

    def __init__(self, config: Config):
        self.config = config
        self.base_features = None
        self.feature_names = None
        self.median_values = {}

    def fit(self, df: pd.DataFrame):
        """Select base columns and store their medians; returns ``self``."""
        non_feature_cols = [
            'date_id', 'target', 'forward_returns', 'is_scored',
            'risk_free_rate', 'market_forward_excess_returns',
            'lagged_forward_returns', 'lagged_risk_free_rate',
            'lagged_market_forward_excess_returns',
        ]

        self.base_features = [name for name in df.columns if name not in non_feature_cols]

        # Keep only columns that are not too sparse.
        null_share = df[self.base_features].isnull().mean()
        limit = self.config.missing_threshold
        self.base_features = [name for name in self.base_features if null_share[name] <= limit]

        for name in self.base_features:
            med = df[name].median()
            # An all-NaN column has no median; fall back to zero.
            self.median_values[name] = 0.0 if pd.isna(med) else med

        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return the engineered feature frame for ``df`` (same index)."""
        cfg = self.config
        out = pd.DataFrame(index=df.index)

        # Base columns: median-filled when present, constant zero when absent.
        for name in self.base_features:
            out[name] = (
                df[name].fillna(self.median_values.get(name, 0))
                if name in df.columns
                else 0.0
            )

        needed_for_derived = {'I1', 'I2', 'I7', 'I9', 'M11'}
        if cfg.use_derived_features and needed_for_derived.issubset(out.columns):
            out['U1'] = out['I2'] - out['I1']
            denominator = (out['I2'] + out['I9'] + out['I7']) / 3 + 1e-10
            out['U2'] = out['M11'] / denominator

        if cfg.use_interaction_features:
            # new column -> (left factor, right factor); dict order is the output order
            products = {
                'V1_S1': ('V1', 'S1'),
                'M11_V1': ('M11', 'V1'),
                'I9_S1': ('I9', 'S1'),
                'P1_V1': ('P1', 'V1'),
                'E1_S1': ('E1', 'S1'),
            }
            for label, (left, right) in products.items():
                if left in out.columns and right in out.columns:
                    out[label] = out[left] * out[right]

        if cfg.use_technical_indicators:
            for name in ['V1', 'S1', 'M11', 'P1']:
                if name in out.columns:
                    out = self._add_technical_indicators(out, name)

        if cfg.use_rolling_features:
            for name in ['V1', 'S1', 'M11', 'I9', 'P1']:
                if name not in out.columns:
                    continue
                for span in cfg.rolling_windows:
                    roller = out[name].rolling(window=span, min_periods=1)
                    out[f'{name}_roll_mean_{span}'] = roller.mean()
                    # A one-row window has no sample std; report zero instead.
                    out[f'{name}_roll_std_{span}'] = roller.std().fillna(0)

        if cfg.use_lag_features:
            for name in ['V1', 'S1', 'I9', 'P1']:
                if name not in out.columns:
                    continue
                for lag in cfg.lag_periods:
                    shifted = out[name].shift(lag)
                    # Rows without enough history get the training median.
                    out[f'{name}_lag_{lag}'] = shifted.fillna(self.median_values.get(name, 0))
                    out[f'{name}_diff_{lag}'] = (out[name] - shifted).fillna(0)

        # Final safety net: no NaN or infinite value leaves this method.
        out = out.fillna(0).replace([np.inf, -np.inf], 0)

        self.feature_names = out.columns.tolist()

        return out

    def _add_technical_indicators(self, df: pd.DataFrame, col: str) -> pd.DataFrame:
        """Append ``{col}_rsi``, ``{col}_bb_position`` and ``{col}_roc`` to ``df``.

        The frame is modified in place and also returned.
        """
        series = df[col]

        # RSI over a 14-row window, built from average gains and losses.
        change = series.diff()
        avg_gain = change.where(change > 0, 0).rolling(window=14, min_periods=1).mean()
        avg_loss = (-change.where(change < 0, 0)).rolling(window=14, min_periods=1).mean()
        rs = avg_gain / (avg_loss + 1e-10)
        df[f'{col}_rsi'] = 100 - (100 / (1 + rs))

        # Position of the value inside a 20-row, two-sigma band.
        window = series.rolling(window=20, min_periods=1)
        centre = window.mean()
        spread = window.std().fillna(0)
        upper = centre + (2 * spread)
        lower = centre - (2 * spread)
        band_width = upper - lower + 1e-10
        df[f'{col}_bb_position'] = (series - lower) / band_width

        # Ten-row rate of change.
        df[f'{col}_roc'] = series.pct_change(periods=10).fillna(0)

        return df

# ============================================================================
# DATA PROCESSOR
# ============================================================================
class DataProcessor:
    """Turns raw frames into the numeric matrix consumed by the models.

    Steps, in order: feature engineering, outlier clipping, imputation,
    scaling, feature selection / dimensionality reduction. Every step is
    learned in ``fit`` and replayed unchanged in ``transform``.
    """

    def __init__(self, config: Config):
        self.config = config
        self.feature_engineer = FeatureEngineer(config)
        self.scaler = None
        self.feature_selector = None
        self.imputer = None
        self.final_feature_names = None
        # (lower, upper) per-column clip limits learned by fit(); None until fitted.
        self.clip_bounds = None

    def fit(self, df: pd.DataFrame) -> 'DataProcessor':
        """Learn all preprocessing state from ``df`` and return ``self``.

        If ``df`` has a ``'target'`` column it is used (NaN replaced by 0) to
        score features for ``select_k_best``. Without it no selector is fitted.
        """
        self.feature_engineer.fit(df)
        features_df = self.feature_engineer.transform(df)
        X = features_df.values

        # Drop state from any earlier fit so a refit can never mix an old
        # selector/scaler with a newly shaped feature matrix.
        self.imputer = None
        self.scaler = None
        self.feature_selector = None
        self.clip_bounds = None

        if self.config.handle_outliers == 'clip':
            # Learn the limits once, here, so transform() reuses them.
            self.clip_bounds = self._compute_clip_bounds(X)
            X = self._clip_outliers(X)

        if self.config.use_nan_imputation:
            self.imputer = SimpleImputer(strategy=self.config.imputation_strategy)
            X = self.imputer.fit_transform(X)

        if self.config.scaling_method == 'standard':
            self.scaler = StandardScaler()
        elif self.config.scaling_method == 'robust':
            self.scaler = RobustScaler()
        elif self.config.scaling_method == 'minmax':
            self.scaler = MinMaxScaler()

        if self.scaler is not None:
            X = self.scaler.fit_transform(X)

        if self.config.dim_reduction_method != 'none' and 'target' in df.columns:
            y = df['target'].values
            y = np.nan_to_num(y, nan=0.0)

            if self.config.dim_reduction_method == 'pca':
                # PCA cannot extract more components than min(rows, cols) - 1 here.
                n_components = min(self.config.n_components, min(X.shape) - 1)
                self.feature_selector = PCA(n_components=n_components, random_state=self.config.random_seed)
                self.feature_selector.fit(X)
            elif self.config.dim_reduction_method == 'select_k_best':
                k = min(self.config.n_components, X.shape[1])
                self.feature_selector = SelectKBest(score_func=f_regression, k=k)
                self.feature_selector.fit(X, y)
            elif self.config.dim_reduction_method == 'variance':
                self.feature_selector = VarianceThreshold(threshold=self.config.variance_threshold)
                self.feature_selector.fit(X)

        self._update_final_feature_names()
        return self

    def transform(self, df: pd.DataFrame) -> np.ndarray:
        """Apply the fitted steps to ``df`` and return a finite 2-D array."""
        features_df = self.feature_engineer.transform(df)
        X = features_df.values

        if self.config.handle_outliers == 'clip':
            X = self._clip_outliers(X)

        if self.imputer is not None:
            X = self.imputer.transform(X)

        if self.scaler is not None:
            X = self.scaler.transform(X)

        if self.feature_selector is not None:
            X = self.feature_selector.transform(X)

        # Guarantee the models never see NaN or infinity.
        X = np.nan_to_num(X, nan=0.0, posinf=1e10, neginf=-1e10)

        return X

    def _compute_clip_bounds(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Return per-column ``(lower, upper)`` limits at mean +/- threshold * std.

        Columns that are empty, all-NaN or have zero / undefined spread get
        infinite limits, i.e. they are left unclipped.
        """
        X = np.asarray(X, dtype=float)
        lower = np.full(X.shape[1], -np.inf)
        upper = np.full(X.shape[1], np.inf)
        for i, column in enumerate(X.T):
            valid = column[~np.isnan(column)]
            if valid.size == 0:
                continue
            mean = np.mean(valid)
            std = np.std(valid)
            if std > 0:
                lower[i] = mean - self.config.outlier_threshold * std
                upper[i] = mean + self.config.outlier_threshold * std
        return lower, upper

    def _clip_outliers(self, X: np.ndarray) -> np.ndarray:
        """Clip each column of ``X`` to the z-score limits; returns a new array.

        Limits learned by ``fit`` are reused, so one inference row is clipped
        against the training distribution. (Previously the limits were
        recomputed from ``X`` itself, which made clipping a no-op for a single
        row.) When no matching fitted limits exist they are derived from ``X``.
        """
        X = np.asarray(X, dtype=float)
        bounds = getattr(self, 'clip_bounds', None)
        if bounds is None or bounds[0].shape[0] != X.shape[1]:
            bounds = self._compute_clip_bounds(X)
        lower, upper = bounds
        # Broadcasts the per-column limits over rows; NaN entries stay NaN.
        return np.clip(X, lower, upper)

    def _update_final_feature_names(self):
        """Record the names of the columns that survive feature selection."""
        if self.feature_selector is not None and hasattr(self.feature_selector, 'get_support'):
            mask = self.feature_selector.get_support()
            self.final_feature_names = [f for f, m in zip(self.feature_engineer.feature_names, mask) if m]
        else:
            # PCA (no get_support) or no selector: fall back to the engineered names.
            self.final_feature_names = self.feature_engineer.feature_names

# ============================================================================
# MODEL FACTORY
# ============================================================================
class ModelFactory:
    """Builds unfitted regressors from a short type name."""

    @staticmethod
    def create_model(model_type: str, config: Config):
        """Return a new estimator for ``model_type``.

        Known names are 'elastic', 'xgboost', 'lightgbm', 'ridge' and
        'gradient_boost'. Any other name yields a RidgeCV with default folds.
        """
        seed = config.random_seed

        def _fallback():
            return RidgeCV(alphas=np.logspace(-4, 2, 50))

        # Builders are lazy so only the requested estimator is constructed.
        builders = {
            'elastic': lambda: ElasticNetCV(
                l1_ratio=[0.1, 0.5, 0.7, 0.9],
                alphas=np.logspace(-4, 1, 20),
                cv=config.cv_folds,
                max_iter=10000,
                random_state=seed,
            ),
            'xgboost': lambda: xgb.XGBRegressor(
                n_estimators=300,
                max_depth=6,
                learning_rate=0.03,
                subsample=0.8,
                colsample_bytree=0.8,
                min_child_weight=3,
                gamma=0.1,
                reg_alpha=0.1,
                reg_lambda=1.0,
                random_state=seed,
                n_jobs=-1,
                objective='reg:squarederror',
                verbosity=0,
            ),
            'lightgbm': lambda: lgb.LGBMRegressor(
                n_estimators=300,
                max_depth=6,
                learning_rate=0.03,
                num_leaves=31,
                subsample=0.8,
                colsample_bytree=0.8,
                min_child_samples=20,
                reg_alpha=0.1,
                reg_lambda=1.0,
                random_state=seed,
                n_jobs=-1,
                verbosity=-1,
            ),
            'ridge': lambda: RidgeCV(
                alphas=np.logspace(-4, 2, 50),
                cv=config.cv_folds,
            ),
            'gradient_boost': lambda: GradientBoostingRegressor(
                n_estimators=200,
                max_depth=5,
                learning_rate=0.05,
                subsample=0.8,
                min_samples_split=5,
                min_samples_leaf=3,
                loss='huber',
                random_state=seed,
            ),
        }

        build = builders.get(model_type, _fallback)
        return build()

# ============================================================================
# ENSEMBLE MODEL
# ============================================================================
class EnsembleModel:
    """Weighted blend of the regressors named in ``config.ensemble_weights``."""

    def __init__(self, config: Config):
        self.config = config
        self.models = {}
        # Private copy: fit() zeroes and renormalises these weights.
        self.weights = config.ensemble_weights.copy()

    def fit(self, X: np.ndarray, y: np.ndarray, X_val: Optional[np.ndarray] = None,
            y_val: Optional[np.ndarray] = None):
        """Fit every member with a positive weight.

        Args:
            X, y: Training matrix and target; NaN values are replaced by 0.
            X_val, y_val: Optional hold-out set. When both are given, XGBoost
                and LightGBM members use it for early stopping.

        A member that raises while training is reported (if verbose), given
        weight 0 and skipped. The remaining weights are rescaled to sum to 1.
        """
        X_clean = np.nan_to_num(X, nan=0.0)
        y_clean = np.nan_to_num(y, nan=0.0)

        has_validation = X_val is not None and y_val is not None
        eval_set = None
        if has_validation:
            eval_set = [(np.nan_to_num(X_val, nan=0.0), np.nan_to_num(y_val, nan=0.0))]

        for name, weight in self.weights.items():
            if weight > 0:
                try:
                    model = ModelFactory.create_model(name, self.config)

                    if name == 'xgboost' and has_validation:
                        # early_stopping_rounds is an estimator parameter since
                        # XGBoost 1.6; newer releases reject it as a fit() keyword.
                        model.set_params(early_stopping_rounds=self.config.early_stopping_rounds)
                        model.fit(X_clean, y_clean, eval_set=eval_set, verbose=False)
                    elif name == 'lightgbm' and has_validation:
                        model.fit(
                            X_clean, y_clean,
                            eval_set=eval_set,
                            callbacks=[
                                lgb.early_stopping(self.config.early_stopping_rounds),
                                lgb.log_evaluation(0)
                            ]
                        )
                    else:
                        model.fit(X_clean, y_clean)

                    self.models[name] = model

                except Exception as e:
                    # Best effort: one broken member must not sink the ensemble.
                    if self.config.verbose:
                        print(f"Failed to train {name}: {str(e)[:100]}")
                    self.weights[name] = 0

        total_weight = sum(self.weights.values())
        if total_weight > 0:
            for name in self.weights:
                self.weights[name] /= total_weight

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the weighted sum of member predictions for ``X``.

        NaN inputs are replaced by 0. If a member raises while predicting it
        is skipped and the blend is rescaled over the members that succeeded,
        so the output keeps its scale. With no usable member the result is
        all zeros.
        """
        X_clean = np.nan_to_num(X, nan=0.0)
        predictions = np.zeros(len(X_clean))
        attempted_weight = 0.0
        used_weight = 0.0

        for name, model in self.models.items():
            if name in self.weights and self.weights[name] > 0:
                member_weight = self.weights[name]
                attempted_weight += member_weight
                try:
                    pred = model.predict(X_clean)
                    predictions += pred * member_weight
                    used_weight += member_weight
                except Exception as e:
                    if self.config.verbose:
                        print(f"Prediction failed for {name}: {str(e)[:100]}")

        # Only rescale when a member actually dropped out, so the normal path
        # is numerically unchanged.
        if 0 < used_weight < attempted_weight:
            predictions *= attempted_weight / used_weight

        return predictions

# ============================================================================
# GRID SEARCH OPTIMIZER
# ============================================================================
class GridSearchOptimizer:
    """Tries a short list of hand-picked configurations and keeps the best.

    "Best" means lowest RMSE on the chronological tail of the training frame.
    """

    def __init__(self, config: Config):
        self.config = config
        self.results = []
        self.best_config = None
        self.best_score = float('inf')

    def generate_configurations(self) -> List[Config]:
        """Return up to ``max_configurations`` variants of the base config.

        Each variant is a deep copy of ``self.config`` with one preset applied,
        so the base config is never mutated.
        """
        configs = []

        best_combinations = [
            {
                'scaling_method': 'robust',
                'dim_reduction_method': 'select_k_best',
                'n_components': 150,
                'handle_outliers': 'clip',
                'model_type': 'ensemble',
                'use_rolling_features': True,
                'use_technical_indicators': True,
                'use_interaction_features': True,
                'signal_multiplier_low_vol': 600.0,
                'signal_multiplier_high_vol': 400.0,
                'ensemble_weights': {'elastic': 0.25, 'xgboost': 0.35, 'lightgbm': 0.30, 'ridge': 0.10}
            },
            {
                'scaling_method': 'standard',
                'dim_reduction_method': 'pca',
                'n_components': 100,
                'handle_outliers': 'clip',
                'model_type': 'ensemble',
                'use_rolling_features': True,
                'use_technical_indicators': False,
                'use_interaction_features': True,
                'signal_multiplier_low_vol': 500.0,
                'signal_multiplier_high_vol': 350.0,
                'ensemble_weights': {'elastic': 0.20, 'xgboost': 0.40, 'lightgbm': 0.40, 'ridge': 0.0}
            },
            {
                'scaling_method': 'robust',
                'dim_reduction_method': 'none',
                'n_components': 200,
                'handle_outliers': 'clip',
                'model_type': 'lightgbm',
                'use_rolling_features': False,
                'use_technical_indicators': True,
                'use_interaction_features': True,
                'signal_multiplier_low_vol': 700.0,
                'signal_multiplier_high_vol': 450.0,
                'ensemble_weights': {'elastic': 0.0, 'xgboost': 0.0, 'lightgbm': 1.0, 'ridge': 0.0}
            }
        ]

        for params in best_combinations[:self.config.max_configurations]:
            config = deepcopy(self.config)
            for key, value in params.items():
                setattr(config, key, value)
            configs.append(config)

        return configs

    @staticmethod
    def _fit_single_model(model, config: Config, X_train, y_train, X_val, y_val):
        """Fit a non-ensemble model, with early stopping for the boosters."""
        if config.model_type == 'xgboost':
            # early_stopping_rounds is an estimator parameter since XGBoost
            # 1.6; newer releases reject it as a fit() keyword.
            model.set_params(early_stopping_rounds=config.early_stopping_rounds)
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
        elif config.model_type == 'lightgbm':
            model.fit(
                X_train, y_train,
                eval_set=[(X_val, y_val)],
                callbacks=[
                    lgb.early_stopping(config.early_stopping_rounds),
                    lgb.log_evaluation(0)
                ]
            )
        else:
            model.fit(X_train, y_train)

    def evaluate_configuration(self, config: Config, train_df: pd.DataFrame) -> Dict[str, Any]:
        """Fit processor and model for ``config`` and score them.

        Returns:
            A dict with keys 'config', 'model', 'processor', 'rmse', 'mae',
            'sharpe' and 'score' (equal to the RMSE), or ``None`` when anything
            raised while evaluating.
        """
        try:
            processor = DataProcessor(config)
            processor.fit(train_df)

            X = processor.transform(train_df)
            y = train_df['target'].values if 'target' in train_df.columns else np.zeros(len(train_df))
            y = np.nan_to_num(y, nan=0.0)

            # Chronological split: the newest rows form the validation set.
            split_idx = int(len(X) * (1 - config.validation_size))
            X_train, X_val = X[:split_idx], X[split_idx:]
            y_train, y_val = y[:split_idx], y[split_idx:]

            if config.model_type == 'ensemble':
                model = EnsembleModel(config)
                model.fit(X_train, y_train, X_val, y_val)
            else:
                model = ModelFactory.create_model(config.model_type, config)
                self._fit_single_model(model, config, X_train, y_train, X_val, y_val)

            y_pred = model.predict(X_val) if hasattr(model, 'predict') else np.zeros(len(X_val))
            rmse = np.sqrt(mean_squared_error(y_val, y_pred))
            mae = mean_absolute_error(y_val, y_pred)

            # Informational only; the ranking uses RMSE.
            # NOTE(review): this pairs the prediction for row t with the target
            # of row t+1 - confirm the one-row shift is intended.
            if len(y_pred) > 1:
                returns = y_pred[:-1] * y_val[1:]
                if np.std(returns) > 0:
                    sharpe = np.mean(returns) / np.std(returns) * np.sqrt(252)
                else:
                    sharpe = 0
            else:
                sharpe = 0

            return {
                'config': config,
                'model': model,
                'processor': processor,
                'rmse': rmse,
                'mae': mae,
                'sharpe': sharpe,
                'score': rmse
            }

        except Exception as e:
            # A failing configuration is skipped, not fatal.
            if config.verbose:
                print(f"Configuration failed: {str(e)[:200]}")
            return None

    def run(self, train_df: pd.DataFrame) -> Tuple[Config, Any, Any]:
        """Evaluate the candidate configurations within the time budget.

        Returns:
            ``(config, model, processor)`` of the best-scoring candidate, or
            ``(self.config, None, None)`` when no candidate could be evaluated.
        """
        configs = self.generate_configurations()
        start_time = time.time()

        if self.config.verbose:
            print(f"Testing {len(configs)} configurations...")

        best_result = None
        for i, config in enumerate(configs):
            # The budget is checked before starting a candidate, never mid-fit.
            if (time.time() - start_time) / 60 > self.config.time_limit_minutes:
                if self.config.verbose:
                    print("Time limit reached")
                break

            if self.config.verbose:
                print(f"Configuration {i+1}/{len(configs)}...")

            result = self.evaluate_configuration(config, train_df)

            if result is not None:
                self.results.append(result)

                if result['score'] < self.best_score:
                    self.best_score = result['score']
                    self.best_config = config
                    best_result = result

                    if self.config.verbose:
                        print(f"  New best score: {self.best_score:.6f}")

        if best_result is not None:
            return best_result['config'], best_result['model'], best_result['processor']

        return self.config, None, None

# ============================================================================
# MAIN PIPELINE
# ============================================================================
class HullTacticalPipeline:
    """End-to-end pipeline: training, online refit and per-row allocation."""

    def __init__(self, config: Optional[Config] = None):
        self.config = config or Config()
        self.processor = None
        self.model = None
        self.train_df = None
        # Allocation returned by the previous predict() call (used for smoothing).
        self.last_allocation = 0.0
        # Median of the training V1 column; separates low- from high-vol regimes.
        self.v1_median = None
        # Number of successful predict() calls so far.
        self.test_row_count = 0

    def _build_and_fit(self, X_train, y_train, X_val=None, y_val=None):
        """Create the model named by ``config.model_type`` and fit it.

        With a validation set, XGBoost / LightGBM use it for early stopping.
        Without one, every model is fitted plainly. Returns the fitted model.
        """
        cfg = self.config
        if cfg.model_type == 'ensemble':
            model = EnsembleModel(cfg)
            model.fit(X_train, y_train, X_val, y_val)
            return model

        model = ModelFactory.create_model(cfg.model_type, cfg)
        has_validation = X_val is not None and y_val is not None
        if cfg.model_type == 'xgboost' and has_validation:
            # early_stopping_rounds is an estimator parameter since XGBoost
            # 1.6; newer releases reject it as a fit() keyword.
            model.set_params(early_stopping_rounds=cfg.early_stopping_rounds)
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
        elif cfg.model_type == 'lightgbm' and has_validation:
            model.fit(
                X_train, y_train,
                eval_set=[(X_val, y_val)],
                callbacks=[
                    lgb.early_stopping(cfg.early_stopping_rounds),
                    lgb.log_evaluation(0)
                ]
            )
        else:
            model.fit(X_train, y_train)
        return model

    def train(self, train_df: pd.DataFrame) -> 'HullTacticalPipeline':
        """Fit processor and model on ``train_df``; returns ``self``.

        When grid search is enabled and succeeds, its best config, model and
        processor are adopted. Otherwise the current config is fitted directly
        with a chronological train/validation split.
        """
        self.train_df = train_df.copy()

        if 'V1' in train_df.columns:
            v1_values = pd.to_numeric(train_df['V1'], errors='coerce')
            self.v1_median = v1_values.median() if not v1_values.isna().all() else 0.01
        else:
            self.v1_median = 0.01

        if self.config.enable_grid_search:
            if self.config.verbose:
                print("Running grid search...")

            optimizer = GridSearchOptimizer(self.config)
            best_config, best_model, best_processor = optimizer.run(train_df)

            if best_model is not None:
                self.config = best_config
                self.model = best_model
                self.processor = best_processor

                if self.config.verbose:
                    print(f"Grid search complete. Best RMSE: {optimizer.best_score:.6f}")
            else:
                if self.config.verbose:
                    print("Grid search failed, using default configuration")

        if self.processor is None:
            self.processor = DataProcessor(self.config)
            self.processor.fit(train_df)

        if self.model is None:
            X = self.processor.transform(train_df)
            y = train_df['target'].values if 'target' in train_df.columns else np.zeros(len(train_df))
            y = np.nan_to_num(y, nan=0.0)

            # Chronological split: the newest rows form the validation set.
            split_idx = int(len(X) * (1 - self.config.validation_size))
            X_train, X_val = X[:split_idx], X[split_idx:]
            y_train, y_val = y[:split_idx], y[split_idx:]

            self.model = self._build_and_fit(X_train, y_train, X_val, y_val)

        return self

    def update_with_new_data(self, new_row: pd.DataFrame):
        """Append ``new_row`` to the training window and refit when due.

        The window is capped at ``max_train_rows``. A refit happens every
        ``retrain_frequency`` predicted rows; a frequency of 0 or less
        disables refitting. The new processor and model are built aside and
        swapped in together only if both succeed, so a failed refit leaves the
        previous, mutually consistent pair in place.
        """
        if self.config.use_online_learning and self.train_df is not None:
            self.train_df = pd.concat([self.train_df, new_row], ignore_index=True)

            if len(self.train_df) > self.config.max_train_rows:
                self.train_df = self.train_df.tail(self.config.max_train_rows).reset_index(drop=True)

            frequency = self.config.retrain_frequency
            due = frequency > 0 and self.test_row_count > 0 and self.test_row_count % frequency == 0
            if due:
                if self.config.verbose and self.test_row_count % 50 == 0:
                    print(f"Retraining at row {self.test_row_count}...")

                try:
                    processor = DataProcessor(self.config)
                    processor.fit(self.train_df)

                    X = processor.transform(self.train_df)
                    y = self.train_df['target'].values
                    y = np.nan_to_num(y, nan=0.0)

                    model = self._build_and_fit(X, y)
                except Exception as e:
                    # Best effort: keep serving with the previous pair.
                    if self.config.verbose:
                        print(f"Retraining failed: {str(e)[:100]}")
                else:
                    self.processor = processor
                    self.model = model

    def predict(self, test_df: pd.DataFrame) -> float:
        """Return the allocation for the first row of ``test_df``.

        The raw model output is multiplied by the regime multiplier,
        optionally divided by volatility, clipped to
        ``[min_signal, max_signal]``, smoothed against the previous allocation
        and reduced by the transaction cost. A missing or non-finite raw
        prediction counts as 0.0 so NaN cannot leak into the smoothing state.

        Returns 1.0 while the pipeline is untrained. On an internal error it
        returns the previous allocation if positive, else 1.0.
        """
        if self.processor is None or self.model is None:
            return 1.0

        try:
            X_test = self.processor.transform(test_df)

            raw_pred = 0.0
            if hasattr(self.model, 'predict'):
                raw_values = np.asarray(self.model.predict(X_test)).ravel()
                if raw_values.size > 0:
                    raw_pred = float(raw_values[0])
            if not np.isfinite(raw_pred):
                raw_pred = 0.0

            # Parse V1 once; NaN means "absent or unusable".
            has_v1 = 'V1' in test_df.columns
            v1_val = pd.to_numeric(test_df['V1'].iloc[0], errors='coerce') if has_v1 else np.nan
            v1_ok = has_v1 and not pd.isna(v1_val)

            volatility = 0.01
            if has_v1:
                if v1_ok:
                    # Floor avoids dividing the signal by a near-zero volatility.
                    volatility = max(v1_val, 0.001)
            elif self.train_df is not None and 'target' in self.train_df.columns:
                # No V1 column: estimate volatility from recent realised targets.
                recent_returns = self.train_df['target'].tail(self.config.volatility_window).values
                if len(recent_returns) > 1:
                    vol_std = np.std(recent_returns)
                    if vol_std > 0:
                        volatility = vol_std

            use_low_vol_multiplier = (
                self.v1_median is not None and v1_ok and v1_val < self.v1_median
            )

            if use_low_vol_multiplier:
                signal_mult = self.config.signal_multiplier_low_vol
            else:
                signal_mult = self.config.signal_multiplier_high_vol

            signal = raw_pred * signal_mult

            if self.config.use_volatility_scaling:
                signal = signal / (volatility * self.config.vol_scaling)

            signal = np.clip(signal, self.config.min_signal, self.config.max_signal)

            # Exponential smoothing against the previous allocation.
            allocation = (self.config.smoothing_weight * signal +
                         (1 - self.config.smoothing_weight) * self.last_allocation)

            allocation *= (1 - self.config.transaction_cost)

            self.last_allocation = allocation
            self.test_row_count += 1

            return float(allocation)

        except Exception as e:
            # Never fail the inference server; fall back to a safe allocation.
            if self.config.verbose:
                print(f"Prediction error: {str(e)[:100]}")
            return float(self.last_allocation) if self.last_allocation > 0 else 1.0

# ============================================================================
# MAIN EXECUTION
# ============================================================================
def main() -> None:
    """Train the pipeline and serve allocations through the Kaggle gateway.

    Steps:
      1. Build the run ``Config`` and load the training CSV, keeping rows with
         ``date_id >= config.min_date_id`` and then only the last
         ``config.max_train_rows`` of those.
      2. Fit a ``HullTacticalPipeline`` on that frame.
      3. Wrap the pipeline in a ``predict`` closure and hand it to the Kaggle
         inference server (``serve()`` when ``KAGGLE_IS_COMPETITION_RERUN`` is
         set, otherwise the local gateway).

    If creating or running the server raises, the error is printed and
    ``predict`` is smoke-tested once on the first training row instead.
    """
    print("="*60)
    print("Hull Tactical Market Prediction - Enhanced Pipeline")
    print("="*60)
    
    config = Config(
        enable_grid_search=True,
        max_configurations=3,
        time_limit_minutes=5,
        use_rolling_features=True,
        use_technical_indicators=True,
        use_interaction_features=True,
        use_derived_features=True,
        use_online_learning=True,
        retrain_frequency=1,
        signal_multiplier_low_vol=600.0,
        signal_multiplier_high_vol=400.0,
        max_train_rows=800,
        verbose=True
    )
    
    print("Loading training data...")
    train_df = load_and_clean_data(
        config.data_path / config.train_file,
        target_col='market_forward_excess_returns'
    )
    
    # Restrict to the configured date range first, then keep only the most
    # recent rows so training stays within the row budget.
    train_df = train_df[train_df['date_id'] >= config.min_date_id].copy()
    train_df = train_df.tail(config.max_train_rows).reset_index(drop=True)
    
    print(f"Loaded {len(train_df)} training samples")
    
    pipeline = HullTacticalPipeline(config)
    
    print("\nTraining pipeline...")
    pipeline.train(train_df)
    
    print("Training complete!")
    
    lagged_col = 'lagged_market_forward_excess_returns'
    non_numeric_cols = ('date_id', 'is_scored')
    
    # Features of the most recently served row; they become a labelled
    # training row once the next call reveals their realised return.
    previous_row: Optional[pd.DataFrame] = None
    
    def predict(test: pl.DataFrame) -> float:
        """Return the allocation for one gateway batch.

        Before predicting, the previously served row is fed back to the
        pipeline as a new labelled sample (online learning).
        """
        nonlocal previous_row
        
        test_pd = test.to_pandas()
        
        # Coerce every column except the bookkeeping ones to numeric;
        # unparsable values become NaN.
        for col in test_pd.columns:
            if col not in non_numeric_cols:
                test_pd[col] = pd.to_numeric(test_pd[col], errors='coerce')
        
        # Per the competition data description, the lagged_* columns on the
        # current row hold the realised values for the PREVIOUS date. The
        # label for the cached previous-row features therefore comes from the
        # current row, not from the cached row's own lagged column (which
        # belongs to the date before it).
        # NOTE(review): assumes one row per call and that
        # update_with_new_data expects feature columns plus 'target' - confirm.
        if (previous_row is not None
                and lagged_col in test_pd.columns
                and len(test_pd) > 0):
            update_row = previous_row.drop(columns=[lagged_col], errors='ignore').copy()
            update_row['target'] = test_pd[lagged_col].iloc[0]
            pipeline.update_with_new_data(update_row)
        
        allocation = pipeline.predict(test_pd)
        
        # Cache this row's features until its realised return arrives.
        previous_row = test_pd.copy()
        
        return allocation
    
    print("\nStarting inference server...")
    
    try:
        inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)
        
        if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
            inference_server.serve()
        else:
            inference_server.run_local_gateway((str(config.data_path),))
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and fall back to
        # a single local call so the prediction path is still exercised.
        print(f"Server error: {str(e)}")
        print("Testing prediction function locally...")
        
        test_sample = train_df.iloc[[0]].copy()
        test_pl = pl.from_pandas(test_sample)
        result = predict(test_pl)
        print(f"Test prediction: {result}")

# Run the full train-and-serve pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()