In [None]:
class ModelDevelopment:
    """Regime-specific model training, ensembling and position construction.

    The methods below use members that are not defined in this block and must
    be provided elsewhere on the class or instance: ``calculate_regime``,
    ``train_base_model``, ``feature_cols`` and ``regime_models``.

    Polars types are written as string annotations so that they are not
    evaluated when the class body executes.
    """

    def train_regime_specific_models(self, train_data: "pl.DataFrame", val_data: "pl.DataFrame",
                                     min_samples: int = 1000) -> Dict:
        """Train a separate model for each volatility regime.

        Both frames are passed through ``self.calculate_regime`` and then split
        on the ``vol_regime`` column into the ``'high'``, ``'normal'`` and
        ``'low'`` regimes.

        Args:
            train_data: Training rows.
            val_data: Validation rows; the slice for a regime is handed to
                ``self.train_base_model`` together with that regime's
                training slice.
            min_samples: A regime is trained only when it has strictly more
                than this many training rows (default 1000).

        Returns:
            ``{'models': {...}, 'scores': {...}}`` keyed by regime name.
            Regimes that were skipped are absent from both inner dicts.
        """
        # The regime label has to exist on both frames before they can be split.
        train_data = self.calculate_regime(train_data)
        val_data = self.calculate_regime(val_data)

        regime_models = {}
        regime_scores = {}

        for regime in ('high', 'normal', 'low'):
            regime_train = train_data.filter(pl.col('vol_regime') == regime)

            # Too few samples for this regime: leave it out of the result.
            if len(regime_train) <= min_samples:
                continue

            # NOTE(review): the validation slice may be empty; whether
            # train_base_model copes with that is not visible here.
            regime_val = val_data.filter(pl.col('vol_regime') == regime)

            model_dict = self.train_base_model(regime_train, regime_val)
            regime_models[regime] = model_dict['model']
            regime_scores[regime] = model_dict['validation_score']

        return {
            'models': regime_models,
            'scores': regime_scores
        }

    def create_ensemble_prediction(self, regime_models: Dict, data: "pl.DataFrame") -> np.ndarray:
        """Predict each row with the model trained for that row's regime.

        Args:
            regime_models: Dict with a ``'models'`` entry mapping regime name
                to a fitted model exposing ``predict`` (the structure returned
                by ``train_regime_specific_models``).
            data: Rows to predict; must contain the columns listed in
                ``self.feature_cols``.

        Returns:
            One prediction per row. Rows whose regime has no model in
            ``regime_models['models']`` keep the initial value ``0.0``.
        """
        data = self.calculate_regime(data)
        X = data.select(self.feature_cols).to_numpy()

        # Convert the regime column once so the masks below are plain NumPy
        # boolean arrays instead of Series objects used as NumPy indexers.
        regimes = np.asarray(data['vol_regime'])

        predictions = np.zeros(len(data))

        for regime, model in regime_models['models'].items():
            mask = regimes == regime
            if mask.any():
                predictions[mask] = model.predict(X[mask])

        return predictions

    def calculate_position_sizes(self, predictions: np.ndarray,
                                 confidences: np.ndarray,
                                 max_position: float = 1.0) -> np.ndarray:
        """Turn predictions into zero-sum positions bounded by ``max_position``.

        Steps: weight each prediction by its confidence, clip to
        ``[-max_position, max_position]``, subtract the mean so the positions
        sum to zero, then shrink all positions by a common factor if the
        demeaning step pushed any of them back over the limit.

        Args:
            predictions: Signed model outputs.
            confidences: Per-prediction weights, multiplied element-wise with
                ``predictions``.
            max_position: Largest absolute position allowed.

        Returns:
            Float array of positions; empty input yields an empty array.
        """
        scaled_positions = (np.asarray(predictions, dtype=float)
                            * np.asarray(confidences, dtype=float))

        # Nothing to size; also avoids taking the mean of an empty array.
        if scaled_positions.size == 0:
            return scaled_positions

        # Apply position limits
        positions = np.clip(scaled_positions, -max_position, max_position)

        # Ensure positions sum to zero (market neutral)
        positions = positions - np.mean(positions)

        # Demeaning shifts every position by the same amount, which can move
        # some of them outside the limit again. A uniform rescale restores the
        # limit and keeps the sum at zero.
        peak = np.max(np.abs(positions))
        if peak > max_position:
            positions = positions * (max_position / peak)

        return positions

    def apply_risk_controls(self, positions: np.ndarray,
                            data: "pl.DataFrame",
                            max_leverage: float = 5.0,
                            max_concentration: float = 0.2) -> np.ndarray:
        """Scale positions to respect leverage and per-symbol concentration caps.

        Args:
            positions: One position per row of ``data``. Not modified.
            data: Frame with a ``symbol_id`` column aligned with ``positions``.
            max_leverage: Cap on the sum of absolute positions.
            max_concentration: Cap on the sum of absolute positions within a
                single ``symbol_id``.

        Returns:
            A new float array with the adjusted positions.

        Note:
            Per-symbol scaling uses a different factor for each symbol, so a
            zero-sum input is not guaranteed to remain zero-sum.
        """
        # Work on a float copy: the caller's array is never mutated and
        # integer inputs can be scaled by fractional factors.
        positions = np.array(positions, dtype=float)

        # Scale down if exceeding leverage limits
        leverage = np.sum(np.abs(positions))
        if leverage > max_leverage:
            positions *= max_leverage / leverage

        # Gross exposure per symbol in a single grouped pass: `group` maps
        # each row to the index of its symbol among the unique symbols.
        symbols = np.asarray(data['symbol_id'])
        _, group = np.unique(symbols, return_inverse=True)
        exposure = np.bincount(group, weights=np.abs(positions))

        # Only symbols above the cap are scaled; all others keep factor 1.
        scale = np.ones_like(exposure)
        over = exposure > max_concentration
        scale[over] = max_concentration / exposure[over]

        return positions * scale[group]

    def create_submission(self, test_data: "pl.DataFrame") -> "pl.DataFrame":
        """Build the submission frame of final positions for ``test_data``.

        Runs the ensemble prediction using ``self.regime_models``, sizes the
        positions, applies the risk controls with their default limits and
        pairs the result with the ``row_id`` column.

        Args:
            test_data: Rows to score; must contain ``row_id`` plus whatever
                the prediction and risk-control steps read.

        Returns:
            Frame with columns ``row_id`` and ``action``.
        """
        predictions = self.create_ensemble_prediction(self.regime_models, test_data)

        # Simple confidence measure: the magnitude of the prediction itself.
        confidences = np.abs(predictions)

        positions = self.calculate_position_sizes(predictions, confidences)
        final_positions = self.apply_risk_controls(positions, test_data)

        return pl.DataFrame({
            'row_id': test_data['row_id'],
            'action': final_positions
        })