In [9]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Dict, List, Any, Optional
import json
import warnings
from datetime import datetime, timedelta
import logging
from pathlib import Path
from data_loading import build_panel_dataset

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Suppress warnings
warnings.filterwarnings('ignore')

In [4]:
# Define paths relative to the notebook location
data_folder = Path('..') / '..' / 'Data' / 'Short Interest Data'
crsp_file = Path('..') / '..' / 'Data' / 'CRSP Market Data 2.csv'
ibes_file = Path('..') / '..' / 'Data' / 'IBES Recommendations.csv'
compustat_file = Path('..') / '..' / 'Data' / 'Compustat Fundamentals.csv'

# Load with your paths
panel_df = build_panel_dataset(
    data_folder=data_folder,
    crsp_file=crsp_file,
    ibes_file=ibes_file,
    compustat_file=compustat_file
)

INFO:data_loading:Starting panel dataset construction...
INFO:data_loading:Found 110 CSV files to merge
INFO:data_loading:Successfully loaded shrt20230315.csv with 19,946 rows
INFO:data_loading:Successfully loaded shrt20221230.csv with 20,186 rows
INFO:data_loading:Successfully loaded shrt20220729.csv with 20,601 rows
INFO:data_loading:Successfully loaded shrt20220715.csv with 20,732 rows
INFO:data_loading:Successfully loaded shrt20210615.csv with 20,251 rows
INFO:data_loading:Successfully loaded shrt20240430.csv with 19,468 rows
INFO:data_loading:Successfully loaded shrt20221031.csv with 20,696 rows
INFO:data_loading:Successfully loaded shrt20240815.csv with 19,530 rows
INFO:data_loading:Successfully loaded shrt20220930.csv with 20,742 rows
INFO:data_loading:Successfully loaded shrt20210415.csv with 19,816 rows
INFO:data_loading:Successfully loaded shrt20241115.csv with 19,729 rows
INFO:data_loading:Successfully loaded shrt20241129.csv with 19,946 rows
INFO:data_loading:Successfully l

In [107]:
import pandas as pd
import numpy as np
import logging
from typing import Dict, Any

# Setup logger for demonstration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [108]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import json

class FeatureEngineeringAgentLLM:
    def __init__(self, model_name: str):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def _build_prompt(self, data_analysis: Dict, target_column: str) -> str:
        prompt = f"""
You are a feature engineering expert for time series forecasting.

Data Analysis:
- Shape: {data_analysis['shape']}
- Target Column: {target_column}
- Numeric Columns: {data_analysis['numeric_columns']}
- Categorical Columns: {data_analysis['categorical_columns']}
- Time Columns: {data_analysis.get('time_columns', [])}

Task: Generate a feature engineering strategy for bi-weekly rate forecasting.

Provide your response as a JSON with these sections:
1. "lag_features": List of lag periods to create (e.g., [1, 2, 4, 8])
2. "rolling_features": Rolling window calculations (e.g., ["mean_7", "std_14"])
3. "interaction_features": Feature combinations to try
4. "time_features": Time-based features to extract
5. "reasoning": Explanation of your strategy

Response:
"""
        return prompt

    def _call_llm(self, prompt: str) -> str:
        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=300,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id
            )
        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        strategy_text = response.split("Response:")[-1].strip()
        return strategy_text


    def _create_lag_features(self, data: pd.DataFrame, lag_list: list, numeric_cols: list) -> pd.DataFrame:
        for lag in lag_list:
            for col in numeric_cols:
                data[f"{col}_lag_{lag}"] = data.groupby(level='PERMNO')[col].shift(lag)
        return data

    def _create_rolling_features(self, data: pd.DataFrame, rolling_list: list, numeric_cols: list) -> pd.DataFrame:
        for feature in rolling_list:
            if feature.startswith("mean_"):
                window = int(feature.split("_")[1])
                for col in numeric_cols:
                    data[f"{col}_rolling_mean_{window}"] = data.groupby(level='PERMNO')[col].rolling(window=window, min_periods=1).mean().reset_index(level=0, drop=True)
            elif feature.startswith("std_"):
                window = int(feature.split("_")[1])
                for col in numeric_cols:
                    data[f"{col}_rolling_std_{window}"] = data.groupby(level='PERMNO')[col].rolling(window=window, min_periods=1).std().reset_index(level=0, drop=True)
        return data

    def _create_time_features(self, data: pd.DataFrame, time_features: list) -> pd.DataFrame:
        # Extract time from the 'date' level in MultiIndex
        if 'date' in data.index.names:
            dates = data.index.get_level_values('date')
            if "month" in time_features:
                data['month'] = dates.month
            if "quarter" in time_features:
                data['quarter'] = dates.quarter
            if "day_of_week" in time_features:
                data['day_of_week'] = dates.dayofweek
            if "is_weekend" in time_features:
                data['is_weekend'] = dates.dayofweek.isin([5, 6]).astype(int)
        return data

    def process_features(self, data: pd.DataFrame, target_column: str, data_analysis: Dict) -> Dict[str, Any]:
        prompt = self._build_prompt(data_analysis, target_column)
        
        # Replace mock with real LLM call when ready:
        # strategy_text = '''
        #     {
        #     "lag_features": [1, 2, 4],
        #     "rolling_features": ["mean_7", "std_7"],
        #     "interaction_features": [],
        #     "time_features": ["month", "quarter", "day_of_week", "is_weekend"],
        #     "reasoning": "Use recent lags, weekly rolling stats, and common time features to capture seasonality."
        #     }
        #             '''
        strategy_text = self._call_llm(prompt)
        print('LLM strategy')
        print(strategy_text)
        try:
            strategy = json.loads(strategy_text)
        except json.JSONDecodeError:
            strategy = {
                "lag_features": [1,2],
                "rolling_features": ["mean_7"],
                "interaction_features": [],
                "time_features": ["month"],
                "reasoning": "Fallback strategy."
            }

        processed_data = data.copy()
        
        # Create features as per strategy
        processed_data = self._create_lag_features(processed_data, strategy.get("lag_features", []), data_analysis['numeric_columns'])
        processed_data = self._create_rolling_features(processed_data, strategy.get("rolling_features", []), data_analysis['numeric_columns'])
        processed_data = self._create_time_features(processed_data, strategy.get("time_features", []))

        # Dropping NaN rows generated by lags and rolling features
        processed_data = processed_data.dropna(how='all').fillna(0)

        # Final feature names
        feature_names = list(processed_data.columns)

        return {
            "strategy": strategy,
            "processed_data": processed_data,
            "feature_names": feature_names,
            "engineering_log": f"Applied feature engineering strategy: {strategy.get('reasoning')}"
        }


In [109]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import json
from typing import Dict, Any, Tuple, List
import warnings
warnings.filterwarnings('ignore')

class ValidationStrategy:
    """Handles different validation strategies for time series forecasting"""
    
    @staticmethod
    def time_based_split(data: pd.DataFrame, test_size: float = 0.2, validation_size: float = 0.1):
        """
        Time-based train/validation/test split for panel data
        
        Args:
            data: Panel DataFrame with MultiIndex (PERMNO, date)
            test_size: Proportion of recent data for testing
            validation_size: Proportion for validation (from remaining data)
        """
        # Get unique dates and sort them
        dates = sorted(data.index.get_level_values('date').unique())
        n_dates = len(dates)
        
        # Calculate split points
        test_start_idx = int(n_dates * (1 - test_size))
        val_start_idx = int((n_dates - int(n_dates * test_size)) * (1 - validation_size))
        
        train_dates = dates[:val_start_idx]
        val_dates = dates[val_start_idx:test_start_idx] 
        test_dates = dates[test_start_idx:]
        
        # Split data based on dates
        train_data = data[data.index.get_level_values('date').isin(train_dates)]
        val_data = data[data.index.get_level_values('date').isin(val_dates)]
        test_data = data[data.index.get_level_values('date').isin(test_dates)]
        
        return train_data, val_data, test_data, {
            'train_dates': (train_dates[0], train_dates[-1]),
            'val_dates': (val_dates[0], val_dates[-1]) if val_dates else None,
            'test_dates': (test_dates[0], test_dates[-1]),
            'train_size': len(train_data),
            'val_size': len(val_data),
            'test_size': len(test_data)
        }
    
    @staticmethod
    def expanding_window_validation(data: pd.DataFrame, target_column: str, 
                                  min_train_size: float = 0.5, step_size: int = 14):
        """
        Expanding window validation for time series (walk-forward analysis)
        
        Args:
            data: Panel DataFrame
            target_column: Target variable name
            min_train_size: Minimum training size as proportion of total data
            step_size: Days to step forward for each validation
        """
        dates = sorted(data.index.get_level_values('date').unique())
        n_dates = len(dates)
        min_train_dates = int(n_dates * min_train_size)
        
        validation_results = []
        
        for i in range(min_train_dates, n_dates, step_size):
            if i >= n_dates - 1:  # Need at least one test point
                break
                
            train_dates = dates[:i]
            test_dates = dates[i:i+1]  # Single point prediction
            
            train_data = data[data.index.get_level_values('date').isin(train_dates)]
            test_data = data[data.index.get_level_values('date').isin(test_dates)]
            
            if len(test_data) > 0:
                validation_results.append({
                    'train_data': train_data,
                    'test_data': test_data,
                    'train_end_date': train_dates[-1],
                    'test_date': test_dates[0]
                })
        
        return validation_results

class ForecastingAgentLLM:
    """ Forecasting agent with comprehensive validation capabilities"""
    
    def __init__(self, model_name: str = None):
        self.models = {
            'random_forest': RandomForestRegressor(n_estimators=100, random_state=42),
            'linear_regression': LinearRegression(),
        }
        self.scaler = StandardScaler()
        self.selected_model = None
        self.model_name = model_name
        self.validation_strategy = ValidationStrategy()
        
        # Initialize LLM components if provided
        if model_name:
            from transformers import AutoTokenizer, AutoModelForCausalLM
            import torch
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model_llm = AutoModelForCausalLM.from_pretrained(model_name)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

    def _prepare_biweekly_data(self, data: pd.DataFrame, target_column: str) -> pd.DataFrame:
        """Extract bi-weekly target points for training while keeping daily features"""
        if data.index.names[0] != 'PERMNO':
            data_sorted = data.swaplevel().sort_index()
        else:
            data_sorted = data.sort_index()
        
        if not data_sorted.index.is_unique:
            print(f"Warning: Found {data_sorted.index.duplicated().sum()} duplicate index entries. Removing duplicates.")
            data_sorted = data_sorted[~data_sorted.index.duplicated(keep='last')]
        
        biweekly_data = []
        unique_permnos = data_sorted.index.get_level_values('PERMNO').unique()
        
        for permno in unique_permnos:
            try:
                entity_data = data_sorted.loc[permno]
                
                if isinstance(entity_data, pd.Series):
                    entity_data = entity_data.to_frame().T
                    entity_data.index = [data_sorted.loc[permno].name[1]] if hasattr(data_sorted.loc[permno].name, '__getitem__') else [entity_data.index[0]]
                
                # Get dates where target actually changed (bi-weekly points)
                target_changes = entity_data[target_column] != entity_data[target_column].shift(1)
                biweekly_points = entity_data[target_changes | (entity_data.index == entity_data.index[0])]
                
                # Add PERMNO back to index
                biweekly_points_indexed = biweekly_points.copy()
                biweekly_points_indexed['PERMNO'] = permno
                biweekly_points_indexed = biweekly_points_indexed.set_index('PERMNO', append=True).swaplevel()
                
                biweekly_data.append(biweekly_points_indexed)
                
            except (KeyError, Exception) as e:
                print(f"Error processing PERMNO {permno}: {e}")
                continue
        
        return pd.concat(biweekly_data) if biweekly_data else pd.DataFrame()

    def _calculate_metrics(self, y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
        """Calculate comprehensive evaluation metrics"""
        try:
            mae = mean_absolute_error(y_true, y_pred)
            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
            
            # Avoid division by zero in MAPE calculation
            mask = y_true != 0
            if np.sum(mask) > 0:
                mape = np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100
            else:
                mape = float('inf')
            
            # R-squared
            ss_res = np.sum((y_true - y_pred) ** 2)
            ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
            r2 = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0
            
            return {
                'MAE': float(mae),
                'RMSE': float(rmse), 
                'MAPE': float(mape),
                'R2': float(r2)
            }
        except Exception as e:
            print(f"Error calculating metrics: {e}")
            return {'MAE': float('inf'), 'RMSE': float('inf'), 'MAPE': float('inf'), 'R2': 0}

    def _prepare_features_and_target(self, data: pd.DataFrame, target_column: str) -> Tuple[np.ndarray, np.ndarray, List[str]]:
        """Prepare features and target for training"""
        feature_cols = [col for col in data.columns if col != target_column]
        numeric_feature_cols = data[feature_cols].select_dtypes(include=[np.number]).columns.tolist()
        
        data_clean = data.dropna(how='all').fillna(0)
        
        X = data_clean[numeric_feature_cols].values
        y = data_clean[target_column].values
        
        return X, y, numeric_feature_cols

    def validate_with_holdout(self, data: pd.DataFrame, target_column: str, 
                            test_size: float = 0.2, validation_size: float = 0.1) -> Dict[str, Any]:
        """
        Comprehensive hold-out validation for bi-weekly forecasting
        
        Args:
            data: Panel DataFrame with engineered features
            target_column: Target variable name
            test_size: Proportion for test set
            validation_size: Proportion for validation set
        """
        print("Starting hold-out validation...")
        
        # Step 1: Time-based split
        train_data, val_data, test_data, split_info = self.validation_strategy.time_based_split(
            data, test_size, validation_size
        )
        
        print(f"Data split - Train: {split_info['train_size']}, Val: {split_info['val_size']}, Test: {split_info['test_size']}")
        
        # Step 2: Prepare bi-weekly data for each split
        train_biweekly = self._prepare_biweekly_data(train_data, target_column)
        val_biweekly = self._prepare_biweekly_data(val_data, target_column) if len(val_data) > 0 else pd.DataFrame()
        test_biweekly = self._prepare_biweekly_data(test_data, target_column)
        
        if len(train_biweekly) < 10:
            return {"error": "Insufficient training data after bi-weekly filtering"}
        
        # Step 3: Model selection using validation set (if available)
        best_model_name = self._select_best_model(train_biweekly, val_biweekly, target_column)
        
        # Step 4: Train final model on train+validation data
        if len(val_biweekly) > 0:
            full_train_data = pd.concat([train_biweekly, val_biweekly])
        else:
            full_train_data = train_biweekly
        
        X_train, y_train, feature_names = self._prepare_features_and_target(full_train_data, target_column)
        X_train_scaled = self.scaler.fit_transform(X_train)
        
        self.selected_model = self.models[best_model_name]
        self.selected_model.fit(X_train_scaled, y_train)
        
        # Step 5: Evaluate on test set
        if len(test_biweekly) > 0:
            X_test, y_test, _ = self._prepare_features_and_target(test_biweekly, target_column)
            X_test_scaled = self.scaler.transform(X_test)
            
            y_pred = self.selected_model.predict(X_test_scaled)
            test_metrics = self._calculate_metrics(y_test, y_pred)
        else:
            test_metrics = {"error": "No test data available"}
            y_test, y_pred = None, None
        
        # Step 6: Generate final predictions for next period
        latest_data = data.groupby(level='PERMNO').tail(1)
        X_latest, _, _ = self._prepare_features_and_target(latest_data, target_column)
        X_latest_scaled = self.scaler.transform(X_latest)
        future_predictions = self.selected_model.predict(X_latest_scaled)
        
        return {
            'validation_results': {
                'test_metrics': test_metrics,
                'model_used': best_model_name,
                'split_info': split_info,
                'training_samples': len(full_train_data),
                'test_samples': len(test_biweekly)
            },
            'predictions': {
                'future_forecasts': future_predictions.tolist(),
                'test_predictions': y_pred.tolist() if y_pred is not None else [],
                'test_actuals': y_test.tolist() if y_test is not None else []
            },
            'model_info': {
                'selected_model': best_model_name,
                'feature_count': len(feature_names),
                'feature_importance': self._get_feature_importance(feature_names) if best_model_name == 'random_forest' else None
            }
        }

    def _select_best_model(self, train_data: pd.DataFrame, val_data: pd.DataFrame, target_column: str) -> str:
        """Select best model based on validation performance"""
        if len(val_data) == 0:
            return 'random_forest'  # Default choice
        
        X_train, y_train, _ = self._prepare_features_and_target(train_data, target_column)
        X_val, y_val, _ = self._prepare_features_and_target(val_data, target_column)
        
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)
        
        best_model = 'random_forest'
        best_score = float('inf')
        
        for model_name, model in self.models.items():
            try:
                model.fit(X_train_scaled, y_train)
                y_pred = model.predict(X_val_scaled)
                mae = mean_absolute_error(y_val, y_pred)
                
                if mae < best_score:
                    best_score = mae
                    best_model = model_name
                    
            except Exception as e:
                print(f"Error evaluating model {model_name}: {e}")
                continue
        
        return best_model

    def cross_validate(self, data: pd.DataFrame, target_column: str, min_train_size: float = 0.5) -> Dict[str, Any]:
        """
        Expanding window cross-validation for time series
        """
        print("Starting expanding window cross-validation...")
        
        validation_splits = self.validation_strategy.expanding_window_validation(
            data, target_column, min_train_size
        )
        
        cv_results = []
        
        for i, split in enumerate(validation_splits):
            print(f"Processing fold {i+1}/{len(validation_splits)}")
            
            train_biweekly = self._prepare_biweekly_data(split['train_data'], target_column)
            test_biweekly = self._prepare_biweekly_data(split['test_data'], target_column)
            
            if len(train_biweekly) < 5 or len(test_biweekly) == 0:
                continue
            
            # Train model
            X_train, y_train, _ = self._prepare_features_and_target(train_biweekly, target_column)
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            
            model = RandomForestRegressor(n_estimators=50, random_state=42)  # Smaller for speed
            model.fit(X_train_scaled, y_train)
            
            # Test model
            X_test, y_test, _ = self._prepare_features_and_target(test_biweekly, target_column)
            X_test_scaled = scaler.transform(X_test)
            y_pred = model.predict(X_test_scaled)
            
            metrics = self._calculate_metrics(y_test, y_pred)
            
            cv_results.append({
                'fold': i+1,
                'train_end_date': split['train_end_date'],
                'test_date': split['test_date'],
                'metrics': metrics
            })
        
        # Aggregate results
        if cv_results:
            avg_metrics = {}
            for metric in ['MAE', 'RMSE', 'MAPE', 'R2']:
                values = [r['metrics'][metric] for r in cv_results if not np.isinf(r['metrics'][metric])]
                avg_metrics[f'avg_{metric}'] = np.mean(values) if values else float('inf')
                avg_metrics[f'std_{metric}'] = np.std(values) if values else 0
        else:
            avg_metrics = {}
        
        return {
            'cross_validation_results': cv_results,
            'average_metrics': avg_metrics,
            'num_folds': len(cv_results)
        }

    def _get_feature_importance(self, feature_names: List[str]) -> Dict[str, float]:
        """Get feature importance for tree-based models"""
        if hasattr(self.selected_model, 'feature_importances_'):
            importance_dict = dict(zip(feature_names, self.selected_model.feature_importances_))
            return dict(sorted(importance_dict.items(), key=lambda x: x[1], reverse=True)[:10])
        return {}

    # Keep the original generate_forecast method for backward compatibility
    def generate_forecast(self, data: pd.DataFrame, target_column: str, forecast_horizon: int = 1) -> Dict[str, Any]:
        """Original forecasting method (kept for compatibility)"""
        biweekly_data = self._prepare_biweekly_data(data, target_column)
        
        if len(biweekly_data) < 10:
            return {
                "predictions": [0] * forecast_horizon,
                "confidence_intervals": [(0, 0)] * forecast_horizon,
                "model_reasoning": "Insufficient bi-weekly data points for training",
                "forecast_horizon": forecast_horizon,
                "model_used": "none",
                "training_samples": len(biweekly_data)
            }
        
        # Simple model selection and training
        X, y, feature_names = self._prepare_features_and_target(biweekly_data, target_column)
        X_scaled = self.scaler.fit_transform(X)
        
        self.selected_model = self.models['random_forest']
        self.selected_model.fit(X_scaled, y)
        
        latest_data = data.groupby(level='PERMNO').tail(1)
        X_pred, _, _ = self._prepare_features_and_target(latest_data, target_column)
        X_pred_scaled = self.scaler.transform(X_pred)
        
        predictions = self.selected_model.predict(X_pred_scaled).tolist()
        
        train_residuals = self.selected_model.predict(X_scaled) - y
        residual_std = np.std(train_residuals)
        confidence_intervals = [(pred - 1.96 * residual_std, pred + 1.96 * residual_std) 
                              for pred in predictions]
        
        return {
            "predictions": predictions,
            "confidence_intervals": confidence_intervals,
            "model_reasoning": "Standard random forest approach",
            "forecast_horizon": forecast_horizon,
            "model_used": "random_forest",
            "training_samples": len(biweekly_data),
            "feature_importance": self._get_feature_importance(feature_names)
        }



In [110]:
# Update the orchestrator to use the enhanced forecasting agent
class EnhancedOrchestratorAgent:
    def __init__(self, feature_engineer: FeatureEngineeringAgentLLM, forecaster: ForecastingAgentLLM):
        self.feature_engineer = feature_engineer
        self.forecaster = forecaster
    
    def execute_validation_workflow(self, data: pd.DataFrame, target_column: str, 
                                  validation_type: str = 'holdout', **kwargs) -> Dict[str, Any]:
        """Execute workflow with comprehensive validation"""
        print("Starting enhanced workflow with validation...")
        
        # Step 1: Data Analysis
        data_analysis = {
            "shape": data.shape,
            "columns": list(data.columns),
            "target_column": target_column,
            "numeric_columns": list(data.select_dtypes(include=['number']).columns),
            "categorical_columns": list(data.select_dtypes(include=['object', 'category']).columns),
            "date_range": (data.index.get_level_values('date').min(), data.index.get_level_values('date').max()),
            "entities": len(data.index.get_level_values('PERMNO').unique())
        }
        
        # Step 2: Feature Engineering
        engineered_data = self.feature_engineer.process_features(data, target_column, data_analysis)
        
        # Step 3: Validation
        if validation_type == 'holdout':
            validation_results = self.forecaster.validate_with_holdout(
                engineered_data["processed_data"], target_column, **kwargs
            )
        elif validation_type == 'cross_validation':
            validation_results = self.forecaster.cross_validate(
                engineered_data["processed_data"], target_column, **kwargs
            )
        else:
            raise ValueError("validation_type must be 'holdout' or 'cross_validation'")
        
        return {
            "data_analysis": data_analysis,
            "engineered_features": engineered_data,
            "validation": validation_results,
            "workflow_type": validation_type
        }

# Updated main framework class
class EnhancedAgenticForecastingFramework:
    def __init__(self):
        self.feature_engineer = FeatureEngineeringAgentLLM('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
        self.forecaster = ForecastingAgentLLM('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
        self.orchestrator = EnhancedOrchestratorAgent(self.feature_engineer, self.forecaster)
    
    def run_with_validation(self, data: pd.DataFrame, target_column: str, 
                          validation_type: str = 'holdout', **kwargs) -> Dict[str, Any]:
        """Run framework with validation"""
        return self.orchestrator.execute_validation_workflow(data, target_column, validation_type, **kwargs)
    
    def run(self, data: pd.DataFrame, target_column: str, forecast_horizon: int = 1) -> Dict[str, Any]:
        """Original run method (kept for compatibility)"""
        # Use the original orchestrator workflow
        data_analysis = {
            "shape": data.shape,
            "columns": list(data.columns),
            "target_column": target_column,
            "numeric_columns": list(data.select_dtypes(include=['number']).columns),
            "categorical_columns": list(data.select_dtypes(include=['object', 'category']).columns),
        }
        
        engineered_data = self.feature_engineer.process_features(data, target_column, data_analysis)
        forecast = self.forecaster.generate_forecast(engineered_data["processed_data"], target_column, forecast_horizon)
        
        return {
            "data_analysis": data_analysis,
            "engineered_features": engineered_data,
            "forecast": forecast,
            "validation": {"timestamp": pd.Timestamp.now()}
        }

In [112]:
col = ['EXCHCD', 'SICCD', 'TICKER', 'COMNAM', 'SHRCLS', 'TSYMBOL', 'PRIMEXCH',
       'PERMCO', 'CUSIP', 'DLPDT', 'SHRFLG', 'SHRENDDT', 'BIDLO', 'ASKHI',
       'PRC', 'VOL', 'RET', 'SHROUT', 'CFACPR', 'CFACSHR', 'NUMTRD', 'RETX',
       'accountingYearMonthNumber', 'issueName',
       'issuerServicesGroupExchangeCode', 'marketClassCode',
       'currentShortPositionQuantity', 'previousShortPositionQuantity',
       'stockSplitFlag', 'averageDailyVolumeQuantity', 'daysToCoverQuantity',
       'revisionFlag', 'changePercent', 'changePreviousNumber',
       'consensus_rec', 'analyst_count']

# Initialize framework and run
framework = EnhancedAgenticForecastingFramework()
results = framework.run_with_validation(panel_df[col], target_column='currentShortPositionQuantity')

# Print outputs for inspection
print("\nData Analysis:")
print(results["data_analysis"])

print("\nFeature Engineering Strategy:")
print(results["engineered_features"]["strategy"])

print("\nSample of Engineered Features Data:")
print(results["engineered_features"]["processed_data"].head())

print("\nForecast Results:")
print(results["forecast"])

print("\nValidation Summary:")
print(results["validation"])


Starting enhanced workflow with validation...
LLM strategy
```
{
    "lag_features": [1, 2, 4, 8],
    "rolling_features": ["mean_14"],
    "interaction_features": [],
    "time_features": [],
    "reasoning": "To analyze the effect of the 14-day moving average (MA14) on the bi-weekly interest rate forecasting."
}
```
Starting hold-out validation...
Data split - Train: 6636872, Val: 763077, Test: 1918489

Data Analysis:
{'shape': (9318438, 36), 'columns': ['EXCHCD', 'SICCD', 'TICKER', 'COMNAM', 'SHRCLS', 'TSYMBOL', 'PRIMEXCH', 'PERMCO', 'CUSIP', 'DLPDT', 'SHRFLG', 'SHRENDDT', 'BIDLO', 'ASKHI', 'PRC', 'VOL', 'RET', 'SHROUT', 'CFACPR', 'CFACSHR', 'NUMTRD', 'RETX', 'accountingYearMonthNumber', 'issueName', 'issuerServicesGroupExchangeCode', 'marketClassCode', 'currentShortPositionQuantity', 'previousShortPositionQuantity', 'stockSplitFlag', 'averageDailyVolumeQuantity', 'daysToCoverQuantity', 'revisionFlag', 'changePercent', 'changePreviousNumber', 'consensus_rec', 'analyst_count'], 'targ

KeyError: 'forecast'

In [113]:
print("\nValidation Summary:")
print(results["validation"])


Validation Summary:
{'validation_results': {'test_metrics': {'MAE': 2.565995420834382e-08, 'RMSE': 8.313341032727347e-08, 'MAPE': 5.0953456372776e-09, 'R2': 1.0}, 'model_used': 'linear_regression', 'split_info': {'train_dates': (Timestamp('2021-01-04 00:00:00'), Timestamp('2023-11-15 00:00:00')), 'val_dates': (Timestamp('2023-11-16 00:00:00'), Timestamp('2024-03-14 00:00:00')), 'test_dates': (Timestamp('2024-03-15 00:00:00'), Timestamp('2024-12-31 00:00:00')), 'train_size': 6636872, 'val_size': 763077, 'test_size': 1918489}, 'training_samples': 710016, 'test_samples': 189309}, 'predictions': {'future_forecasts': [-4.828907549381256e-07, -2.7939677238464355e-09, 2.468004822731018e-08, 3.259629011154175e-08, 3.5390257835388184e-08, 2.6542693376541138e-08, 1.6298145055770874e-08, -1.6111880540847778e-07, -5.122274160385132e-09, 18.000000020489097, 335.0000000214204, 6263064.999999927, 4.563480615615845e-08, 12430.999999988824, 442627.999999993, 2871.9999999995343, 552399.9999999823, 4064

# Version with agents' communication (untested)

In [None]:
import uuid
from dataclasses import dataclass
from typing import Dict, Any, List, Optional
from enum import Enum

class MessageType(Enum):
    FEATURE_REQUEST = "feature_request"
    FEATURE_RESPONSE = "feature_response" 
    PERFORMANCE_FEEDBACK = "performance_feedback"
    MODEL_REQUEST = "model_request"
    MODEL_RESPONSE = "model_response"
    IMPROVEMENT_SUGGESTION = "improvement_suggestion"

@dataclass
class AgentMessage:
    sender: str
    receiver: str
    message_type: MessageType
    content: Dict[str, Any]
    conversation_id: str
    timestamp: str = None
    
    def __post_init__(self):
        if self.timestamp is None:
            self.timestamp = pd.Timestamp.now().isoformat()

class SimpleMessageBus:
    def __init__(self):
        self.agents = {}                # Registry of all agents
        self.message_history = []       # Keep track of all messages
        self.performance_tracker = {}   # Track how well conversations went
    
    def register_agent(self, name: str, agent):
        """Add an agent to the system"""
        self.agents[name] = agent
        agent.message_bus = self        # Give agent access to send messages
        agent.name = name              # Tell agent its own name
    
    def send_message(self, message: AgentMessage):
        """Deliver a message to the right agent"""
        self.message_history.append(message)    # Keep record
        if message.receiver in self.agents:
            return self.agents[message.receiver].receive_message(message)
        else:
            print(f"Warning: Agent {message.receiver} not found")


class CommunicatingAgent:
    def __init__(self):
        self.message_bus = None         # Will be set when registered
        self.name = None               # Will be set when registered  
        self.conversation_history = []  # All messages this agent received
        self.performance_memory = []    # Remember what worked well
    
    def receive_message(self, message: AgentMessage):
        """Handle incoming messages"""
        self.conversation_history.append(message)  # Keep record
        return self.process_message(message)       # Handle the message
    
    def send_message(self, receiver: str, message_type: MessageType, content: Dict[str, Any], conversation_id: str = None):
        """Send a message to another agent"""
        if conversation_id is None:
            conversation_id = str(uuid.uuid4())  # Create new conversation if needed
        
        message = AgentMessage(
            sender=self.name,
            receiver=receiver,
            message_type=message_type,
            content=content,
            conversation_id=conversation_id
        )
        return self.message_bus.send_message(message)  # Use post office to send
    
    def process_message(self, message: AgentMessage):
        # Override in subclasses - each agent handles messages differently
        pass


In [None]:
class CommunicatingFeatureEngineerLLM(CommunicatingAgent, FeatureEngineeringAgentLLM):
    def __init__(self, model_name: str):
        CommunicatingAgent.__init__(self)
        FeatureEngineeringAgentLLM.__init__(self, model_name)
        self.feature_performance_history = []
    
    def process_message(self, message: AgentMessage):
        if message.message_type == MessageType.PERFORMANCE_FEEDBACK:
            return self.handle_performance_feedback(message)
        elif message.message_type == MessageType.FEATURE_REQUEST:
            return self.handle_feature_request(message)
    
    def handle_performance_feedback(self, message: AgentMessage):
        """Learn from forecasting performance to improve future feature engineering"""
        feedback = message.content
        
        # Store performance history
        self.feature_performance_history.append({
            'timestamp': message.timestamp,
            'metrics': feedback.get('metrics', {}),
            'features_used': feedback.get('features_used', []),
            'feature_importance': feedback.get('feature_importance', {}),
            'conversation_id': message.conversation_id
        })
        
        # Generate improvement suggestions
        improvements = self._generate_improvements_from_feedback(feedback)
        
        # Send back suggestions
        response = self.send_message(
            receiver=message.sender,
            message_type=MessageType.IMPROVEMENT_SUGGESTION,
            content=improvements,
            conversation_id=message.conversation_id
        )
        
        return response
    
    def _generate_improvements_from_feedback(self, feedback: Dict[str, Any]) -> Dict[str, Any]:
        """Generate specific improvements based on performance feedback"""
        metrics = feedback.get('metrics', {})
        feature_importance = feedback.get('feature_importance', {})
        features_used = feedback.get('features_used', [])
        
        # Simple rules for improvement (can be enhanced with LLM later)
        improvements = {
            'analysis': {},
            'suggested_changes': {},
            'reasoning': ""
        }
        
        # Analyze performance
        mae = metrics.get('MAE', float('inf'))
        r2 = metrics.get('R2', 0)
        
        improvements['analysis'] = {
            'performance_level': 'good' if r2 > 0.7 else 'moderate' if r2 > 0.3 else 'poor',
            'mae_level': 'good' if mae < 0.1 else 'moderate' if mae < 0.5 else 'poor'
        }
        
        # Feature-specific suggestions
        if feature_importance:
            top_features = sorted(feature_importance.items(), key=lambda x: x[1], reverse=True)[:5]
            low_features = sorted(feature_importance.items(), key=lambda x: x[1])[:5]
            
            improvements['suggested_changes'] = {
                'keep_feature_types': [f.split('_')[0] for f, _ in top_features],
                'reduce_feature_types': [f.split('_')[0] for f, _ in low_features],
                'increase_lags': r2 < 0.5,  # If poor performance, try more lags
                'add_interactions': mae > 0.3  # If high error, try interactions
            }
        
        improvements['reasoning'] = f"Based on R2={r2:.3f} and MAE={mae:.3f}, suggesting feature adjustments"
        
        return improvements
    
    def process_features_with_communication(self, data: pd.DataFrame, target_column: str, 
                                          data_analysis: Dict, conversation_id: str = None) -> Dict[str, Any]:
        """Enhanced feature processing that considers past performance"""
        
        if conversation_id is None:
            conversation_id = str(uuid.uuid4())
        
        # Get historical context
        recent_performance = self._get_recent_performance_context()
        
        # Build enhanced prompt with performance context
        prompt = self._build_prompt_with_context(data_analysis, target_column, recent_performance)
        
        # Get LLM strategy
        strategy_text = self._call_llm(prompt)
        
        try:
            strategy = json.loads(strategy_text)
        except json.JSONDecodeError:
            strategy = self._get_fallback_strategy(recent_performance)
        
        # Apply strategy
        processed_data = data.copy()
        processed_data = self._create_lag_features(processed_data, strategy.get("lag_features", []), data_analysis['numeric_columns'])
        processed_data = self._create_rolling_features(processed_data, strategy.get("rolling_features", []), data_analysis['numeric_columns'])
        processed_data = self._create_time_features(processed_data, strategy.get("time_features", []))
        processed_data = processed_data.dropna(how='all').fillna(0)
        
        return {
            "strategy": strategy,
            "processed_data": processed_data,
            "feature_names": list(processed_data.columns),
            "engineering_log": f"Applied strategy with performance context: {strategy.get('reasoning')}",
            "conversation_id": conversation_id,
            "performance_context_used": recent_performance is not None
        }
    
    def _get_recent_performance_context(self) -> Optional[Dict[str, Any]]:
        """Get context from recent performance feedback"""
        if not self.feature_performance_history:
            return None
        
        # Get most recent feedback
        recent = sorted(self.feature_performance_history, key=lambda x: x['timestamp'])[-1]
        return recent
    
    def _build_prompt_with_context(self, data_analysis: Dict, target_column: str, 
                                 performance_context: Optional[Dict[str, Any]]) -> str:
        
        context_section = ""
        if performance_context:
            metrics = performance_context['metrics']
            top_features = list(performance_context.get('feature_importance', {}).keys())[:5]
            
            context_section = f"""
Previous Performance Context:
- Last R2: {metrics.get('R2', 'N/A'):.3f}
- Last MAE: {metrics.get('MAE', 'N/A'):.3f}
- Top performing features: {top_features}

Use this context to improve the strategy.
"""
        
        prompt = f"""
You are a feature engineering expert for bi-weekly forecasting on panel data.

Data Analysis:
- Shape: {data_analysis['shape']}
- Target: {target_column} (bi-weekly frequency)
- Entities: {data_analysis.get('entities', 'Unknown')}
- Date range: {data_analysis.get('date_range', 'Unknown')}

{context_section}

Task: Create an improved feature engineering strategy.
Focus on bi-weekly patterns and cross-sectional variations.

Return JSON with:
1. "lag_features": [1, 2, 4, 8, 14] (include bi-weekly relevant lags)
2. "rolling_features": ["mean_7", "mean_14", "std_14"]  
3. "interaction_features": []
4. "time_features": ["month", "quarter", "day_of_week"]
5. "reasoning": Your strategy explanation

Response:
"""
        return prompt
    
    def _get_fallback_strategy(self, performance_context: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Fallback strategy based on performance context"""
        if performance_context and performance_context['metrics'].get('R2', 0) < 0.3:
            # Poor performance - try more features
            return {
                "lag_features": [1, 2, 4, 8, 14],
                "rolling_features": ["mean_7", "mean_14", "std_7", "std_14"],
                "interaction_features": [],
                "time_features": ["month", "quarter", "day_of_week", "is_weekend"],
                "reasoning": "Expanding feature set due to poor previous performance"
            }
        else:
            # Default strategy
            return {
                "lag_features": [1, 2, 4],
                "rolling_features": ["mean_7"],
                "interaction_features": [],
                "time_features": ["month"],
                "reasoning": "Standard fallback strategy"
            }


In [None]:
class CommunicatingForecastingAgentLLM(CommunicatingAgent, ForecastingAgentLLM):
    def __init__(self, model_name: str = None):
        CommunicatingAgent.__init__(self)
        ForecastingAgentLLM.__init__(self, model_name)
    
    def validate_with_feedback(self, data: pd.DataFrame, target_column: str, 
                             conversation_id: str, **kwargs) -> Dict[str, Any]:
        """Enhanced validation that provides feedback to feature engineer"""
        
        # Run original validation
        results = self.validate_with_holdout(data, target_column, **kwargs)
        
        # Extract feedback information
        if 'validation_results' in results:
            feedback_content = {
                'metrics': results['validation_results'].get('test_metrics', {}),
                'features_used': list(data.columns),
                'feature_importance': results.get('model_info', {}).get('feature_importance', {}),
                'model_used': results['validation_results'].get('model_used'),
                'training_samples': results['validation_results'].get('training_samples'),
                'conversation_id': conversation_id
            }
            
            # Send feedback to feature engineer
            self.send_message(
                receiver='feature_engineer',
                message_type=MessageType.PERFORMANCE_FEEDBACK,
                content=feedback_content,
                conversation_id=conversation_id
            )
        
        return results


In [None]:
class CommunicatingOrchestrator(CommunicatingAgent):
    def __init__(self, feature_engineer: CommunicatingFeatureEngineerLLM, 
                 forecaster: CommunicatingForecastingAgentLLM):
        super().__init__()
        self.feature_engineer = feature_engineer
        self.forecaster = forecaster
        self.message_bus = SimpleMessageBus()
        
        # Register agents
        self.message_bus.register_agent('orchestrator', self)
        self.message_bus.register_agent('feature_engineer', feature_engineer)
        self.message_bus.register_agent('forecaster', forecaster)
    
    def execute_communicating_workflow(self, data: pd.DataFrame, target_column: str) -> Dict[str, Any]:
        """Execute workflow with agent communication"""
        conversation_id = str(uuid.uuid4())
        
        print(f"Starting communicating workflow (ID: {conversation_id[:8]}...)")
        
        # Step 1: Data Analysis
        data_analysis = {
            "shape": data.shape,
            "columns": list(data.columns),
            "target_column": target_column,
            "numeric_columns": list(data.select_dtypes(include=['number']).columns),
            "categorical_columns": list(data.select_dtypes(include=['object', 'category']).columns),
            "date_range": (data.index.get_level_values('date').min(), data.index.get_level_values('date').max()),
            "entities": len(data.index.get_level_values('PERMNO').unique())
        }
        
        # Step 2: Feature Engineering with Communication
        print("Feature Engineering Agent working...")
        engineered_data = self.feature_engineer.process_features_with_communication(
            data, target_column, data_analysis, conversation_id
        )
        
        # Step 3: Forecasting with Feedback
        print("Forecasting Agent working...")
        forecast_results = self.forecaster.validate_with_feedback(
            engineered_data["processed_data"], target_column, conversation_id
        )
        
        # Step 4: Track overall performance
        if 'validation_results' in forecast_results:
            metrics = forecast_results['validation_results'].get('test_metrics', {})
            self.message_bus.track_performance(conversation_id, metrics)
        
        return {
            "conversation_id": conversation_id,
            "data_analysis": data_analysis,
            "engineered_features": engineered_data,
            "forecast_results": forecast_results,
            "communication_log": [
                {
                    "sender": msg.sender,
                    "receiver": msg.receiver, 
                    "type": msg.message_type.value,
                    "timestamp": msg.timestamp
                }
                for msg in self.message_bus.message_history 
                if msg.conversation_id == conversation_id
            ]
        }
    
    def run_iterative_improvement(self, data: pd.DataFrame, target_column: str, iterations: int = 3):
        """Run multiple iterations to see improvement over time"""
        results = []
        
        for i in range(iterations):
            print(f"\n=== Iteration {i+1}/{iterations} ===")
            result = self.execute_communicating_workflow(data, target_column)
            results.append(result)
            
            # Print progress
            if 'forecast_results' in result and 'validation_results' in result['forecast_results']:
                metrics = result['forecast_results']['validation_results'].get('test_metrics', {})
                print(f"Iteration {i+1} - R2: {metrics.get('R2', 'N/A'):.3f}, MAE: {metrics.get('MAE', 'N/A'):.3f}")
        
        return results


In [None]:
# Initialize the communicating framework
def create_communicating_framework():
    feature_engineer = CommunicatingFeatureEngineerLLM('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
    forecaster = CommunicatingForecastingAgentLLM('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
    orchestrator = CommunicatingOrchestrator(feature_engineer, forecaster)
    return orchestrator

# Run the framework
communicating_framework = create_communicating_framework()

# Single run with communication
results = communicating_framework.execute_communicating_workflow(
    panel_df[col], 'currentShortPositionQuantity'
)

print("Communication Log:")
for log_entry in results['communication_log']:
    print(f"  {log_entry['sender']} → {log_entry['receiver']}: {log_entry['type']}")

# Iterative improvement (this is where you'll see the learning!)
iterative_results = communicating_framework.run_iterative_improvement(
    panel_df[col], 'currentShortPositionQuantity', iterations=3
)
