In [None]:
# =====================================
# 🛠 Enhanced Modular Data Collection
# =====================================

import logging
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os

# Import our enhanced utilities
from app_config import Config
from enhanced_breeze_utils import EnhancedBreezeDataManager, OptionChainAnalyzer
from data_processing_utils import TechnicalIndicatorProcessor, OptionsDataProcessor

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

logger = logging.getLogger(__name__)

# =====================================
# 🔒 Data Validation Utilities
# =====================================

def validate_dataframe_structure(df: pd.DataFrame, required_columns: list,
                               operation_name: str = "operation") -> pd.DataFrame:
    """
    Check that a DataFrame is non-empty and carries the required columns.

    Columns that are missing are not treated as an error: each one is added,
    filled with NaN, and a warning is logged. The additions are made on a
    copy, so the DataFrame passed in by the caller is never modified.

    Args:
        df: Input DataFrame
        required_columns: List of required column names
        operation_name: Name of operation for error and log messages

    Returns:
        The input DataFrame itself when nothing is missing, otherwise a copy
        with the missing columns appended as NaN

    Raises:
        ValueError: If df is None or empty
    """
    if df is None or df.empty:
        raise ValueError(f"DataFrame is empty for {operation_name}")

    missing_columns = [col for col in required_columns if col not in df.columns]
    if not missing_columns:
        return df

    logger.warning(f"Missing columns for {operation_name}: {missing_columns}")

    # Copy before adding columns so the caller's frame is left untouched
    # (this also avoids chained-assignment warnings when df is a slice).
    df = df.copy()
    for col in missing_columns:
        df[col] = np.nan
        logger.info(f"Added missing column '{col}' with NaN values")

    return df

def ensure_numeric_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
    """
    Return a copy of df in which the listed columns are numeric.

    Values that cannot be parsed become NaN. A warning is logged with the
    number of values that were lost by the conversion; NaNs that were
    already present before the conversion are not counted.

    Args:
        df: Input DataFrame (not modified)
        columns: List of column names to convert; names that are not in
            the DataFrame are skipped

    Returns:
        DataFrame copy with numeric columns
    """
    df_copy = df.copy()

    for col in columns:
        if col not in df_copy.columns:
            continue

        # Remember how many values were already missing so the warning only
        # reports values that this conversion actually turned into NaN.
        nan_before = df_copy[col].isna().sum()

        # Convert to numeric, coercing unparseable values to NaN
        df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce')

        coerced_count = df_copy[col].isna().sum() - nan_before
        if coerced_count > 0:
            logger.warning(f"Column '{col}': {coerced_count} values converted to NaN during numeric conversion")

    return df_copy

def validate_datetime_column(df: pd.DataFrame, datetime_col: str = 'datetime') -> pd.DataFrame:
    """
    Return a copy of df that has a datetime-typed column named datetime_col.

    When the column is absent, the first of 'timestamp', 'date', 'time' or
    'dt' found in the frame is copied into it. When none of those exist, a
    synthetic one-minute sequence starting at 2024-01-01 is generated; those
    values are placeholders, not real observation times.

    Args:
        df: Input DataFrame (not modified)
        datetime_col: Name of datetime column

    Returns:
        DataFrame copy with validated datetime column

    Raises:
        ValueError: If the column cannot be converted to datetime; the
            original exception is attached as the cause
    """
    df_copy = df.copy()

    if datetime_col not in df_copy.columns:
        logger.error(f"Datetime column '{datetime_col}' not found in DataFrame")
        # Try common datetime column names
        datetime_candidates = ['timestamp', 'date', 'time', 'dt']
        for candidate in datetime_candidates:
            if candidate in df_copy.columns:
                logger.info(f"Using '{candidate}' as datetime column instead")
                df_copy[datetime_col] = df_copy[candidate]
                break
        else:
            # No candidate matched: fabricate a sequential datetime column
            logger.warning("No datetime column found, creating sequential datetime index")
            # 'min' is the minute alias; the older 'T' spelling is deprecated
            # in recent pandas releases.
            df_copy[datetime_col] = pd.date_range(start='2024-01-01', periods=len(df_copy), freq='min')

    # Ensure datetime format
    try:
        df_copy[datetime_col] = pd.to_datetime(df_copy[datetime_col], errors='coerce')
        invalid_dates = df_copy[datetime_col].isna().sum()
        if invalid_dates > 0:
            logger.warning(f"Found {invalid_dates} invalid datetime entries, filling with interpolation")
            # NOTE(review): whether interpolate() fills NaT in a datetime64
            # column depends on the pandas version - confirm on the target one.
            df_copy[datetime_col] = df_copy[datetime_col].interpolate()
    except Exception as e:
        logger.error(f"Error converting datetime column: {e}")
        raise ValueError(f"Cannot convert '{datetime_col}' to datetime format") from e

    return df_copy

def safe_column_operation(df: pd.DataFrame, operation_func, required_columns: list,
                         operation_name: str = "operation", default_value=np.nan):
    """
    Run a zero-argument callable only when its column prerequisites are met.

    The callable is skipped (and default_value returned) when any required
    column is absent from df or holds nothing but NaN. Any exception raised
    while checking or while running the callable is logged and also results
    in default_value.

    Args:
        df: DataFrame whose columns are inspected
        operation_func: Callable taking no arguments
        required_columns: Column names that must exist and contain data
        operation_name: Label used in log messages
        default_value: Value returned when the callable is skipped or fails

    Returns:
        Whatever operation_func returns, or default_value
    """
    try:
        # Every required column has to be present
        absent = [name for name in required_columns if name not in df.columns]
        if absent:
            logger.warning(f"Skipping {operation_name}: missing columns {absent}")
            return default_value

        # ...and each one needs at least one non-null value
        for name in required_columns:
            column_is_blank = df[name].isna().all()
            if column_is_blank:
                logger.warning(f"Skipping {operation_name}: column '{name}' is all NaN")
                return default_value

        outcome = operation_func()
        return outcome

    except Exception as exc:
        logger.error(f"Error in {operation_name}: {exc}")
        return default_value

# =====================================
# 🛠 Initialize Enhanced Data Manager
# =====================================

try:
    # Configuration object and the Breeze data manager come first
    config = Config()
    data_manager = EnhancedBreezeDataManager()

    # Helpers used by the later fetch / processing cells
    indicator_processor = TechnicalIndicatorProcessor()
    options_processor = OptionsDataProcessor()
    option_analyzer = OptionChainAnalyzer()

    # Mount Google Drive when running in Colab; an ImportError here simply
    # means this is not a Colab runtime.
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        logger.info("✅ Google Drive mounted")
    except ImportError:
        logger.info("ℹ️ Not in Colab environment, skipping Google Drive mount")

    # Authenticate; a failed result aborts initialization
    auth_result = data_manager.authenticate()
    if not auth_result.success:
        logger.error(f"❌ Authentication failed: {auth_result.error_message}")
        raise Exception(f"Authentication failed: {auth_result.error_message}")

    logger.info("✅ Breeze API authenticated successfully")
    breeze = data_manager.breeze

except Exception as exc:
    # Log once at the top level, then let the error stop the notebook
    logger.error(f"Critical initialization error: {str(exc)}")
    raise

logger.info("✅ All modules and enhanced utilities loaded successfully!")

In [None]:
# =====================================
# 🛠 Parameter Setup with Validation
# =====================================

from enhanced_breeze_utils import MarketDataRequest
from data_processing_utils import ValidationError

def setup_trading_parameters(stock_name: str = "TCS", interval: str = "5minute",
                             exchange_code: str = "NSE", days_back: int = 30):
    """
    Collect the inputs shared by the data-fetch cells into one request object.

    Looks up the trading date window, the current last traded price and the
    next valid option expiry, then bundles them in a MarketDataRequest.
    Called without arguments it behaves as before (TCS, 5-minute candles,
    NSE, 30 days back).

    Args:
        stock_name: Stock code to collect data for
        interval: Candle interval passed through to the request
        exchange_code: Exchange used for the live price and the request
        days_back: Look-back passed to data_manager.get_trading_dates

    Returns:
        MarketDataRequest populated with dates, expiry and current price

    Raises:
        ValidationError: If any of the three lookups reports failure
        Exception: Any other error is logged and re-raised unchanged
    """
    try:
        # Get trading dates using enhanced utilities
        date_result = data_manager.get_trading_dates(days_back=days_back)
        if not date_result.success:
            raise ValidationError(f"Failed to get trading dates: {date_result.error_message}")

        from_date = date_result.data['from_date']
        to_date = date_result.data['to_date']

        logger.info(f"📅 Trading period: {from_date} to {to_date}")

        # Get current LTP with enhanced error handling
        ltp_result = data_manager.get_live_price(stock_name, exchange_code)
        if not ltp_result.success:
            raise ValidationError(f"Failed to get LTP: {ltp_result.error_message}")

        ltp = ltp_result.data['ltp']
        logger.info(f"📦 Current LTP for {stock_name}: {ltp}")

        # Get valid expiry using enhanced option analyzer
        expiry_result = option_analyzer.get_next_valid_expiry(stock_name)
        if not expiry_result.success:
            raise ValidationError(f"Failed to get expiry: {expiry_result.error_message}")

        expiry_date = expiry_result.data['expiry_date']
        logger.info(f"📌 Using expiry: {expiry_date}")

        # Create structured request object
        request = MarketDataRequest(
            stock_code=stock_name,
            exchange_code=exchange_code,
            interval=interval,
            from_date=from_date,
            to_date=to_date,
            expiry_date=expiry_date,
            current_price=ltp
        )

        return request

    except Exception as e:
        logger.error(f"Parameter setup failed: {str(e)}")
        raise

# Build the request object that the equity, futures and options fetch cells
# below all receive. setup_trading_parameters() re-raises on failure, so the
# success message is only logged when the request was actually created.
market_request = setup_trading_parameters()
logger.info("✅ Parameters setup completed successfully")

In [None]:
# =====================================
# 📈 Fetch Equity Data
# =====================================

def fetch_equity_data(request):
    """
    Fetch, validate, enrich and save cash-market candles for request.stock_code.

    Steps: download through data_manager.fetch_historical_data, make sure the
    OHLCV and datetime columns exist with usable dtypes, repair rows whose
    high/low contradict the other prices, add technical indicators (raw
    candles are kept if that step reports failure) and save the result as
    "<stock_code lower-cased>_equity_data.csv".

    Args:
        request: Object exposing stock_code, exchange_code, interval,
            from_date and to_date

    Returns:
        DataFrame with the processed equity data

    Raises:
        ValidationError: If the historical data fetch reports failure
        ValueError: If the fetched or processed data is empty
        Exception: Any other error is logged and re-raised unchanged
    """
    try:
        logger.info(f"📊 Fetching equity data for {request.stock_code}")

        # Use enhanced data manager for equity data
        equity_result = data_manager.fetch_historical_data(
            stock_code=request.stock_code,
            exchange_code=request.exchange_code,
            product_type="cash",
            interval=request.interval,
            from_date=request.from_date,
            to_date=request.to_date
        )

        if not equity_result.success:
            raise ValidationError(f"Equity data fetch failed: {equity_result.error_message}")

        equity_df = equity_result.data

        # Validate data structure and types
        logger.info("🔒 Validating equity data structure...")
        price_columns = ['open', 'high', 'low', 'close']
        numeric_columns = price_columns + ['volume']

        # 'datetime' is deliberately not part of this list: adding it here as
        # an all-NaN column would hide the fact that it is missing and stop
        # validate_datetime_column from falling back to an alternate column.
        equity_df = validate_dataframe_structure(equity_df, numeric_columns, "equity data processing")

        # Ensure datetime column exists and is properly formatted
        equity_df = validate_datetime_column(equity_df)

        # Ensure numeric columns are properly typed
        equity_df = ensure_numeric_columns(equity_df, numeric_columns)

        # Validate OHLC logic
        def validate_ohlc_logic():
            bad_rows = (
                (equity_df['high'] < equity_df['low']) |
                (equity_df['high'] < equity_df['open']) |
                (equity_df['high'] < equity_df['close']) |
                (equity_df['low'] > equity_df['open']) |
                (equity_df['low'] > equity_df['close'])
            )
            if bad_rows.any():
                logger.warning(f"Found {int(bad_rows.sum())} rows with invalid OHLC logic")
                # Repair with the extremes of all four prices so a correct
                # high or low on the row is kept rather than overwritten.
                row_prices = equity_df.loc[bad_rows, price_columns]
                equity_df.loc[bad_rows, 'high'] = row_prices.max(axis=1)
                equity_df.loc[bad_rows, 'low'] = row_prices.min(axis=1)
                logger.info("Fixed invalid OHLC values")

        # Only the price columns are prerequisites; missing volume must not
        # cause the price sanity check to be skipped.
        safe_column_operation(equity_df, validate_ohlc_logic, price_columns, "OHLC validation")

        # Process with technical indicators using enhanced processor
        logger.info("📊 Processing technical indicators...")
        processing_result = indicator_processor.process_dataframe(
            equity_df,
            add_all_indicators=True
        )

        if not processing_result.success:
            logger.warning(f"Technical indicator processing had issues: {processing_result.error_message}")
            # Continue with raw data if indicator processing fails
            processed_df = equity_df
        else:
            processed_df = processing_result.data

        # Final validation of processed data
        if processed_df.empty:
            raise ValueError("Processed equity data is empty")

        logger.info(f"✅ Equity data validated: {len(processed_df)} records, {len(processed_df.columns)} features")

        # Everything that is not a base candle column counts as an indicator
        def calculate_indicators_count():
            base_columns = ['datetime', 'open', 'high', 'low', 'close', 'volume']
            return len([c for c in processed_df.columns if c not in base_columns])

        indicators_count = safe_column_operation(
            processed_df, calculate_indicators_count, [], "indicators count calculation", 0
        )

        # Save with metadata; the file name follows the requested stock so
        # data for another symbol is not written under the TCS name.
        save_result = data_manager.save_dataframe(
            processed_df,
            f"{str(request.stock_code).lower()}_equity_data.csv",
            metadata={
                "source": "equity",
                "stock_code": request.stock_code,
                "interval": request.interval,
                "indicators_count": indicators_count,
                "data_validation": "passed"
            }
        )

        if save_result.success:
            logger.info(f"✅ Equity data saved: {len(processed_df)} records with {len(processed_df.columns)} features")
        else:
            logger.warning(f"Save failed: {save_result.error_message}")

        return processed_df

    except Exception as e:
        logger.error(f"Equity data fetch failed: {str(e)}")
        raise

# Fetch equity data with validation. fetch_equity_data() re-raises on any
# failure, so reaching the shape log means equity_df holds usable data.
equity_df = fetch_equity_data(market_request)
logger.info(f"📈 Equity data shape: {equity_df.shape}")

In [None]:
# =====================================
# 📊 Fetch Futures Data
# =====================================

def fetch_futures_data(request):
    """
    Fetch, validate, enrich and save futures candles for request.stock_code.

    Futures data is optional for the pipeline, so every failure degrades to
    returning None instead of raising. On success the data is saved as
    "<stock_code lower-cased>_futures_data.csv".

    Args:
        request: Object exposing stock_code, interval, from_date, to_date
            and expiry_date

    Returns:
        DataFrame with the processed futures data, or None when the fetch
        fails, the processed data is empty, or any exception occurs
    """
    try:
        logger.info(f"📊 Fetching futures data for {request.stock_code}")

        # Use enhanced data manager for futures data
        futures_result = data_manager.fetch_historical_data(
            stock_code=request.stock_code,
            exchange_code="NFO",
            product_type="futures",
            interval=request.interval,
            from_date=request.from_date,
            to_date=request.to_date,
            expiry_date=request.expiry_date
        )

        if not futures_result.success:
            logger.warning(f"Futures data fetch failed: {futures_result.error_message}")
            return None  # Return None instead of failing completely

        futures_df = futures_result.data

        # Validate data structure and types
        logger.info("🔒 Validating futures data structure...")
        numeric_columns = ['open', 'high', 'low', 'close', 'volume']

        # 'datetime' is deliberately not part of this list: adding it here as
        # an all-NaN column would hide the fact that it is missing and stop
        # validate_datetime_column from falling back to an alternate column.
        futures_df = validate_dataframe_structure(futures_df, numeric_columns, "futures data processing")

        # Ensure datetime column exists and is properly formatted
        futures_df = validate_datetime_column(futures_df)

        # Ensure numeric columns are properly typed
        futures_df = ensure_numeric_columns(futures_df, numeric_columns)

        # Check for sufficient data
        if len(futures_df) < 10:
            logger.warning(f"Limited futures data: only {len(futures_df)} records")

        # Validate futures-specific data (warnings only, nothing is changed)
        def validate_futures_data():
            # Flag data where more than half of the candles traded nothing
            if 'volume' in futures_df.columns:
                zero_volume_count = (futures_df['volume'] == 0).sum()
                if zero_volume_count > len(futures_df) * 0.5:
                    logger.warning(f"High zero volume count in futures data: {zero_volume_count}/{len(futures_df)}")

            # Check for price continuity
            if 'close' in futures_df.columns:
                price_jumps = futures_df['close'].pct_change().abs()
                extreme_jumps = (price_jumps > 0.1).sum()  # >10% price jumps
                if extreme_jumps > 0:
                    logger.warning(f"Found {extreme_jumps} extreme price jumps in futures data")

        safe_column_operation(futures_df, validate_futures_data, numeric_columns, "futures data validation")

        # Process with technical indicators
        logger.info("📊 Processing futures technical indicators...")
        processing_result = indicator_processor.process_dataframe(
            futures_df,
            add_all_indicators=True
        )

        if not processing_result.success:
            logger.warning(f"Futures technical indicator processing failed: {processing_result.error_message}")
            processed_df = futures_df
        else:
            processed_df = processing_result.data

        # Final validation
        if processed_df.empty:
            logger.warning("Processed futures data is empty")
            return None

        logger.info(f"✅ Futures data validated: {len(processed_df)} records, {len(processed_df.columns)} features")

        # Save with metadata; the file name follows the requested stock so
        # data for another symbol is not written under the TCS name.
        save_result = data_manager.save_dataframe(
            processed_df,
            f"{str(request.stock_code).lower()}_futures_data.csv",
            metadata={
                "source": "futures",
                "stock_code": request.stock_code,
                "expiry_date": request.expiry_date,
                "interval": request.interval,
                "data_validation": "passed"
            }
        )

        if save_result.success:
            logger.info(f"✅ Futures data saved: {len(processed_df)} records")
        else:
            logger.warning(f"Futures save failed: {save_result.error_message}")

        return processed_df

    except Exception as e:
        logger.error(f"Futures data processing error: {str(e)}")
        return None  # Graceful degradation

# Futures are optional: fetch_futures_data() hands back None when nothing
# usable could be retrieved, and the notebook carries on without them.
futures_df = fetch_futures_data(market_request)
if futures_df is None:
    logger.warning("⚠️ Futures data not available, continuing without it")
else:
    logger.info(f"📊 Futures data shape: {futures_df.shape}")

In [None]:
# =====================================
# 🔄 Fetch Options Data
# =====================================

def fetch_options_data(request, strike_range=800):
    """
    Fetch, validate, enrich and save the option chain for request.stock_code.

    Options data is optional for the pipeline, so every failure degrades to
    returning None instead of raising. On success the data is saved as
    "<stock_code lower-cased>_options_data.csv".

    Args:
        request: Object exposing stock_code, expiry_date, current_price,
            interval, from_date and to_date
        strike_range: Value forwarded to
            option_analyzer.fetch_full_option_chain as strike_range
            (defaults to the previously hard-coded 800)

    Returns:
        DataFrame with the processed options data, or None when the fetch
        fails, fewer than 5 rows are available, the processed data is empty,
        or any exception occurs
    """
    try:
        logger.info(f"🔄 Fetching options chain for {request.stock_code}")

        # Use enhanced option analyzer for comprehensive chain data
        chain_result = option_analyzer.fetch_full_option_chain(
            stock_code=request.stock_code,
            expiry_date=request.expiry_date,
            current_price=request.current_price,
            interval=request.interval,
            from_date=request.from_date,
            to_date=request.to_date,
            strike_range=strike_range
        )

        if not chain_result.success:
            logger.warning(f"Options chain fetch failed: {chain_result.error_message}")
            return None

        options_df = chain_result.data

        # Validate options data structure
        logger.info("🔒 Validating options data structure...")
        # 'datetime' is deliberately not part of this list: adding it here as
        # an all-NaN column would hide the fact that it is missing and stop
        # validate_datetime_column from falling back to an alternate column.
        required_options_columns = ['open', 'high', 'low', 'close', 'volume', 'strike', 'right']
        options_df = validate_dataframe_structure(options_df, required_options_columns, "options data processing")

        # Ensure datetime column exists and is properly formatted
        options_df = validate_datetime_column(options_df)

        # Ensure numeric columns are properly typed
        numeric_columns = ['open', 'high', 'low', 'close', 'volume', 'strike']
        options_df = ensure_numeric_columns(options_df, numeric_columns)

        # Validate options-specific data
        def validate_options_specific():
            # Validate option rights
            if 'right' in options_df.columns:
                valid_rights = {'call', 'put', 'c', 'p', 'ce', 'pe'}
                rights = options_df['right']
                has_right = rights.notna()
                seen_rights = set(rights[has_right].astype(str).str.lower().unique())
                invalid_rights = seen_rights - valid_rights
                if invalid_rights:
                    logger.warning(f"Found invalid option rights: {invalid_rights}")
                    # Standardize option rights. Rows without a right keep
                    # their missing value instead of becoming the text 'NAN'.
                    # NOTE(review): standardization only runs when an
                    # unrecognized right is present - confirm that is intended.
                    standardized = rights.astype(str).str.upper().replace(
                        {'C': 'CE', 'P': 'PE', 'CALL': 'CE', 'PUT': 'PE'}
                    )
                    options_df['right'] = standardized.where(has_right, rights)

            # Validate strike prices: non-positive strikes are blanked out
            if 'strike' in options_df.columns:
                invalid_strikes = options_df['strike'] <= 0
                if invalid_strikes.any():
                    logger.warning(f"Found {invalid_strikes.sum()} invalid strike prices")
                    options_df.loc[invalid_strikes, 'strike'] = np.nan

            # Check for reasonable option premiums (warning only)
            if 'close' in options_df.columns and 'strike' in options_df.columns:
                unreasonable_premiums = options_df['close'] > options_df['strike']
                if unreasonable_premiums.any():
                    logger.warning(f"Found {unreasonable_premiums.sum()} options with premiums > strike prices")

        safe_column_operation(options_df, validate_options_specific,
                            ['right', 'strike', 'close'], "options data validation")

        # Check data sufficiency
        if len(options_df) < 5:
            logger.warning(f"Very limited options data: only {len(options_df)} records")
            return None

        # Count unique strikes and rights (missing values are not counted)
        def get_options_summary():
            unique_strikes = options_df['strike'].nunique() if 'strike' in options_df.columns else 0
            unique_rights = options_df['right'].nunique() if 'right' in options_df.columns else 0
            return unique_strikes, unique_rights

        unique_strikes, unique_rights = safe_column_operation(
            options_df, get_options_summary, ['strike', 'right'], "options summary", (0, 0)
        )

        logger.info(f"📊 Options data summary: {unique_strikes} strikes, {unique_rights} rights")

        # Process options with specialized processor
        logger.info("📊 Processing options with specialized processor...")
        processing_result = options_processor.process_options_dataframe(
            options_df,
            current_price=request.current_price,
            add_greeks=True,
            add_technical_indicators=True
        )

        if not processing_result.success:
            logger.warning(f"Options processing failed: {processing_result.error_message}")
            processed_df = options_df
        else:
            processed_df = processing_result.data

        # Final validation
        if processed_df.empty:
            logger.warning("Processed options data is empty")
            return None

        # Strike count for metadata; nunique() leaves out the NaN that
        # blanked-out invalid strikes would otherwise add to the count.
        def safe_strike_count():
            return int(processed_df['strike'].nunique()) if 'strike' in processed_df.columns else 0

        strike_count = safe_column_operation(
            processed_df, safe_strike_count, ['strike'], "strike count calculation", 0
        )

        logger.info(f"✅ Options data validated: {len(processed_df)} records, {len(processed_df.columns)} features")

        # Save with comprehensive metadata; the file name follows the
        # requested stock so another symbol is not written under the TCS name.
        save_result = data_manager.save_dataframe(
            processed_df,
            f"{str(request.stock_code).lower()}_options_data.csv",
            metadata={
                "source": "options",
                "stock_code": request.stock_code,
                "expiry_date": request.expiry_date,
                "current_price": request.current_price,
                "strike_count": strike_count,
                "total_records": len(processed_df),
                "data_validation": "passed"
            }
        )

        if save_result.success:
            logger.info(f"✅ Options data saved: {len(processed_df)} records, {strike_count} strikes")
        else:
            logger.warning(f"Options save failed: {save_result.error_message}")

        return processed_df

    except Exception as e:
        logger.error(f"Options data processing error: {str(e)}")
        return None

# Options are optional: fetch_options_data() hands back None when nothing
# usable could be retrieved, and the notebook carries on without them.
options_df = fetch_options_data(market_request)
if options_df is None:
    logger.warning("⚠️ Options data not available, continuing without it")
else:
    logger.info(f"🔄 Options data shape: {options_df.shape}")

In [None]:
# =====================================
# 🔗 Data Combination and Enhancement
# =====================================

from data_processing_utils import ProcessingResult, DataQuality

def combine_and_enhance_data(equity_df, futures_df=None, options_df=None):
    """
    Merge equity, futures and options frames, add relationship features, save.

    Each non-empty input is re-validated (datetime column, OHLCV columns,
    numeric dtypes, fully empty rows dropped) before being handed to
    options_processor.combine_market_data. If combining or enhancing reports
    failure, the function carries on with what it already has. If anything
    raises, the validated equity frame alone is returned as a fallback.

    Args:
        equity_df: Equity DataFrame (required, must not be empty)
        futures_df: Optional futures DataFrame
        options_df: Optional options DataFrame

    Returns:
        The enhanced DataFrame, or the validated equity frame on failure

    Raises:
        ValueError: If an error occurs and equity_df is None or empty, so no
            fallback can be produced
    """
    try:
        logger.info("🔗 Starting data combination and enhancement")

        # Equity is mandatory; everything else is optional
        if equity_df is None or equity_df.empty:
            raise ValueError("Equity data is required and cannot be empty")

        # Optional frames only take part when they actually contain rows
        candidates = [(equity_df, "equity")]
        candidates.extend(
            (frame, label)
            for frame, label in ((futures_df, "futures"), (options_df, "options"))
            if frame is not None and not frame.empty
        )

        ohlcv_columns = ['open', 'high', 'low', 'close', 'volume']
        validated_datasets = {}
        for frame, label in candidates:
            logger.info(f"🔒 Validating {label} dataset for combination...")

            # datetime column first, then OHLCV presence and dtypes
            checked = validate_datetime_column(frame)
            checked = validate_dataframe_structure(checked, ohlcv_columns, f"{label} data combination")
            checked = ensure_numeric_columns(checked, ohlcv_columns)

            # Rows with no data at all carry no information
            checked = checked.dropna(how='all')

            if checked.empty:
                logger.warning(f"⚠️ {label} dataset is empty after validation, excluding from combination")
                continue

            validated_datasets[label] = checked
            logger.info(f"✅ {label} dataset validated: {checked.shape}")

        if 'equity' not in validated_datasets:
            raise ValueError("Equity data validation failed - cannot proceed with combination")

        # Hand the validated frames to the options processor for merging
        logger.info("🔗 Combining validated datasets...")
        combination_result = options_processor.combine_market_data(
            equity_data=validated_datasets['equity'],
            futures_data=validated_datasets.get('futures'),
            options_data=validated_datasets.get('options')
        )

        if combination_result.success:
            combined_df = combination_result.data
            logger.info(f"✅ Data combined successfully: {combined_df.shape}")
        else:
            logger.warning(f"Data combination had issues: {combination_result.error_message}")
            # Equity alone is still a usable dataset
            combined_df = validated_datasets['equity'].copy()
            logger.info("📈 Falling back to equity data only")

        logger.info("🔒 Validating combined dataset...")
        if combined_df.empty:
            raise ValueError("Combined dataset is empty")

        # The merge may have altered the datetime column; re-validate it
        combined_df = validate_datetime_column(combined_df)

        # Swap +/-inf for NaN in every numeric column, reporting what changed
        def scrub_infinities():
            replaced = {}
            for name in combined_df.select_dtypes(include=[np.number]).columns:
                hits = np.isinf(combined_df[name]).sum()
                if hits > 0:
                    replaced[name] = hits
                    combined_df[name] = combined_df[name].replace([np.inf, -np.inf], np.nan)

            if replaced:
                logger.warning(f"Replaced infinite values in columns: {replaced}")

            return len(replaced)

        safe_column_operation(
            combined_df, scrub_infinities, [], "infinite value cleanup", 0
        )

        # Relationship features come from the options processor
        logger.info("🔗 Adding relationship features...")
        enhancement_result = options_processor.add_relationship_features(
            combined_df,
            include_correlations=True,
            include_price_targets=True
        )

        if enhancement_result.success:
            enhanced_df = enhancement_result.data
        else:
            logger.warning(f"Enhancement failed: {enhancement_result.error_message}")
            enhanced_df = combined_df

        logger.info("🔒 Final validation of enhanced dataset...")

        # Log the share of NaN cells and translate it into a coarse label.
        # The label itself is not stored anywhere; only the log line is kept.
        def grade_by_nan_share():
            total_cells = enhanced_df.shape[0] * enhanced_df.shape[1]
            nan_cells = enhanced_df.isna().sum().sum()
            if total_cells > 0:
                nan_percentage = (nan_cells / total_cells) * 100
            else:
                nan_percentage = 100

            logger.info(f"📊 Data quality: {nan_percentage:.1f}% NaN values")

            for ceiling, label in ((10, "excellent"), (25, "good"), (50, "fair")):
                if nan_percentage < ceiling:
                    return label
            return "poor"

        safe_column_operation(
            enhanced_df, grade_by_nan_share, [], "data quality assessment", "unknown"
        )

        # The score saved in the metadata comes from the options processor
        quality_result = options_processor.assess_data_quality(enhanced_df)
        if quality_result.success:
            quality_score = quality_result.metadata.get('quality_score', 'N/A')
        else:
            quality_score = 'N/A'

        logger.info(f"📊 Data quality assessment: {quality_score}")

        # Count columns per source prefix plus relationship-style columns
        def count_feature_groups():
            names = list(enhanced_df.columns)
            per_prefix = [
                sum(1 for n in names if n.startswith(prefix))
                for prefix in ('equity_', 'futures_', 'options_')
            ]
            relationship = sum(
                1 for n in names
                if any(keyword in n for keyword in ['corr_', 'basis_', 'divergence', 'ratio'])
            )
            return (*per_prefix, relationship)

        equity_feat, futures_feat, options_feat, relationship_feat = safe_column_operation(
            enhanced_df, count_feature_groups, [], "feature breakdown", (0, 0, 0, 0)
        )

        # Persist the final dataset together with its descriptive metadata
        save_result = data_manager.save_dataframe(
            enhanced_df,
            "tcs_enhanced_data.csv",
            metadata={
                "source": "combined_enhanced",
                "features_count": len(enhanced_df.columns),
                "records_count": len(enhanced_df),
                "data_quality": quality_score,
                "data_validation": "comprehensive_passed",
                "equity_features": equity_feat,
                "futures_features": futures_feat,
                "options_features": options_feat,
                "relationship_features": relationship_feat,
                "processing_timestamp": datetime.now().isoformat()
            }
        )

        if save_result.success:
            logger.info(f"✅ Enhanced dataset saved: {enhanced_df.shape} with {len(enhanced_df.columns)} features")
        else:
            logger.warning(f"Enhanced data save failed: {save_result.error_message}")

        return enhanced_df

    except Exception as exc:
        logger.error(f"Data combination and enhancement failed: {str(exc)}")
        # Without usable equity data there is nothing to fall back to
        if equity_df is None or equity_df.empty:
            raise ValueError("Cannot create fallback dataset - equity data is invalid")

        logger.info("📈 Falling back to validated equity data only")
        fallback_df = validate_dataframe_structure(equity_df, ['open', 'high', 'low', 'close'], "fallback")
        fallback_df = validate_datetime_column(fallback_df)
        return fallback_df

# Run the combination step and log a summary of the resulting dataset. If
# anything here raises, fall back to the validated equity frame when one is
# available; otherwise re-raise.
try:
    enhanced_df = combine_and_enhance_data(equity_df, futures_df, options_df)

    # Summary banner
    logger.info("="*50)
    logger.info("📊 FINAL DATA SUMMARY")
    logger.info("="*50)
    logger.info(f"📈 Total records: {len(enhanced_df):,}")
    logger.info(f"📊 Total features: {len(enhanced_df.columns):,}")

    # Earliest and latest timestamp, or placeholders when unavailable
    def get_date_range():
        if 'datetime' not in enhanced_df.columns or enhanced_df['datetime'].isna().all():
            return "Unknown", "Unknown"
        stamps = enhanced_df['datetime']
        return stamps.min(), stamps.max()

    min_date, max_date = safe_column_operation(
        enhanced_df, get_date_range, ['datetime'], "date range calculation", ("Unknown", "Unknown")
    )

    logger.info(f"📅 Date range: {min_date} to {max_date}")

    # Number of columns whose name starts with the given prefix
    def safe_feature_count(prefix):
        return sum(1 for c in enhanced_df.columns if c.startswith(prefix))

    equity_features = safe_feature_count('equity_')
    futures_features = safe_feature_count('futures_')
    options_features = safe_feature_count('options_')
    relationship_features = sum(
        1 for c in enhanced_df.columns
        if any(keyword in c for keyword in ['corr_', 'basis_', 'divergence', 'ratio'])
    )

    logger.info(f"📈 Equity features: {equity_features}")
    logger.info(f"📊 Futures features: {futures_features}")
    logger.info(f"🔄 Options features: {options_features}")
    logger.info(f"🔗 Relationship features: {relationship_features}")
    logger.info("="*50)
    logger.info("✅✅✅ ALL DATA PROCESSING COMPLETED SUCCESSFULLY!")
    logger.info("🔒 Data validation and type safety measures applied")
    logger.info("="*50)

except Exception as exc:
    logger.error(f"❌ Critical error in data processing: {str(exc)}")
    # No equity frame means there is nothing left to salvage
    if 'equity_df' not in locals() or equity_df is None or equity_df.empty:
        logger.error("💥 Complete failure - no valid data available")
        raise
    else:
        logger.info("📈 Falling back to equity data only with validation")
        enhanced_df = validate_dataframe_structure(equity_df, ['open', 'high', 'low', 'close'], "final fallback")
        enhanced_df = validate_datetime_column(enhanced_df)
        logger.info(f"📈 Fallback dataset shape: {enhanced_df.shape}")