In [None]:
# =====================================
# 🛠 Enhanced Modular Data Collection
# =====================================

import logging
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os

# Import our enhanced utilities
from app_config import Config
from enhanced_breeze_utils import EnhancedBreezeDataManager, OptionChainAnalyzer
from data_processing_utils import TechnicalIndicatorProcessor, OptionsDataProcessor

# Configure logging
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Named logger used by the rest of this file
logger = logging.getLogger(__name__)

# =====================================
# 🛠 Initialize Enhanced Data Manager
# =====================================

try:
    # Configuration object and the Breeze data manager
    config = Config()
    data_manager = EnhancedBreezeDataManager()

    # Processing / analysis helpers
    indicator_processor = TechnicalIndicatorProcessor()
    options_processor = OptionsDataProcessor()
    option_analyzer = OptionChainAnalyzer()

    # Mount Google Drive when google.colab is importable; an ImportError
    # here simply means the mount is skipped.
    try:
        from google.colab import drive
        drive.mount('/content/drive')
        logger.info("✅ Google Drive mounted")
    except ImportError:
        logger.info("ℹ️ Not in Colab environment, skipping Google Drive mount")

    # Authenticate; a failed result is logged and turned into an exception
    auth_result = data_manager.authenticate()
    if not auth_result.success:
        logger.error(f"❌ Authentication failed: {auth_result.error_message}")
        raise Exception(f"Authentication failed: {auth_result.error_message}")

    logger.info("✅ Breeze API authenticated successfully")
    breeze = data_manager.breeze

except Exception as e:
    # Anything that went wrong above is logged once more and re-raised
    logger.error(f"Critical initialization error: {str(e)}")
    raise

logger.info("✅ All modules and enhanced utilities loaded successfully!")

In [None]:
# =====================================
# 📊 Data Validation Framework
# =====================================

import pandas as pd
import numpy as np
import logging
from typing import List, Optional, Union, Dict, Any
from datetime import datetime

def validate_dataframe_structure(df: pd.DataFrame, required_columns: List[str],
                               optional_columns: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Validate DataFrame has required columns and add missing ones with NaN.

    The caller's DataFrame is never modified: when columns must be added,
    they are added to a copy and that copy is returned.

    Args:
        df: Input DataFrame (may be None or empty)
        required_columns: List of required column names
        optional_columns: List of optional column names; those present in
            ``df`` are only logged, missing ones are not added

    Returns:
        - an empty DataFrame with exactly ``required_columns`` when ``df`` is
          None or empty,
        - ``df`` itself when no required column is missing,
        - otherwise a copy of ``df`` with each missing required column added
          and filled with NaN.
    """
    if df is None or df.empty:
        logging.warning("Empty or None DataFrame provided")
        # Create empty DataFrame with required columns
        return pd.DataFrame(columns=required_columns)

    missing_required = [col for col in required_columns if col not in df.columns]
    if missing_required:
        logging.warning(f"Missing required columns: {missing_required}")
        # Copy first so the placeholder columns do not leak into the
        # DataFrame object the caller still holds.
        df = df.copy()
        for col in missing_required:
            df[col] = np.nan

    # Log available optional columns
    if optional_columns:
        available_optional = [col for col in optional_columns if col in df.columns]
        logging.info(f"Available optional columns: {available_optional}")

    return df

def ensure_numeric_columns(df: pd.DataFrame, columns: List[str],
                         fill_method: str = 'forward') -> pd.DataFrame:
    """
    Ensure specified columns are numeric with proper type conversion and error handling.

    Works on a copy; the input DataFrame is left untouched. Values that
    cannot be parsed become NaN and are then filled according to
    ``fill_method``. Columns listed but absent from ``df`` are skipped.

    Args:
        df: Input DataFrame
        columns: List of column names to convert to numeric
        fill_method: Method to fill NaN values ('forward', 'backward', 'zero', 'mean').
            Any other value leaves NaN values in place (a warning is logged).

    Returns:
        Copy of ``df`` with the requested columns converted to numeric
    """
    # ffill()/bfill() replace fillna(method=...), which pandas deprecated in 2.1.
    fillers = {
        'forward': lambda series: series.ffill(),
        'backward': lambda series: series.bfill(),
        'zero': lambda series: series.fillna(0),
        'mean': lambda series: series.fillna(series.mean()),
    }
    fill = fillers.get(fill_method)
    if fill is None:
        logging.warning(f"Unknown fill_method '{fill_method}', NaN values will not be filled")

    df_copy = df.copy()

    for col in columns:
        if col not in df_copy.columns:
            continue
        try:
            # Convert to numeric, coercing errors to NaN
            df_copy[col] = pd.to_numeric(df_copy[col], errors='coerce')

            # Handle NaN values based on fill_method
            if fill is not None:
                df_copy[col] = fill(df_copy[col])

            logging.info(f"Successfully converted {col} to numeric")
        except Exception as e:
            # Best effort: one bad column must not abort the remaining ones.
            logging.error(f"Failed to convert {col} to numeric: {str(e)}")

    return df_copy

def validate_datetime_column(df: pd.DataFrame, datetime_col: str = 'datetime') -> pd.DataFrame:
    """
    Return a copy of ``df`` whose ``datetime_col`` is parsed and sorted.

    When ``datetime_col`` is absent, the first of 'date', 'timestamp', 'time',
    'Date', 'DateTime' found in the frame is copied into it. When none exists,
    a placeholder daily sequence starting 2023-01-01 is written and the copy
    is returned immediately. Otherwise values are parsed with
    ``pd.to_datetime(errors='coerce')``, unparseable rows are dropped, rows
    are sorted by the column and the index is reset. If that step raises, the
    same placeholder sequence is written instead.

    Args:
        df: Input DataFrame (not modified)
        datetime_col: Name of datetime column

    Returns:
        Copy of ``df`` with ``datetime_col`` populated as described above
    """
    def _placeholder_dates(frame):
        # Synthetic one-per-day timestamps, one per row of ``frame``
        return pd.date_range(start='2023-01-01', periods=len(frame), freq='D')

    df_copy = df.copy()

    if datetime_col not in df_copy.columns:
        # Look for a commonly used alternative column name, in priority order
        datetime_alternatives = ('date', 'timestamp', 'time', 'Date', 'DateTime')
        found_col = next(
            (alt for alt in datetime_alternatives if alt in df_copy.columns),
            None,
        )

        if not found_col:
            logging.warning(f"No datetime column found, creating default index-based datetime")
            df_copy[datetime_col] = _placeholder_dates(df_copy)
            return df_copy

        logging.info(f"Using {found_col} as datetime column")
        df_copy[datetime_col] = df_copy[found_col]

    try:
        # Unparseable entries become NaT
        parsed = pd.to_datetime(df_copy[datetime_col], errors='coerce')
        df_copy[datetime_col] = parsed

        # Drop rows whose timestamp could not be parsed
        invalid_datetime_count = df_copy[datetime_col].isna().sum()
        if invalid_datetime_count > 0:
            logging.warning(f"Removing {invalid_datetime_count} rows with invalid datetime")
            df_copy = df_copy.dropna(subset=[datetime_col])

        # Chronological order with a fresh 0..n-1 index
        ordered = df_copy.sort_values(datetime_col)
        df_copy = ordered.reset_index(drop=True)

        logging.info(f"Successfully validated datetime column: {len(df_copy)} records")

    except Exception as e:
        logging.error(f"DateTime validation failed: {str(e)}")
        # Fallback: create sequential datetime
        df_copy[datetime_col] = _placeholder_dates(df_copy)

    return df_copy

def safe_column_operation(df: pd.DataFrame, operation_func, *args, **kwargs) -> pd.DataFrame:
    """
    Safely perform operations that depend on specific columns with graceful error handling.

    Args:
        df: Input DataFrame
        operation_func: Callable invoked as ``operation_func(df, *args, **kwargs)``.
            Any callable is accepted, including ``functools.partial`` objects
            and callable instances that have no ``__name__``.
        *args, **kwargs: Arguments for the operation function

    Returns:
        Result of operation or original DataFrame if operation fails
    """
    # partial objects and callable instances have no __name__; fall back to
    # repr() so that logging can never raise on its own.
    op_name = getattr(operation_func, '__name__', None) or repr(operation_func)

    # Only the operation itself is guarded; the success log stays outside so a
    # logging problem is not mistaken for a failed operation.
    try:
        result = operation_func(df, *args, **kwargs)
    except Exception as e:
        logging.warning(f"Operation {op_name} failed: {str(e)}, returning original DataFrame")
        return df

    logging.info(f"Successfully performed operation: {op_name}")
    return result

# Emitted via the root logging module (not a named logger) once the
# validation helpers above have been defined.
logging.info("✅ Data validation utilities loaded successfully!")

In [None]:
# =====================================
# 🛠 Parameter Setup with Validation
# =====================================

from enhanced_breeze_utils import MarketDataRequest
from data_processing_utils import ValidationError

def setup_trading_parameters(stock_name: str = "TCS", interval: str = "5minute",
                             exchange_code: str = "NSE", days_back: int = 30):
    """Setup and validate trading parameters with proper error handling.

    Collects the trading date window, the current last traded price and the
    next valid option expiry, then bundles them into a ``MarketDataRequest``.
    Called without arguments it behaves exactly as before (TCS, 5-minute
    bars, NSE, 30 days back).

    Args:
        stock_name: Stock code to build the request for
        interval: Bar interval passed through to the request
        exchange_code: Exchange used for the live-price lookup and the request
        days_back: Look-back passed to ``data_manager.get_trading_dates``

    Returns:
        MarketDataRequest populated with dates, expiry and current price

    Raises:
        ValidationError: when the date, LTP or expiry lookup reports failure.
        Exception: any other error is logged and re-raised unchanged.
    """
    try:
        # Get trading dates using enhanced utilities
        date_result = data_manager.get_trading_dates(days_back=days_back)
        if not date_result.success:
            raise ValidationError(f"Failed to get trading dates: {date_result.error_message}")

        from_date = date_result.data['from_date']
        to_date = date_result.data['to_date']

        logger.info(f"📅 Trading period: {from_date} to {to_date}")

        # Get current LTP with enhanced error handling
        ltp_result = data_manager.get_live_price(stock_name, exchange_code)
        if not ltp_result.success:
            raise ValidationError(f"Failed to get LTP: {ltp_result.error_message}")

        ltp = ltp_result.data['ltp']
        logger.info(f"📦 Current LTP for {stock_name}: {ltp}")

        # Get valid expiry using enhanced option analyzer
        expiry_result = option_analyzer.get_next_valid_expiry(stock_name)
        if not expiry_result.success:
            raise ValidationError(f"Failed to get expiry: {expiry_result.error_message}")

        expiry_date = expiry_result.data['expiry_date']
        logger.info(f"📌 Using expiry: {expiry_date}")

        # Create structured request object
        return MarketDataRequest(
            stock_code=stock_name,
            exchange_code=exchange_code,
            interval=interval,
            from_date=from_date,
            to_date=to_date,
            expiry_date=expiry_date,
            current_price=ltp
        )

    except Exception as e:
        # Log for the notebook output, then let the caller see the failure.
        logger.error(f"Parameter setup failed: {str(e)}")
        raise

# Setup parameters
# Runs when the cell executes: builds the request object that the fetch_*
# calls below receive. setup_trading_parameters() re-raises on failure, so
# the success message is only logged when a request was actually built.
market_request = setup_trading_parameters()
logger.info("✅ Parameters setup completed successfully")

In [None]:
# =====================================
# 📈 Fetch Equity Data with Validation
# =====================================

def fetch_equity_data(request):
    """Fetch equity data with comprehensive validation and error handling.

    Pipeline, each step working on the previous step's output:
      1. Fetch historical cash-segment bars through ``data_manager``.
      2. Ensure the OHLCV/datetime columns exist, parse and sort datetimes,
         coerce OHLCV to numeric (forward-filled).
      3. Repair rows whose ``high``/``low`` is not the row extreme.
      4. Null out infinities and values beyond 10 standard deviations, then
         forward/backward fill.
      5. Add technical indicators via ``indicator_processor``; if that
         reports failure, continue with the validated raw data.
      6. Log a completeness score and save the frame with metadata. A failed
         save is only logged.

    Steps 3, 4 and the indicator sanity check run through
    ``safe_column_operation``, so a failure in one of them is logged and the
    step is skipped rather than aborting the fetch.

    Args:
        request: Object exposing ``stock_code``, ``exchange_code``,
            ``interval``, ``from_date`` and ``to_date``.

    Returns:
        The processed DataFrame.

    Raises:
        ValidationError: when the historical fetch reports failure.
        Exception: any other error is logged and re-raised unchanged.
    """
    try:
        logger.info(f"📊 Fetching equity data for {request.stock_code}")

        # Use enhanced data manager for equity data
        equity_result = data_manager.fetch_historical_data(
            stock_code=request.stock_code,
            exchange_code=request.exchange_code,
            product_type="cash",
            interval=request.interval,
            from_date=request.from_date,
            to_date=request.to_date
        )

        if not equity_result.success:
            raise ValidationError(f"Equity data fetch failed: {equity_result.error_message}")

        equity_df = equity_result.data

        # ===== DATA VALIDATION =====
        logger.info("🔍 Starting equity data validation...")

        price_cols = ['open', 'high', 'low', 'close']
        numeric_cols = price_cols + ['volume']

        # 1. Validate required OHLCV columns
        required_equity_cols = numeric_cols + ['datetime']
        equity_df = validate_dataframe_structure(equity_df, required_equity_cols)

        # 2. Validate datetime column
        equity_df = validate_datetime_column(equity_df, 'datetime')

        # 3. Ensure numeric columns are properly typed
        equity_df = ensure_numeric_columns(equity_df, numeric_cols, fill_method='forward')

        # 4. OHLC logic validation and fixing
        def validate_ohlc_logic(df):
            """Raise ``high`` / lower ``low`` to the row extreme where they are not."""
            df_copy = df.copy()
            ohlc = df_copy[price_cols]

            # Rows with any missing price are left untouched.
            complete = ohlc.notna().all(axis=1)
            # Both extremes come from the unmodified row values.
            row_high = ohlc.max(axis=1)
            row_low = ohlc.min(axis=1)

            high_fix = complete & (df_copy['high'] < row_high)
            low_fix = complete & (df_copy['low'] > row_low)

            df_copy.loc[high_fix, 'high'] = row_high[high_fix]
            df_copy.loc[low_fix, 'low'] = row_low[low_fix]

            issues_fixed = int(high_fix.sum()) + int(low_fix.sum())
            if issues_fixed > 0:
                logger.info(f"Fixed {issues_fixed} OHLC logic inconsistencies")

            return df_copy

        equity_df = safe_column_operation(equity_df, validate_ohlc_logic)

        # 5. Remove infinite values and extreme outliers
        def clean_extreme_values(df):
            """Clean infinite values and extreme outliers"""
            df_copy = df.copy()

            # Replace infinite values
            df_copy = df_copy.replace([np.inf, -np.inf], np.nan)

            # Remove extreme outliers (beyond 10 standard deviations)
            for col in numeric_cols:
                if col not in df_copy.columns:
                    continue
                mean_val = df_copy[col].mean()
                std_val = df_copy[col].std()

                # A NaN or zero spread gives no meaningful threshold.
                if pd.isna(std_val) or std_val <= 0:
                    continue

                outlier_mask = np.abs(df_copy[col] - mean_val) > (10 * std_val)
                outlier_count = outlier_mask.sum()

                if outlier_count > 0:
                    logger.warning(f"Removing {outlier_count} extreme outliers from {col}")
                    df_copy.loc[outlier_mask, col] = np.nan

            # Forward fill any new NaN values, then back-fill leading gaps.
            # ffill()/bfill() replace the deprecated fillna(method=...).
            df_copy = df_copy.ffill().bfill()

            return df_copy

        equity_df = safe_column_operation(equity_df, clean_extreme_values)

        # ===== TECHNICAL INDICATORS PROCESSING =====
        # Process with technical indicators using enhanced processor
        processing_result = indicator_processor.process_dataframe(
            equity_df,
            add_all_indicators=True
        )

        if not processing_result.success:
            logger.warning(f"Technical indicator processing had issues: {processing_result.error_message}")
            # Continue with validated raw data if indicator processing fails
            processed_df = equity_df
        else:
            processed_df = processing_result.data

            # Validate technical indicators
            def validate_technical_indicators(df):
                """Validate technical indicators for reasonableness"""
                df_copy = df.copy()

                # Check RSI values (should be between 0 and 100)
                rsi_cols = [col for col in df_copy.columns if 'rsi' in col.lower()]
                for col in rsi_cols:
                    invalid_rsi = (df_copy[col] < 0) | (df_copy[col] > 100)
                    if invalid_rsi.any():
                        logger.warning(f"Found {invalid_rsi.sum()} invalid RSI values in {col}")
                        df_copy.loc[invalid_rsi, col] = np.nan

                # Check for extreme technical indicator values; columns are
                # selected by substring match on their lower-cased name.
                tech_cols = [col for col in df_copy.columns
                           if any(tech in col.lower() for tech in ['sma', 'ema', 'macd', 'bb', 'atr'])]

                for col in tech_cols:
                    col_std = df_copy[col].std()
                    if col_std > 0:
                        z_scores = np.abs((df_copy[col] - df_copy[col].mean()) / col_std)
                        extreme_mask = z_scores > 5

                        if extreme_mask.any():
                            logger.warning(f"Found {extreme_mask.sum()} extreme values in {col}")
                            df_copy.loc[extreme_mask, col] = np.nan

                # Forward fill any new NaN values, then back-fill leading gaps
                df_copy = df_copy.ffill().bfill()

                return df_copy

            processed_df = safe_column_operation(processed_df, validate_technical_indicators)

        # ===== METADATA AND QUALITY ASSESSMENT =====
        # Quality score = share of rows without any missing value
        total_rows = len(processed_df)
        non_null_rows = processed_df.dropna().shape[0]
        data_quality_score = (non_null_rows / total_rows) * 100 if total_rows > 0 else 0

        logger.info(f"📊 Data Quality Assessment:")
        logger.info(f"  Total rows: {total_rows:,}")
        logger.info(f"  Complete rows: {non_null_rows:,}")
        logger.info(f"  Quality score: {data_quality_score:.2f}%")

        # File name follows the requested symbol ("TCS" -> tcs_equity_data.csv)
        # so data for another stock is not written under the TCS name.
        output_name = f"{str(request.stock_code).lower()}_equity_data.csv"

        # Save with comprehensive metadata
        save_result = data_manager.save_dataframe(
            processed_df,
            output_name,
            metadata={
                "source": "equity",
                "stock_code": request.stock_code,
                "interval": request.interval,
                "validation_timestamp": datetime.now().isoformat(),
                "data_quality_score": data_quality_score,
                "total_records": total_rows,
                "complete_records": non_null_rows,
                "indicators_count": len([c for c in processed_df.columns if c not in ['datetime', 'open', 'high', 'low', 'close', 'volume']]),
                "validation_checks_passed": [
                    "column_structure", "datetime_format", "numeric_types", 
                    "ohlc_logic", "outlier_removal", "technical_indicators"
                ]
            }
        )

        if save_result.success:
            logger.info(f"✅ Equity data saved: {len(processed_df)} records with {len(processed_df.columns)} features")
        else:
            logger.warning(f"Save failed: {save_result.error_message}")

        logger.info(f"✅ Equity data validation completed successfully")
        return processed_df

    except Exception as e:
        logger.error(f"Equity data fetch failed: {str(e)}")
        raise

# Fetch equity data with validation
# fetch_equity_data() re-raises on failure, so the lines below only run with
# a DataFrame in hand.
equity_df = fetch_equity_data(market_request)
logger.info(f"📈 Equity data shape: {equity_df.shape}")
# Log at most the first 10 column names, followed by '...' when more exist.
logger.info(f"📈 Equity data columns: {list(equity_df.columns[:10])}{'...' if len(equity_df.columns) > 10 else ''}")

In [None]:
# =====================================
# 📈 Fetch Futures Data with Validation
# =====================================

def fetch_futures_data(request):
    """Fetch futures data with comprehensive validation and graceful degradation.

    Pipeline, each step working on the previous step's output:
      1. Fetch historical NFO futures bars through ``data_manager``.
      2. Ensure the OHLCV/datetime columns exist, parse and sort datetimes,
         coerce OHLCV (plus ``open_interest``/``oi`` when present) to numeric.
      3. Futures checks: non-positive volume is set to 1, negative open
         interest to 0, close-to-close moves above 10% are reported only.
      4. Cap high-side volume outliers, replace price outliers with the
         rolling median, then forward/backward fill.
      5. Add technical indicators via ``indicator_processor``; if that
         reports failure, continue with the validated raw data.
      6. Log quality metrics and save the frame with metadata. A failed save
         is only logged.

    Steps 3 and 4 run through ``safe_column_operation``, so a failure in one
    of them is logged and the step is skipped.

    Args:
        request: Object exposing ``stock_code``, ``interval``, ``from_date``,
            ``to_date`` and ``expiry_date``.

    Returns:
        The processed DataFrame, or None when the fetch reports failure or any
        exception occurs (the caller is expected to continue without futures).
    """
    try:
        logger.info(f"📊 Fetching futures data for {request.stock_code}")

        # Use enhanced data manager for futures data
        futures_result = data_manager.fetch_historical_data(
            stock_code=request.stock_code,
            exchange_code="NFO",
            product_type="futures",
            interval=request.interval,
            from_date=request.from_date,
            to_date=request.to_date,
            expiry_date=request.expiry_date
        )

        if not futures_result.success:
            logger.warning(f"Futures data fetch failed: {futures_result.error_message}")
            return None  # Return None instead of failing completely

        futures_df = futures_result.data

        # ===== DATA VALIDATION =====
        logger.info("🔍 Starting futures data validation...")

        # Open interest may arrive under either of these names
        oi_candidates = ('open_interest', 'oi')

        # 1. Validate required columns for futures
        required_futures_cols = ['open', 'high', 'low', 'close', 'volume', 'datetime']
        optional_futures_cols = ['open_interest', 'oi', 'basis', 'premium']

        futures_df = validate_dataframe_structure(futures_df, required_futures_cols, optional_futures_cols)

        # 2. Validate datetime column
        futures_df = validate_datetime_column(futures_df, 'datetime')

        # 3. Ensure numeric columns are properly typed
        numeric_cols = ['open', 'high', 'low', 'close', 'volume']
        numeric_cols.extend(col for col in oi_candidates if col in futures_df.columns)

        futures_df = ensure_numeric_columns(futures_df, numeric_cols, fill_method='forward')

        # 4. Futures-specific validation
        def validate_futures_specifics(df):
            """Validate futures-specific requirements"""
            df_copy = df.copy()
            validation_issues = []

            # Volume validation (futures should have meaningful volume)
            if 'volume' in df_copy.columns:
                non_positive = df_copy['volume'] <= 0
                zero_volume_count = non_positive.sum()
                if zero_volume_count > len(df_copy) * 0.5:  # More than 50% zero volume
                    validation_issues.append(f"High zero volume: {zero_volume_count}/{len(df_copy)} records")

                # Replace zero volumes with small positive values
                df_copy.loc[non_positive, 'volume'] = 1

            # Open Interest validation ('open_interest' takes precedence over 'oi')
            oi_col = next((col for col in oi_candidates if col in df_copy.columns), None)

            if oi_col:
                # Open interest should generally be non-negative
                negative_mask = df_copy[oi_col] < 0
                negative_oi = negative_mask.sum()
                if negative_oi > 0:
                    validation_issues.append(f"Found {negative_oi} negative open interest values")
                    df_copy.loc[negative_mask, oi_col] = 0

            # Price continuity check
            def check_price_continuity(prices, threshold=0.1):
                """Flag bar-to-bar relative moves larger than ``threshold``.

                Always returns a boolean Series (all False for fewer than two
                prices) so the caller can use ``.any()`` / ``.sum()`` on it.
                """
                return prices.pct_change().abs() > threshold

            extreme_close_changes = check_price_continuity(df_copy['close'])
            if extreme_close_changes.any():
                extreme_count = extreme_close_changes.sum()
                validation_issues.append(f"Found {extreme_count} extreme price jumps (>10%)")
                # Log but don't automatically fix - might be legitimate gaps

            if validation_issues:
                logger.warning(f"Futures validation issues: {validation_issues}")

            return df_copy

        futures_df = safe_column_operation(futures_df, validate_futures_specifics)

        # 5. Clean extreme values specific to futures
        def clean_futures_extremes(df):
            """Clean futures-specific extreme values"""
            df_copy = df.copy()

            # Replace infinite values
            df_copy = df_copy.replace([np.inf, -np.inf], np.nan)

            # Volume outlier handling (different from equity)
            if 'volume' in df_copy.columns and len(df_copy) > 10:
                volume_median = df_copy['volume'].median()
                volume_mad = (df_copy['volume'] - volume_median).abs().median()

                if volume_mad > 0:
                    # Use median absolute deviation for volume outliers.
                    # Only the high side is flagged: the replacement value is
                    # an upper cap, so flagging unusually LOW volumes would
                    # inflate them to above the median.
                    volume_outliers = (df_copy['volume'] - volume_median) > (10 * volume_mad)
                    outlier_count = volume_outliers.sum()

                    if outlier_count > 0:
                        logger.warning(f"Capping {outlier_count} volume outliers")
                        # Cap instead of removing for futures
                        upper_cap = volume_median + (5 * volume_mad)
                        df_copy.loc[volume_outliers, 'volume'] = upper_cap

            # Price outlier handling
            for col in ['open', 'high', 'low', 'close']:
                if col in df_copy.columns and len(df_copy) > 5:
                    # Use rolling median for price outlier detection
                    rolling_median = df_copy[col].rolling(window=5, center=True).median()
                    rolling_mad = (df_copy[col] - rolling_median).abs().rolling(window=5, center=True).median()

                    # Comparisons against NaN (window edges) evaluate False,
                    # so edge rows are never flagged.
                    outlier_mask = np.abs(df_copy[col] - rolling_median) > (8 * rolling_mad)
                    outlier_count = outlier_mask.sum()

                    if outlier_count > 0:
                        logger.warning(f"Found {outlier_count} price outliers in {col}")
                        # For futures, cap rather than remove outliers
                        df_copy.loc[outlier_mask, col] = rolling_median[outlier_mask]

            # Fill any remaining NaN values.
            # ffill()/bfill() replace the deprecated fillna(method=...).
            df_copy = df_copy.ffill().bfill()

            return df_copy

        futures_df = safe_column_operation(futures_df, clean_futures_extremes)

        # ===== TECHNICAL INDICATORS PROCESSING =====
        # Process with technical indicators
        processing_result = indicator_processor.process_dataframe(
            futures_df,
            add_all_indicators=True
        )

        if not processing_result.success:
            logger.warning(f"Futures technical indicator processing failed: {processing_result.error_message}")
            processed_df = futures_df
        else:
            processed_df = processing_result.data

        # ===== DATA QUALITY ASSESSMENT =====
        # Quality score = share of rows without any missing value
        total_rows = len(processed_df)
        non_null_rows = processed_df.dropna().shape[0]
        data_quality_score = (non_null_rows / total_rows) * 100 if total_rows > 0 else 0

        # Futures-specific quality checks
        volume_quality = "Good" if processed_df['volume'].mean() > 100 else "Low"

        oi_quality = "N/A"
        quality_oi_col = next((col for col in oi_candidates if col in processed_df.columns), None)
        if quality_oi_col is not None:
            oi_mean = processed_df[quality_oi_col].mean()
            oi_quality = "Good" if oi_mean > 1000 else "Low"

        logger.info(f"📊 Futures Data Quality Assessment:")
        logger.info(f"  Total rows: {total_rows:,}")
        logger.info(f"  Complete rows: {non_null_rows:,}")
        logger.info(f"  Quality score: {data_quality_score:.2f}%")
        logger.info(f"  Volume quality: {volume_quality}")
        logger.info(f"  Open Interest quality: {oi_quality}")

        # File name follows the requested symbol ("TCS" -> tcs_futures_data.csv)
        # so data for another stock is not written under the TCS name.
        output_name = f"{str(request.stock_code).lower()}_futures_data.csv"

        # Save with comprehensive metadata
        save_result = data_manager.save_dataframe(
            processed_df,
            output_name,
            metadata={
                "source": "futures",
                "stock_code": request.stock_code,
                "expiry_date": request.expiry_date,
                "interval": request.interval,
                "validation_timestamp": datetime.now().isoformat(),
                "data_quality_score": data_quality_score,
                "volume_quality": volume_quality,
                "oi_quality": oi_quality,
                "total_records": total_rows,
                "complete_records": non_null_rows,
                "validation_checks_passed": [
                    "column_structure", "datetime_format", "numeric_types",
                    "futures_specifics", "volume_validation", "price_continuity"
                ]
            }
        )

        if save_result.success:
            logger.info(f"✅ Futures data saved: {len(processed_df)} records")
        else:
            logger.warning(f"Futures save failed: {save_result.error_message}")

        logger.info(f"✅ Futures data validation completed successfully")
        return processed_df

    except Exception as e:
        # Futures are optional for the caller: log and signal absence with None.
        logger.error(f"Futures data processing error: {str(e)}")
        return None  # Graceful degradation

# Fetch futures data with validation
# A None result means futures could not be fetched or processed; this is
# only reported, not treated as an error.
futures_df = fetch_futures_data(market_request)
if futures_df is None:
    logger.warning("⚠️ Futures data not available, continuing without it")
else:
    logger.info(f"📈 Futures data shape: {futures_df.shape}")
    logger.info(f"📈 Futures data quality validated successfully")

In [None]:
# =====================================
# 🔄 Fetch Options Data with Validation
# =====================================

def fetch_options_data(request, strike_range=800):
    """Fetch, validate, clean, process and persist the option chain for ``request``.

    The chain is fetched through ``option_analyzer``, run through the
    structure / datetime / numeric validation helpers, cleaned of
    option-specific anomalies, enriched by ``options_processor`` and finally
    written to ``tcs_options_data.csv`` together with quality metadata.

    Args:
        request: Market request object. The attributes read here are
            ``stock_code``, ``expiry_date``, ``current_price``, ``interval``,
            ``from_date`` and ``to_date``.
        strike_range: Value forwarded as ``strike_range`` to
            ``option_analyzer.fetch_full_option_chain``. Defaults to 800, the
            value that used to be hard-coded.

    Returns:
        The processed options DataFrame, or ``None`` when the chain could not
        be fetched, came back empty, or any step raised an exception (the
        exception is logged with its traceback, not re-raised).
    """
    try:
        logger.info(f"🔄 Fetching options chain for {request.stock_code}")

        # Use enhanced option analyzer for comprehensive chain data
        chain_result = option_analyzer.fetch_full_option_chain(
            stock_code=request.stock_code,
            expiry_date=request.expiry_date,
            current_price=request.current_price,
            interval=request.interval,
            from_date=request.from_date,
            to_date=request.to_date,
            strike_range=strike_range,
        )

        if not chain_result.success:
            logger.warning(f"Options chain fetch failed: {chain_result.error_message}")
            return None

        options_df = chain_result.data

        # Nothing to validate or save: report it instead of writing an empty file.
        if options_df is None or len(options_df) == 0:
            logger.warning("Options chain fetch returned no rows")
            return None

        # ===== DATA VALIDATION =====
        logger.info("🔍 Starting options data validation...")

        # 1. Validate required columns for options
        required_options_cols = ['strike', 'option_type', 'premium', 'datetime']
        optional_options_cols = [
            'open', 'high', 'low', 'close', 'volume', 'open_interest', 'oi',
            'iv', 'implied_volatility', 'delta', 'gamma', 'theta', 'vega', 'rho'
        ]

        options_df = validate_dataframe_structure(options_df, required_options_cols, optional_options_cols)

        # 2. Validate datetime column
        options_df = validate_datetime_column(options_df, 'datetime')

        # 3. Ensure numeric columns are properly typed
        numeric_cols = ['strike', 'premium']
        price_cols = ['open', 'high', 'low', 'close']
        vol_cols = ['volume', 'open_interest', 'oi']
        greek_cols = ['iv', 'implied_volatility', 'delta', 'gamma', 'theta', 'vega', 'rho']

        # Only the optional columns that are actually present get coerced.
        for col_group in (price_cols, vol_cols, greek_cols):
            numeric_cols.extend(col for col in col_group if col in options_df.columns)

        options_df = ensure_numeric_columns(options_df, numeric_cols, fill_method='forward')

        # 4. Options-specific validation
        def validate_options_specifics(df):
            """Drop or neutralise rows/values that violate basic option invariants.

            Works on a copy of ``df``; every finding is collected and logged as
            a single warning at the end.
            """
            df_copy = df.copy()
            validation_issues = []

            # Strike price validation
            if 'strike' in df_copy.columns:
                invalid_strikes = (df_copy['strike'] <= 0).sum()
                if invalid_strikes > 0:
                    validation_issues.append(f"Found {invalid_strikes} invalid strike prices")
                    df_copy = df_copy[df_copy['strike'] > 0]

                # Check strike price reasonableness relative to current price.
                # Report only: strikes outside 0.4x-2.5x of spot are not removed.
                current_price = request.current_price
                if current_price is not None and current_price > 0:
                    strike_range_check = (
                        (df_copy['strike'] > current_price * 2.5) |
                        (df_copy['strike'] < current_price * 0.4)
                    )
                    extreme_strikes = strike_range_check.sum()
                    if extreme_strikes > 0:
                        validation_issues.append(f"Found {extreme_strikes} strikes far from current price")

            # Option type validation
            if 'option_type' in df_copy.columns:
                # Map various formats to standard CE/PE
                option_type_mapping = {
                    'CALL': 'CE', 'C': 'CE', 'CALL_OPTION': 'CE',
                    'PUT': 'PE', 'P': 'PE', 'PUT_OPTION': 'PE'
                }

                # Upper-case first so the mapping is case-insensitive, then
                # translate every known alias in one pass.
                df_copy['option_type'] = (
                    df_copy['option_type'].astype(str).str.upper().replace(option_type_mapping)
                )

                # Check for invalid option types
                valid_types = ['CE', 'PE']
                is_valid_type = df_copy['option_type'].isin(valid_types)
                invalid_count = (~is_valid_type).sum()

                if invalid_count > 0:
                    validation_issues.append(f"Found {invalid_count} invalid option types")
                    df_copy = df_copy[is_valid_type]

            # Premium validation
            if 'premium' in df_copy.columns:
                # Premiums should be non-negative
                negative_premium_mask = df_copy['premium'] < 0
                negative_premiums = negative_premium_mask.sum()
                if negative_premiums > 0:
                    validation_issues.append(f"Found {negative_premiums} negative premiums")
                    df_copy.loc[negative_premium_mask, 'premium'] = 0

                # Check for unreasonably high premiums
                if 'strike' in df_copy.columns:
                    # Premium should generally not exceed strike price for reasonable options
                    high_premium_count = (df_copy['premium'] > df_copy['strike']).sum()

                    if high_premium_count > 0:
                        validation_issues.append(f"Found {high_premium_count} suspiciously high premiums")
                        # Log but don't automatically fix - might be legitimate for deep ITM options

            # Greeks validation
            if 'delta' in df_copy.columns:
                # Delta should be between -1 and 1
                invalid_delta = (df_copy['delta'] < -1) | (df_copy['delta'] > 1)
                if invalid_delta.any():
                    validation_issues.append(f"Found {invalid_delta.sum()} invalid delta values")
                    df_copy.loc[invalid_delta, 'delta'] = np.nan

            if 'gamma' in df_copy.columns:
                # Gamma should be non-negative
                negative_gamma_mask = df_copy['gamma'] < 0
                negative_gamma = negative_gamma_mask.sum()
                if negative_gamma > 0:
                    validation_issues.append(f"Found {negative_gamma} negative gamma values")
                    df_copy.loc[negative_gamma_mask, 'gamma'] = np.nan

            # Implied Volatility validation: check every IV column that is
            # present, not just the first one found.
            # NOTE(review): the 3.0 upper bound presumes IV is stored as a
            # fraction (3.0 == 300%) - confirm against the data source.
            for iv_col in ('iv', 'implied_volatility'):
                if iv_col not in df_copy.columns:
                    continue
                invalid_iv = (df_copy[iv_col] <= 0) | (df_copy[iv_col] > 3.0)
                if invalid_iv.any():
                    validation_issues.append(
                        f"Found {invalid_iv.sum()} invalid IV values in '{iv_col}'"
                    )
                    df_copy.loc[invalid_iv, iv_col] = np.nan

            if validation_issues:
                logger.warning(f"Options validation issues: {validation_issues}")

            return df_copy

        options_df = safe_column_operation(options_df, validate_options_specifics)

        # 5. Clean extreme values specific to options
        def clean_options_extremes(df):
            """Replace infinities, cap volume/OI outliers and fill remaining gaps.

            Works on a copy of ``df``.
            """
            df_copy = df.copy()

            # Replace infinite values
            df_copy = df_copy.replace([np.inf, -np.inf], np.nan)

            # Volume and OI outlier handling for options
            for vol_col in ['volume', 'open_interest', 'oi']:
                if vol_col in df_copy.columns and len(df_copy) > 10:
                    values = df_copy[vol_col]
                    q99 = values.quantile(0.99)

                    negative_mask = values < 0
                    # Only cap the upper tail when q99 is positive. With a
                    # zero q99 the threshold q99 * 10 is also zero, so every
                    # positive value would be flagged and overwritten with 0.
                    high_mask = (values > q99 * 10) & bool(q99 > 0)

                    outlier_count = (high_mask | negative_mask).sum()

                    if outlier_count > 0:
                        logger.warning(f"Capping {outlier_count} {vol_col} outliers")
                        df_copy.loc[high_mask, vol_col] = q99
                        df_copy.loc[negative_mask, vol_col] = 0

            # Fill remaining NaN values: forward first, then backward for any
            # leading gaps. (fillna(method=...) is deprecated in pandas 2.x.)
            # NOTE(review): the fill runs over the whole frame in row order, so
            # a gap may be filled from a neighbouring row that belongs to a
            # different strike/option type - confirm this is intended.
            df_copy = df_copy.ffill().bfill()

            return df_copy

        options_df = safe_column_operation(options_df, clean_options_extremes)

        # ===== OPTIONS PROCESSING =====
        # Process options with specialized processor
        processing_result = options_processor.process_options_dataframe(
            options_df,
            current_price=request.current_price,
            add_greeks=True,
            add_technical_indicators=True
        )

        if not processing_result.success:
            # Fall back to the validated-but-unprocessed frame.
            logger.warning(f"Options processing failed: {processing_result.error_message}")
            processed_df = options_df
        else:
            processed_df = processing_result.data

        # ===== DATA QUALITY ASSESSMENT =====
        # Quality score = share of rows without any missing value, in percent.
        total_rows = len(processed_df)
        non_null_rows = processed_df.dropna().shape[0]
        data_quality_score = (non_null_rows / total_rows) * 100 if total_rows > 0 else 0

        # Options-specific quality metrics
        unique_strikes = len(processed_df['strike'].unique()) if 'strike' in processed_df.columns else 0

        option_types = processed_df['option_type'].value_counts().to_dict() if 'option_type' in processed_df.columns else {}

        # Calculate average volume and OI ('open_interest' wins over 'oi')
        avg_volume = processed_df['volume'].mean() if 'volume' in processed_df.columns else 0
        oi_col = next((col for col in ('open_interest', 'oi') if col in processed_df.columns), None)
        avg_oi = processed_df[oi_col].mean() if oi_col is not None else 0

        logger.info("🔄 Options Data Quality Assessment:")
        logger.info(f"  Total rows: {total_rows:,}")
        logger.info(f"  Complete rows: {non_null_rows:,}")
        logger.info(f"  Quality score: {data_quality_score:.2f}%")
        logger.info(f"  Unique strikes: {unique_strikes}")
        logger.info(f"  Option types: {option_types}")
        logger.info(f"  Average volume: {avg_volume:.2f}")
        logger.info(f"  Average OI: {avg_oi:.2f}")

        # Save with comprehensive metadata
        save_result = data_manager.save_dataframe(
            processed_df,
            "tcs_options_data.csv",
            metadata={
                "source": "options",
                "stock_code": request.stock_code,
                "expiry_date": request.expiry_date,
                "current_price": request.current_price,
                "validation_timestamp": datetime.now().isoformat(),
                "data_quality_score": data_quality_score,
                "unique_strikes": unique_strikes,
                "option_types": option_types,
                "average_volume": avg_volume,
                "average_oi": avg_oi,
                "total_records": total_rows,
                "complete_records": non_null_rows,
                "validation_checks_passed": [
                    "column_structure", "datetime_format", "numeric_types",
                    "strike_validation", "option_type_standardization", "premium_validation",
                    "greeks_validation", "iv_validation"
                ]
            }
        )

        # A failed save is reported but does not invalidate the in-memory frame.
        if save_result.success:
            logger.info(f"✅ Options data saved: {len(processed_df)} records, {unique_strikes} strikes")
        else:
            logger.warning(f"Options save failed: {save_result.error_message}")

        logger.info("✅ Options data validation completed successfully")
        return processed_df

    except Exception as e:
        # Best-effort step: log with the traceback and let the caller continue
        # without options data.
        logger.error(f"Options data processing error: {str(e)}", exc_info=True)
        return None

# Run the validated options fetch for the current market request.
# A None result means the fetch or its processing failed; this cell only logs
# that and carries on.
options_df = fetch_options_data(market_request)
if options_df is None:
    logger.warning("⚠️ Options data not available, continuing without it")
else:
    logger.info(f"🔄 Options data shape: {options_df.shape}")
    if 'option_type' in options_df.columns:
        # Per-type row counts, e.g. how many CE vs PE rows were kept.
        option_breakdown = options_df['option_type'].value_counts().to_dict()
        logger.info(f"🔄 Option breakdown: {option_breakdown}")

In [None]:
# =====================================
# 🔗 Data Combination and Enhancement
# =====================================

from data_processing_utils import ProcessingResult, DataQuality

def combine_and_enhance_data(equity_df, futures_df=None, options_df=None):
    """Combine the equity, futures and options frames, enrich and save the result.

    Steps, all delegated to ``options_processor`` / ``data_manager``:
    combine the three frames, add relationship features, assess data quality,
    then save the result as ``tcs_enhanced_data.csv`` with summary metadata.

    Args:
        equity_df: Equity data, passed as ``equity_data`` to the combiner.
        futures_df: Optional futures data (``None`` is passed through as-is).
        options_df: Optional options data (``None`` is passed through as-is).

    Returns:
        The enhanced DataFrame. If only the enhancement step reports failure,
        the plain combined DataFrame is returned instead.

    Raises:
        ValidationError: When the combination step reports failure.
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        logger.info("🔗 Starting data combination and enhancement")

        # Use enhanced options processor for data combination
        combination_result = options_processor.combine_market_data(
            equity_data=equity_df,
            futures_data=futures_df,
            options_data=options_df
        )

        if not combination_result.success:
            # NOTE(review): ValidationError is not imported in this cell;
            # presumably it is defined earlier in the notebook - confirm.
            raise ValidationError(f"Data combination failed: {combination_result.error_message}")

        combined_df = combination_result.data
        logger.info(f"✅ Data combined successfully: {combined_df.shape}")

        # Enhance with relationship metadata using options processor
        enhancement_result = options_processor.add_relationship_features(
            combined_df,
            include_correlations=True,
            include_price_targets=True
        )

        if not enhancement_result.success:
            # Enhancement is optional: keep the combined frame as the result.
            logger.warning(f"Enhancement failed: {enhancement_result.error_message}")
            enhanced_df = combined_df
        else:
            enhanced_df = enhancement_result.data

        # Data quality assessment. The score is informational only, so a
        # result without metadata must not abort the whole combination.
        quality_result = options_processor.assess_data_quality(enhanced_df)
        quality_metadata = getattr(quality_result, 'metadata', None) or {}
        quality_score = quality_metadata.get('quality_score')
        quality_label = quality_score if quality_score is not None else 'N/A'
        logger.info(f"📊 Data quality: {quality_label}")

        # Save final enhanced dataset
        save_result = data_manager.save_dataframe(
            enhanced_df,
            "tcs_enhanced_data.csv",
            metadata={
                "source": "combined_enhanced",
                "features_count": len(enhanced_df.columns),
                "records_count": len(enhanced_df),
                "data_quality": quality_score,
                "processing_timestamp": datetime.now().isoformat()
            }
        )

        # A failed save is reported but the in-memory frame is still returned.
        if save_result.success:
            logger.info(f"✅ Enhanced dataset saved: {enhanced_df.shape} with {len(enhanced_df.columns)} features")
        else:
            logger.warning(f"Enhanced data save failed: {save_result.error_message}")

        return enhanced_df

    except Exception as e:
        # Log here for context, then let the caller decide on a fallback.
        logger.error(f"Data combination and enhancement failed: {str(e)}")
        raise

# Combine and enhance all data.
# The combination step and the summary logging are handled separately so that
# an error while *reporting* cannot discard a dataset that was built
# successfully.
try:
    enhanced_df = combine_and_enhance_data(equity_df, futures_df, options_df)
except Exception as e:
    logger.error(f"❌ Critical error in data processing: {str(e)}")
    # Provide graceful fallback: use the equity frame, but only when the name
    # exists in this namespace AND actually holds data (not None).
    if locals().get('equity_df') is not None:
        logger.info("📈 Falling back to equity data only")
        enhanced_df = equity_df
    else:
        logger.error("💥 Complete failure - no data available")
        raise
else:
    try:
        # Final data summary
        logger.info("="*50)
        logger.info("📊 FINAL DATA SUMMARY")
        logger.info("="*50)
        logger.info(f"📈 Total records: {len(enhanced_df):,}")
        logger.info(f"📊 Total features: {len(enhanced_df.columns):,}")
        if 'datetime' in enhanced_df.columns:
            logger.info(f"📅 Date range: {enhanced_df['datetime'].min()} to {enhanced_df['datetime'].max()}")
        else:
            logger.info("📅 Date range: unavailable (no 'datetime' column)")

        # Feature breakdown: columns are classified by name prefix, and
        # relationship features by substring match on the keywords below.
        relationship_keywords = ('corr_', 'basis_', 'divergence', 'ratio')
        equity_features = sum(1 for c in enhanced_df.columns if c.startswith('equity_'))
        futures_features = sum(1 for c in enhanced_df.columns if c.startswith('futures_'))
        options_features = sum(1 for c in enhanced_df.columns if c.startswith('options_'))
        relationship_features = sum(
            1 for c in enhanced_df.columns
            if any(keyword in c for keyword in relationship_keywords)
        )

        logger.info(f"📈 Equity features: {equity_features}")
        logger.info(f"📊 Futures features: {futures_features}")
        logger.info(f"🔄 Options features: {options_features}")
        logger.info(f"🔗 Relationship features: {relationship_features}")
        logger.info("="*50)
        logger.info("✅✅✅ ALL DATA PROCESSING COMPLETED SUCCESSFULLY!")
        logger.info("="*50)
    except Exception as summary_error:
        # Reporting is informational only; keep enhanced_df as built.
        logger.warning(f"Final data summary could not be generated: {summary_error}")