<a href="https://colab.research.google.com/github/BaronVonBussin/NewTransit/blob/main/buggy_gelset_20250104.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import logging

class GetSetsProcessor:
    """Main processor for Get Sets analysis"""

    def __init__(self, child_period='D', parent_period='M', jobname='Gelset'):
        self.child_period = child_period
        self.parent_period = parent_period
        self.jobname = jobname
        self.setup_logging()

        # Validate period combinations
        self._validate_periods()

        # Setup paths
        self.setup_directories()

    def _validate_periods(self):
        """Validate period combinations"""
        valid_combinations = {
            ('D', 'W'), ('D', 'M'),
            ('M', 'Q'), ('M', 'Y'),
            ('Q', 'Y'), ('D', 'Q')
        }
        if (self.child_period, self.parent_period) not in valid_combinations:
            raise ValueError(f"Invalid period combination: {self.child_period}/{self.parent_period}")

    def setup_logging(self):
        """Initialize logging"""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(f'getsets_{datetime.now().strftime("%Y%m%d")}.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('GetSets')

    def setup_directories(self):
        """Create required output directories"""
        directories = ['output_child', 'output_parent', 'output_gel_sum']
        for dir_name in directories:
            Path(dir_name).mkdir(exist_ok=True)

    def process_file(self, filepath):
        """Process a single input file"""
        try:
            # Extract file information
            ticker, temporal_period, rolling_range = self._parse_filename(filepath)

            # Load and validate input data
            df = self._load_input_file(filepath)

            # Create parent file
            parent_df = self._create_parent_file(df)

            # Process child calculations
            child_df = self._process_child_calculations(df, parent_df)

            # Update parent with child data
            parent_df = self._update_parent_with_child_data(parent_df, child_df)

            # Generate summary statistics
            self._generate_summary(parent_df, child_df, ticker)

            # Export results
            self._export_results(parent_df, child_df, ticker)

            return True

        except Exception as e:
            self.logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    def _parse_filename(self, filepath):
        """Parse input filename for ticker and metadata"""
        filename = Path(filepath).stem
        parts = filename.split('_')
        if len(parts) != 3:
            raise ValueError(f"Invalid filename format: {filename}")
        return parts[0], parts[1], parts[2]

    def _load_input_file(self, filepath):
        """Load and validate input file"""
        df = pd.read_csv(filepath)

        # Validate required columns
        required_cols = ['date', 'open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Validate OHLC relationships
        valid_mask = (
            (df['low'] <= df['open']) &
            (df['low'] <= df['close']) &
            (df['high'] >= df['open']) &
            (df['high'] >= df['close']) &
            (df['high'] - df['low'] > 0)
        )

        invalid_rows = df[~valid_mask]
        if len(invalid_rows) > 0:
            self.logger.warning(f"Removing {len(invalid_rows)} invalid rows")
            df = df[valid_mask]

        return df

    def _create_parent_file(self, df):
        """Create parent file from child data"""
        parent_processor = ParentProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return parent_processor.create_parent_file(df)

    def _process_child_calculations(self, df, parent_df):
        """Process all child calculations"""
        child_processor = ChildProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return child_processor.process_calculations(df, parent_df)

    def _update_parent_with_child_data(self, parent_df, child_df):
        """Update parent file with aggregated child data"""
        updater = ParentUpdater(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return updater.update_parent(parent_df, child_df)

    def _generate_summary(self, parent_df, child_df, ticker):
        """Generate summary statistics"""
        summary = SummaryGenerator()
        return summary.generate(parent_df, child_df, ticker)

    def _export_results(self, parent_df, child_df, ticker):
        """Export results to CSV files"""
        # Export child file
        child_path = f'output_child/{ticker}_child_{self.child_period}.csv'
        child_df.to_csv(child_path, index=False)

        # Export parent file
        parent_path = f'output_parent/{ticker}_parent_{self.parent_period}.csv'
        parent_df.to_csv(parent_path, index=False)

        self.logger.info(f"Results exported: {child_path}, {parent_path}")


class ParentProcessor:
    """Handles creation and processing of parent file"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def create_parent_file(self, df):
        """Create parent file by aggregating child data"""
        # Group by parent period
        grouped = self._group_by_parent_period(df)

        # Aggregate OHLC
        parent_df = grouped.agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).reset_index()

        # Add reference fields
        parent_df = self._add_reference_fields(parent_df)

        # Calculate basic indicators
        parent_df = self._calculate_basic_indicators(parent_df)

        return parent_df

    def _group_by_parent_period(self, df):
        """Group data by parent period"""
        if self.parent_period == 'W':
            return df.groupby(pd.Grouper(key='date', freq='W'))
        elif self.parent_period == 'M':
            return df.groupby(pd.Grouper(key='date', freq='M'))
        elif self.parent_period == 'Q':
            return df.groupby(pd.Grouper(key='date', freq='Q'))
        elif self.parent_period == 'Y':
            return df.groupby(pd.Grouper(key='date', freq='Y'))
        else:
            raise ValueError(f"Invalid parent period: {self.parent_period}")

    def _add_reference_fields(self, df):
        """Add reference fields to parent dataframe"""
        df['serial_id'] = self._generate_serial_ids(df)
        df['model_type'] = 1  # bar/prior bar
        df['child_period'] = self.child_period
        df['parent_period'] = self.parent_period
        df['create_date'] = datetime.now().strftime('%Y-%m-%d')
        df['create_time'] = datetime.now().strftime('%H:%M:%S')
        return df

    def _generate_serial_ids(self, df):
        """Generate unique 13-digit serial IDs"""
        base = int(datetime.now().strftime('%Y%m%d%H%M'))
        return [f"{base}{i:03d}" for i in range(len(df))]

    def _calculate_basic_indicators(self, df):
        """Calculate basic technical indicators"""
        # Calculate range
        df['range'] = df['high'] - df['low']

        # Calculate percent_r
        df['percent_r'] = (df['close'] - df['low']) / df['range']

        # Add prior bar references
        df['ro'] = df['open'].shift(1)
        df['rh'] = df['high'].shift(1)
        df['rl'] = df['low'].shift(1)
        df['rc'] = df['close'].shift(1)

        return df


class ChildProcessor:
    """Handles child calculations including bar/prior bar and gel calculations"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def process_calculations(self, df, parent_df):
        """Process all child calculations"""
        # Add trading_bop
        df = self._add_trading_bop(df)

        # Calculate bar/prior bar
        df = self._calculate_bar_prior_bar(df)

        # Calculate gel values
        df = self._calculate_gel_values(df)

        # Calculate prior parent values
        df = self._calculate_prior_parent(df)

        return df

    def _add_trading_bop(self, df):
        """Add trading bar of parent field"""
        # Group by parent period and assign sequential numbers
        if self.parent_period == 'W':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='W')).cumcount() + 1
        elif self.parent_period == 'M':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='M')).cumcount() + 1
        elif self.parent_period == 'Q':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Q')).cumcount() + 1
        elif self.parent_period == 'Y':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Y')).cumcount() + 1

        return df

    def _calculate_bar_prior_bar(self, df):
        """Calculate bar/prior bar values"""
        # REU/RED calculations
        df['reu_value'] = df.apply(
            lambda x: x['high'] - x['high'].shift(1) if x['high'] > x['high'].shift(1) else 0
        )
        df['red_value'] = df.apply(
            lambda x: abs(x['low'] - x['low'].shift(1)) if x['low'] < x['low'].shift(1) else 0
        )

        # Flags
        df['reu_flag'] = (df['reu_value'] > 0).astype(int)
        df['red_flag'] = (df['red_value'] > 0).astype(int)
        df['re_value'] = df['reu_value'] + df['red_value']
        df['re_flag'] = ((df['reu_flag'] == 1) | (df['red_flag'] == 1)).astype(int)

        return df



class GelCalculator:
    """Handles Gel calculations including expansions and pattern recognition"""

    def __init__(self):
        pass

    def calculate_gel_values(self, df):
        """Calculate all gel-related values"""
        # Initialize gel OHLC
        df = self._initialize_gel_ohlc(df)

        # Calculate gel ranges and percentages
        df = self._calculate_gel_ranges(df)

        # Calculate gel expansions
        df = self._calculate_gel_expansions(df)

        # Calculate gel patterns
        df = self._calculate_gel_patterns(df)

        return df

    def _initialize_gel_ohlc(self, df):
        """Initialize gel OHLC values"""
        # First bar of parent sets initial values
        df['gel_open'] = df.apply(
            lambda x: x['open'] if x['trading_bop'] == 1
            else x['gel_open'].shift(1), axis=1
        )

        # Running high/low within parent
        df['gel_high'] = df.apply(
            lambda x: x['high'] if x['trading_bop'] == 1
            else max(x['high'], x['gel_high'].shift(1)), axis=1
        )

        df['gel_low'] = df.apply(
            lambda x: x['low'] if x['trading_bop'] == 1
            else min(x['low'], x['gel_low'].shift(1)), axis=1
        )

        df['gel_close'] = df['close']

        return df

    def _calculate_gel_ranges(self, df):
        """Calculate gel ranges and percentages"""
        df['gel_range'] = df['gel_high'] - df['gel_low']
        df['gel_percent_r'] = (df['gel_close'] - df['gel_low']) / df['gel_range']

        # Calculate gel ce_percent
        df['gel_ce_percent'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else 1 - x['gel_percent_r'].shift(1) if x['gel_percent_r'].shift(1) >= 0.5
            else x['gel_percent_r'].shift(1), axis=1
        )

        return df

    def _calculate_gel_expansions(self, df):
        """Calculate gel expansion values"""
        # Calculate REU/RED for gel
        df['gel_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['gel_high'] - x['gel_high'].shift(1) if x['gel_high'] > x['gel_high'].shift(1)
            else 0, axis=1
        )

        df['gel_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['gel_low'] - x['gel_low'].shift(1)) if x['gel_low'] < x['gel_low'].shift(1)
            else 0, axis=1
        )

        # Set expansion flags
        df['gel_reu_flag'] = (df['gel_reu_value'] > 0).astype(int)
        df['gel_red_flag'] = (df['gel_red_value'] > 0).astype(int)
        df['gel_re_value'] = df['gel_reu_value'] + df['gel_red_value']

        return df

    def _calculate_gel_patterns(self, df):
        """Calculate gel pattern indicators"""
        # Calculate EPC and related fields
        df['gel_epc'] = np.ceil(df['gel_ce_percent'] / 0.1).clip(1, 5)
        df['gel_epc_dir'] = (df['gel_percent_r'].shift(1) >= 0.5).astype(int)
        df['gel_epc_hp'] = (df['gel_ce_percent'] < 0.25).astype(int)

        # Calculate E1/E2 values
        df['gel_e1_value'] = df.apply(
            lambda x: x['gel_reu_value'] if x['gel_epc_dir'] == 1
            else x['gel_red_value'], axis=1
        )

        df['gel_e2_value'] = df.apply(
            lambda x: x['gel_red_value'] if x['gel_epc_dir'] == 1
            else x['gel_reu_value'], axis=1
        )

        return df


class PriorParentCalculator:
    """Handles calculations related to prior parent period relationships"""

    def __init__(self):
        pass

    def calculate_prior_parent(self, df):
        """Calculate all prior parent related values"""
        # Initialize parent reference values
        df = self._initialize_parent_refs(df)

        # Calculate ranges and percentages
        df = self._calculate_parent_ranges(df)

        # Calculate expansions against prior parent
        df = self._calculate_parent_expansions(df)

        # Calculate patterns and directions
        df = self._calculate_parent_patterns(df)

        return df

    def _initialize_parent_refs(self, df):
        """Initialize references to parent values"""
        df['gelp_open'] = df['parent_open']

        # Running high/low against parent
        df['gelp_high'] = df.apply(
            lambda x: x['parent_high'] if x['trading_bop'] == 1
            else max(x['parent_high'], x['gel_high'].shift(1)), axis=1
        )

        df['gelp_low'] = df.apply(
            lambda x: x['parent_low'] if x['trading_bop'] == 1
            else min(x['parent_low'], x['gel_low'].shift(1)), axis=1
        )

        df['gelp_close'] = df['close'].shift(1)

        return df

    def _calculate_parent_ranges(self, df):
        """Calculate ranges and percentages against parent"""
        df['gelp_range'] = df['gelp_high'] - df['gelp_low']
        df['gelp_percent_r'] = (df['gelp_close'] - df['gelp_low']) / df['gelp_range']

        df['gelp_ce_percent'] = df.apply(
            lambda x: 1 - x['gelp_percent_r'] if x['gelp_percent_r'] >= 0.5
            else x['gelp_percent_r'], axis=1
        )

        return df

    def _calculate_parent_expansions(self, df):
        """Calculate expansions against parent values"""
        # Calculate REU/RED against parent
        df['gelp_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['high'] - x['gelp_high'] if x['high'] > x['gelp_high']
            else 0, axis=1
        )

        df['gelp_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['low'] - x['gelp_low']) if x['low'] < x['gelp_low']
            else 0, axis=1
        )

        # Set expansion flags
        df['gelp_reu_flag'] = (df['gelp_reu_value'] > 0).astype(int)
        df['gelp_red_flag'] = (df['gelp_red_value'] > 0).astype(int)
        df['gelp_re_value'] = df['gelp_reu_value'] + df['gelp_red_value']

        return df

    def _calculate_parent_patterns(self, df):
        """Calculate pattern indicators against parent"""
        # Calculate EPC and related fields
        df['gelp_epc'] = np.ceil(df['gelp_ce_percent'] / 0.1).clip(1, 5)
        df['gelp_epc_dir'] = (df['gelp_percent_r'] >= 0.5).astype(int)
        df['gelp_epc_hp_flag'] = (df['gelp_ce_percent'] < 0.25).astype(int)

        # Calculate direction and RPC
        df['gelp_dir'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_dir'].shift(1) if x['gelp_re_flag'] == 0
            else 0 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == 1
            else 1 if x['gelp_reu_flag'] == 1
            else x['gelp_dir'].shift(1), axis=1
        )

        df['gelp_rpc'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_rpc'].shift(1) if x['gelp_dir'] == x['gelp_dir'].shift(1)
            else 2 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == x['gelp_dir'].shift(1)
            else 1, axis=1
        )

        return df


class SummaryGenerator:
    """Generates summary statistics and analysis"""

    def __init__(self):
        pass

    def generate(self, parent_df, child_df, ticker):
        """Generate complete summary statistics"""
        summary = {}

        # Parent level statistics
        summary.update(self._generate_parent_stats(parent_df))

        # Child level statistics
        summary.update(self._generate_child_stats(child_df))

        # Pattern analysis
        summary.update(self._analyze_patterns(parent_df, child_df))

        # Export summary
        self._export_summary(summary, ticker)

        return summary

    def _generate_parent_stats(self, df):
        """Generate parent level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        # E1 FRE flag by EPC HP
        e1_hp_stats = df[df['epc_hp'] == 1]['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_hp_counts'] = e1_hp_stats.to_dict()
        stats['e1_fre_flag_hp_percentages'] = (e1_hp_stats / len(df[df['epc_hp'] == 1]) * 100).to_dict()

        # Range histogram
        stats['range_histogram'] = df['range'].describe().to_dict()

        return stats

    def _generate_child_stats(self, df):
        """Generate child level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['child_e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['child_e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        return stats

    def _analyze_patterns(self, parent_df, child_df):
        """Analyze patterns across parent and child data"""
        patterns = {}

        # Analyze expansion patterns
        patterns['avg_expansion_by_period'] = child_df.groupby('trading_bop')['re_value'].mean().to_dict()

        # Analyze direction persistence
        patterns['direction_persistence'] = self._calculate_direction_persistence(child_df)

        # Analyze high/low positioning
        patterns['hl_position_stats'] = self._analyze_hl_positions(child_df)

        return patterns

    def _calculate_direction_persistence(self, df):
        """Calculate statistics about direction persistence"""
        return {
            'avg_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().mean(),
            'max_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().max()
        }

    def _analyze_hl_positions(self, df):
        """Analyze high/low position patterns"""
        return {
            'early_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) & (df['trading_bop'] <= 2)]),
            'middle_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                               (df['trading_bop'] > 2) & (df['trading_bop'] < df['parent_duration'] - 1)]),
            'late_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                             (df['trading_bop'] >= df['parent_duration'] - 1)])
        }

    def _export_summary(self, summary, ticker):
        """Export summary statistics to CSV"""
        # Convert nested dict to flat format for CSV
        flat_summary = self._flatten_dict(summary)

        # Create DataFrame and export
        summary_df = pd.DataFrame([flat_summary])
        summary_df.to_csv(f'output_gel_sum/{ticker}_summary.csv', index=False)

    def _flatten_dict(self, d, parent_key='', sep='_'):
        """Flatten nested dictionary for CSV export"""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import logging

class GetSetsProcessor:
    """Main processor for Get Sets analysis"""

    def __init__(self, child_period='D', parent_period='M', jobname='Gelset'):
        self.child_period = child_period
        self.parent_period = parent_period
        self.jobname = jobname
        self.setup_logging()

        # Validate period combinations
        self._validate_periods()

        # Setup paths
        self.setup_directories()

    def _validate_periods(self):
        """Validate period combinations"""
        valid_combinations = {
            ('D', 'W'), ('D', 'M'),
            ('M', 'Q'), ('M', 'Y'),
            ('Q', 'Y'), ('D', 'Q')
        }
        if (self.child_period, self.parent_period) not in valid_combinations:
            raise ValueError(f"Invalid period combination: {self.child_period}/{self.parent_period}")

    def setup_logging(self):
        """Initialize logging"""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(f'getsets_{datetime.now().strftime("%Y%m%d")}.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('GetSets')

    def setup_directories(self):
        """Create required output directories"""
        directories = ['output_child', 'output_parent', 'output_gel_sum']
        for dir_name in directories:
            Path(dir_name).mkdir(exist_ok=True)

    def process_file(self, filepath):
        """Process a single input file"""
        try:
            # Extract file information
            ticker, temporal_period, rolling_range = self._parse_filename(filepath)

            # Load and validate input data
            df = self._load_input_file(filepath)

            # Create parent file
            parent_df = self._create_parent_file(df)

            # Process child calculations
            child_df = self._process_child_calculations(df, parent_df)

            # Update parent with child data
            parent_df = self._update_parent_with_child_data(parent_df, child_df)

            # Generate summary statistics
            self._generate_summary(parent_df, child_df, ticker)

            # Export results
            self._export_results(parent_df, child_df, ticker)

            return True

        except Exception as e:
            self.logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    def _parse_filename(self, filepath):
        """Parse input filename for ticker and metadata"""
        filename = Path(filepath).stem
        parts = filename.split('_')
        if len(parts) != 3:
            raise ValueError(f"Invalid filename format: {filename}")
        return parts[0], parts[1], parts[2]

    def _load_input_file(self, filepath):
        """Load and validate input file"""
        df = pd.read_csv(filepath)

        # Validate required columns
        required_cols = ['date', 'open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Validate OHLC relationships
        valid_mask = (
            (df['low'] <= df['open']) &
            (df['low'] <= df['close']) &
            (df['high'] >= df['open']) &
            (df['high'] >= df['close']) &
            (df['high'] - df['low'] > 0)
        )

        invalid_rows = df[~valid_mask]
        if len(invalid_rows) > 0:
            self.logger.warning(f"Removing {len(invalid_rows)} invalid rows")
            df = df[valid_mask]

        return df

    def _create_parent_file(self, df):
        """Create parent file from child data"""
        parent_processor = ParentProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return parent_processor.create_parent_file(df)

    def _process_child_calculations(self, df, parent_df):
        """Process all child calculations"""
        child_processor = ChildProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return child_processor.process_calculations(df, parent_df)

    def _update_parent_with_child_data(self, parent_df, child_df):
        """Update parent file with aggregated child data"""
        updater = ParentUpdater(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return updater.update_parent(parent_df, child_df)

    def _generate_summary(self, parent_df, child_df, ticker):
        """Generate summary statistics"""
        summary = SummaryGenerator()
        return summary.generate(parent_df, child_df, ticker)

    def _export_results(self, parent_df, child_df, ticker):
        """Export results to CSV files"""
        # Export child file
        child_path = f'output_child/{ticker}_child_{self.child_period}.csv'
        child_df.to_csv(child_path, index=False)

        # Export parent file
        parent_path = f'output_parent/{ticker}_parent_{self.parent_period}.csv'
        parent_df.to_csv(parent_path, index=False)

        self.logger.info(f"Results exported: {child_path}, {parent_path}")


class ParentProcessor:
    """Handles creation and processing of parent file"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def create_parent_file(self, df):
        """Create parent file by aggregating child data"""
        # Group by parent period
        grouped = self._group_by_parent_period(df)

        # Aggregate OHLC
        parent_df = grouped.agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).reset_index()

        # Add reference fields
        parent_df = self._add_reference_fields(parent_df)

        # Calculate basic indicators
        parent_df = self._calculate_basic_indicators(parent_df)

        return parent_df

    def _group_by_parent_period(self, df):
        """Group data by parent period"""
        if self.parent_period == 'W':
            return df.groupby(pd.Grouper(key='date', freq='W'))
        elif self.parent_period == 'M':
            return df.groupby(pd.Grouper(key='date', freq='M'))
        elif self.parent_period == 'Q':
            return df.groupby(pd.Grouper(key='date', freq='Q'))
        elif self.parent_period == 'Y':
            return df.groupby(pd.Grouper(key='date', freq='Y'))
        else:
            raise ValueError(f"Invalid parent period: {self.parent_period}")

    def _add_reference_fields(self, df):
        """Add reference fields to parent dataframe"""
        df['serial_id'] = self._generate_serial_ids(df)
        df['model_type'] = 1  # bar/prior bar
        df['child_period'] = self.child_period
        df['parent_period'] = self.parent_period
        df['create_date'] = datetime.now().strftime('%Y-%m-%d')
        df['create_time'] = datetime.now().strftime('%H:%M:%S')
        return df

    def _generate_serial_ids(self, df):
        """Generate unique 13-digit serial IDs"""
        base = int(datetime.now().strftime('%Y%m%d%H%M'))
        return [f"{base}{i:03d}" for i in range(len(df))]

    def _calculate_basic_indicators(self, df):
        """Calculate basic technical indicators"""
        # Calculate range
        df['range'] = df['high'] - df['low']

        # Calculate percent_r
        df['percent_r'] = (df['close'] - df['low']) / df['range']

        # Add prior bar references
        df['ro'] = df['open'].shift(1)
        df['rh'] = df['high'].shift(1)
        df['rl'] = df['low'].shift(1)
        df['rc'] = df['close'].shift(1)

        return df


class ChildProcessor:
    """Handles child calculations including bar/prior bar and gel calculations"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def process_calculations(self, df, parent_df):
        """Process all child calculations"""
        # Add trading_bop
        df = self._add_trading_bop(df)

        # Calculate bar/prior bar
        df = self._calculate_bar_prior_bar(df)

        # Calculate gel values
        df = self._calculate_gel_values(df)

        # Calculate prior parent values
        df = self._calculate_prior_parent(df)

        return df

    def _add_trading_bop(self, df):
        """Add trading bar of parent field"""
        # Group by parent period and assign sequential numbers
        if self.parent_period == 'W':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='W')).cumcount() + 1
        elif self.parent_period == 'M':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='M')).cumcount() + 1
        elif self.parent_period == 'Q':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Q')).cumcount() + 1
        elif self.parent_period == 'Y':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Y')).cumcount() + 1

        return df

    def _calculate_bar_prior_bar(self, df):
        """Calculate bar/prior bar values"""
        # REU/RED calculations
        df['reu_value'] = df.apply(
            lambda x: x['high'] - x['high'].shift(1) if x['high'] > x['high'].shift(1) else 0
        )
        df['red_value'] = df.apply(
            lambda x: abs(x['low'] - x['low'].shift(1)) if x['low'] < x['low'].shift(1) else 0
        )

        # Flags
        df['reu_flag'] = (df['reu_value'] > 0).astype(int)
        df['red_flag'] = (df['red_value'] > 0).astype(int)
        df['re_value'] = df['reu_value'] + df['red_value']
        df['re_flag'] = ((df['reu_flag'] == 1) | (df['red_flag'] == 1)).astype(int)

        return df



class GelCalculator:
    """Handles Gel calculations including expansions and pattern recognition"""

    def __init__(self):
        pass

    def calculate_gel_values(self, df):
        """Calculate all gel-related values"""
        # Initialize gel OHLC
        df = self._initialize_gel_ohlc(df)

        # Calculate gel ranges and percentages
        df = self._calculate_gel_ranges(df)

        # Calculate gel expansions
        df = self._calculate_gel_expansions(df)

        # Calculate gel patterns
        df = self._calculate_gel_patterns(df)

        return df

    def _initialize_gel_ohlc(self, df):
        """Initialize gel OHLC values"""
        # First bar of parent sets initial values
        df['gel_open'] = df.apply(
            lambda x: x['open'] if x['trading_bop'] == 1
            else x['gel_open'].shift(1), axis=1
        )

        # Running high/low within parent
        df['gel_high'] = df.apply(
            lambda x: x['high'] if x['trading_bop'] == 1
            else max(x['high'], x['gel_high'].shift(1)), axis=1
        )

        df['gel_low'] = df.apply(
            lambda x: x['low'] if x['trading_bop'] == 1
            else min(x['low'], x['gel_low'].shift(1)), axis=1
        )

        df['gel_close'] = df['close']

        return df

    def _calculate_gel_ranges(self, df):
        """Calculate gel ranges and percentages"""
        df['gel_range'] = df['gel_high'] - df['gel_low']
        df['gel_percent_r'] = (df['gel_close'] - df['gel_low']) / df['gel_range']

        # Calculate gel ce_percent
        df['gel_ce_percent'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else 1 - x['gel_percent_r'].shift(1) if x['gel_percent_r'].shift(1) >= 0.5
            else x['gel_percent_r'].shift(1), axis=1
        )

        return df

    def _calculate_gel_expansions(self, df):
        """Calculate gel expansion values"""
        # Calculate REU/RED for gel
        df['gel_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['gel_high'] - x['gel_high'].shift(1) if x['gel_high'] > x['gel_high'].shift(1)
            else 0, axis=1
        )

        df['gel_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['gel_low'] - x['gel_low'].shift(1)) if x['gel_low'] < x['gel_low'].shift(1)
            else 0, axis=1
        )

        # Set expansion flags
        df['gel_reu_flag'] = (df['gel_reu_value'] > 0).astype(int)
        df['gel_red_flag'] = (df['gel_red_value'] > 0).astype(int)
        df['gel_re_value'] = df['gel_reu_value'] + df['gel_red_value']

        return df

    def _calculate_gel_patterns(self, df):
        """Calculate gel pattern indicators"""
        # Calculate EPC and related fields
        df['gel_epc'] = np.ceil(df['gel_ce_percent'] / 0.1).clip(1, 5)
        df['gel_epc_dir'] = (df['gel_percent_r'].shift(1) >= 0.5).astype(int)
        df['gel_epc_hp'] = (df['gel_ce_percent'] < 0.25).astype(int)

        # Calculate E1/E2 values
        df['gel_e1_value'] = df.apply(
            lambda x: x['gel_reu_value'] if x['gel_epc_dir'] == 1
            else x['gel_red_value'], axis=1
        )

        df['gel_e2_value'] = df.apply(
            lambda x: x['gel_red_value'] if x['gel_epc_dir'] == 1
            else x['gel_reu_value'], axis=1
        )

        return df


class PriorParentCalculator:
    """Handles calculations related to prior parent period relationships"""

    def __init__(self):
        pass

    def calculate_prior_parent(self, df):
        """Calculate all prior parent related values"""
        # Initialize parent reference values
        df = self._initialize_parent_refs(df)

        # Calculate ranges and percentages
        df = self._calculate_parent_ranges(df)

        # Calculate expansions against prior parent
        df = self._calculate_parent_expansions(df)

        # Calculate patterns and directions
        df = self._calculate_parent_patterns(df)

        return df

    def _initialize_parent_refs(self, df):
        """Initialize references to parent values"""
        df['gelp_open'] = df['parent_open']

        # Running high/low against parent
        df['gelp_high'] = df.apply(
            lambda x: x['parent_high'] if x['trading_bop'] == 1
            else max(x['parent_high'], x['gel_high'].shift(1)), axis=1
        )

        df['gelp_low'] = df.apply(
            lambda x: x['parent_low'] if x['trading_bop'] == 1
            else min(x['parent_low'], x['gel_low'].shift(1)), axis=1
        )

        df['gelp_close'] = df['close'].shift(1)

        return df

    def _calculate_parent_ranges(self, df):
        """Calculate ranges and percentages against parent"""
        df['gelp_range'] = df['gelp_high'] - df['gelp_low']
        df['gelp_percent_r'] = (df['gelp_close'] - df['gelp_low']) / df['gelp_range']

        df['gelp_ce_percent'] = df.apply(
            lambda x: 1 - x['gelp_percent_r'] if x['gelp_percent_r'] >= 0.5
            else x['gelp_percent_r'], axis=1
        )

        return df

    def _calculate_parent_expansions(self, df):
        """Calculate expansions against parent values"""
        # Calculate REU/RED against parent
        df['gelp_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['high'] - x['gelp_high'] if x['high'] > x['gelp_high']
            else 0, axis=1
        )

        df['gelp_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['low'] - x['gelp_low']) if x['low'] < x['gelp_low']
            else 0, axis=1
        )

        # Set expansion flags
        df['gelp_reu_flag'] = (df['gelp_reu_value'] > 0).astype(int)
        df['gelp_red_flag'] = (df['gelp_red_value'] > 0).astype(int)
        df['gelp_re_value'] = df['gelp_reu_value'] + df['gelp_red_value']

        return df

    def _calculate_parent_patterns(self, df):
        """Calculate pattern indicators against parent"""
        # Calculate EPC and related fields
        df['gelp_epc'] = np.ceil(df['gelp_ce_percent'] / 0.1).clip(1, 5)
        df['gelp_epc_dir'] = (df['gelp_percent_r'] >= 0.5).astype(int)
        df['gelp_epc_hp_flag'] = (df['gelp_ce_percent'] < 0.25).astype(int)

        # Calculate direction and RPC
        df['gelp_dir'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_dir'].shift(1) if x['gelp_re_flag'] == 0
            else 0 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == 1
            else 1 if x['gelp_reu_flag'] == 1
            else x['gelp_dir'].shift(1), axis=1
        )

        df['gelp_rpc'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_rpc'].shift(1) if x['gelp_dir'] == x['gelp_dir'].shift(1)
            else 2 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == x['gelp_dir'].shift(1)
            else 1, axis=1
        )

        return df


class SummaryGenerator:
    """Generates summary statistics and analysis"""

    def __init__(self):
        pass

    def generate(self, parent_df, child_df, ticker):
        """Generate complete summary statistics"""
        summary = {}

        # Parent level statistics
        summary.update(self._generate_parent_stats(parent_df))

        # Child level statistics
        summary.update(self._generate_child_stats(child_df))

        # Pattern analysis
        summary.update(self._analyze_patterns(parent_df, child_df))

        # Export summary
        self._export_summary(summary, ticker)

        return summary

    def _generate_parent_stats(self, df):
        """Generate parent level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        # E1 FRE flag by EPC HP
        e1_hp_stats = df[df['epc_hp'] == 1]['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_hp_counts'] = e1_hp_stats.to_dict()
        stats['e1_fre_flag_hp_percentages'] = (e1_hp_stats / len(df[df['epc_hp'] == 1]) * 100).to_dict()

        # Range histogram
        stats['range_histogram'] = df['range'].describe().to_dict()

        return stats

    def _generate_child_stats(self, df):
        """Generate child level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['child_e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['child_e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        return stats

    def _analyze_patterns(self, parent_df, child_df):
        """Analyze patterns across parent and child data"""
        patterns = {}

        # Analyze expansion patterns
        patterns['avg_expansion_by_period'] = child_df.groupby('trading_bop')['re_value'].mean().to_dict()

        # Analyze direction persistence
        patterns['direction_persistence'] = self._calculate_direction_persistence(child_df)

        # Analyze high/low positioning
        patterns['hl_position_stats'] = self._analyze_hl_positions(child_df)

        return patterns

    def _calculate_direction_persistence(self, df):
        """Calculate statistics about direction persistence"""
        return {
            'avg_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().mean(),
            'max_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().max()
        }

    def _analyze_hl_positions(self, df):
        """Analyze high/low position patterns"""
        return {
            'early_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) & (df['trading_bop'] <= 2)]),
            'middle_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                               (df['trading_bop'] > 2) & (df['trading_bop'] < df['parent_duration'] - 1)]),
            'late_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                             (df['trading_bop'] >= df['parent_duration'] - 1)])
        }

    def _export_summary(self, summary, ticker):
        """Export summary statistics to CSV"""
        # Convert nested dict to flat format for CSV
        flat_summary = self._flatten_dict(summary)

        # Create DataFrame and export
        summary_df = pd.DataFrame([flat_summary])
        summary_df.to_csv(f'output_gel_sum/{ticker}_summary.csv', index=False)

    def _flatten_dict(self, d, parent_key='', sep='_'):
        """Flatten nested dictionary for CSV export"""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

class ParentUpdater:
    """Updates parent file with aggregated child data"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def update_parent(self, parent_df, child_df):
        """Update parent with aggregated child data"""
        # Calculate intrabar counts
        intrabar_counts = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).agg({
            'reu_flag': 'sum',
            'red_flag': 'sum',
            'rpc': 'sum'
        }).rename(columns={
            'reu_flag': 'intrabar_reu_count',
            'red_flag': 'intrabar_red_count',
            'rpc': 'intrabar_rpc'
        })

        # Calculate priorbar counts
        priorbar_counts = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).agg({
            'gelp_reu_flag': 'sum',
            'gelp_red_flag': 'sum',
            'gelp_red_flag': 'sum'  # Using red_flag for rpc as per spec
        }).rename(columns={
            'gelp_reu_flag': 'priorbar_reu_count',
            'gelp_red_flag': 'priorbar_red_count',
            'gelp_red_flag': 'priorbar_rpc'
        })

        # Find first/last RE positions
        def get_positions(group):
            re_bars = group[group['gelp_re_flag'] == 1]['trading_bop']
            return pd.Series({
                'priorbar_first_re': re_bars.iloc[0] if len(re_bars) > 0 else None,
                'priorbar_last_re': re_bars.iloc[-1] if len(re_bars) > 0 else None
            })

        positions = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).apply(get_positions)

        # Merge all updates into parent
        parent_df = parent_df.join(intrabar_counts, how='left')
        parent_df = parent_df.join(priorbar_counts, how='left')
        parent_df = parent_df.join(positions, how='left')

        return parent_df

def main():
    # Initialize processor
    processor = GetSetsProcessor(child_period='D', parent_period='M')

    # Process all files in input directory
    input_dir = Path('/content/input')

    if not input_dir.exists():
        raise FileNotFoundError(f"Input directory not found: {input_dir}")

    # Process each CSV file
    for file_path in input_dir.glob('*.csv'):
        try:
            print(f"\nProcessing file: {file_path}")
            processor.process_file(file_path)
            print(f"Successfully processed: {file_path}")

        except Exception as e:
            print(f"Error processing {file_path}: {str(e)}")
            logging.error(f"Error processing {file_path}: {str(e)}", exc_info=True)

# Run the processor
main()


  return df.groupby(pd.Grouper(key='date', freq='M'))
ERROR:GetSets:Error processing file /content/input/AAPL_D_1.csv: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'
ERROR:root:Error processing /content/input/AAPL_D_1.csv: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'
Traceback (most recent call last):
  File "<ipython-input-4-9ca5f3d6ce1b>", line 658, in main
    processor.process_file(file_path)
  File "<ipython-input-4-9ca5f3d6ce1b>", line 60, in process_file
    parent_df = self._create_parent_file(df)
  File "<ipython-input-4-9ca5f3d6ce1b>", line 120, in _create_parent_file
    return parent_processor.create_parent_file(df)
  File "<ipython-input-4-9ca5f3d6ce1b>", line 166, in create_parent_file
    grouped = self._group_by_parent_period(df)
  File "<ipython-input-4-9ca5f3d6ce1b>", line 190, in _group_by_parent_period
    return df.groupby(pd.Grouper(key='date', freq='M'))
  File "/usr/loc


Processing file: /content/input/AAPL_D_1.csv
Error processing /content/input/AAPL_D_1.csv: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'

Processing file: /content/input/MMM_D_1.csv
Error processing /content/input/MMM_D_1.csv: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'

Processing file: /content/input/XLB_D_1.csv
Error processing /content/input/XLB_D_1.csv: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'

Processing file: /content/input/AFL_D_1.csv
Error processing /content/input/AFL_D_1.csv: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'


In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import logging

class GetSetsProcessor:
    """Main processor for Get Sets analysis"""

    def __init__(self, child_period='D', parent_period='M', jobname='Gelset'):
        self.child_period = child_period
        self.parent_period = parent_period
        self.jobname = jobname
        self.setup_logging()

        # Validate period combinations
        self._validate_periods()

        # Setup paths
        self.setup_directories()

    def _validate_periods(self):
        """Validate period combinations"""
        valid_combinations = {
            ('D', 'W'), ('D', 'M'),
            ('M', 'Q'), ('M', 'Y'),
            ('Q', 'Y'), ('D', 'Q')
        }
        if (self.child_period, self.parent_period) not in valid_combinations:
            raise ValueError(f"Invalid period combination: {self.child_period}/{self.parent_period}")

    def setup_logging(self):
        """Initialize logging"""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(f'getsets_{datetime.now().strftime("%Y%m%d")}.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('GetSets')

    def setup_directories(self):
        """Create required output directories"""
        directories = ['output_child', 'output_parent', 'output_gel_sum']
        for dir_name in directories:
            Path(dir_name).mkdir(exist_ok=True)

    def process_file(self, filepath):
        """Process a single input file"""
        try:
            # Extract file information
            ticker, temporal_period, rolling_range = self._parse_filename(filepath)

            # Load and validate input data
            df = self._load_input_file(filepath)

            # Create parent file
            parent_df = self._create_parent_file(df)

            # Process child calculations
            child_df = self._process_child_calculations(df, parent_df)

            # Update parent with child data
            parent_df = self._update_parent_with_child_data(parent_df, child_df)

            # Generate summary statistics
            self._generate_summary(parent_df, child_df, ticker)

            # Export results
            self._export_results(parent_df, child_df, ticker)

            return True

        except Exception as e:
            self.logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    def _parse_filename(self, filepath):
        """Parse input filename for ticker and metadata"""
        filename = Path(filepath).stem
        parts = filename.split('_')
        if len(parts) != 3:
            raise ValueError(f"Invalid filename format: {filename}")
        return parts[0], parts[1], parts[2]

    def _load_input_file(self, filepath):
        """Load and validate input file"""
        # Read CSV with date parsing
        df = pd.read_csv(filepath)

        # Validate required columns
        required_cols = ['date', 'open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Convert date to datetime and set as index
        try:
            df['date'] = pd.to_datetime(df['date'])
            df.sort_values('date', inplace=True)
            df.set_index('date', inplace=True)
        except Exception as e:
            raise ValueError(f"Error processing date column: {str(e)}")

        # Validate OHLC relationships
        valid_mask = (
            (df['low'] <= df['open']) &
            (df['low'] <= df['close']) &
            (df['high'] >= df['open']) &
            (df['high'] >= df['close']) &
            (df['high'] - df['low'] > 0)
        )

        invalid_rows = df[~valid_mask]
        if len(invalid_rows) > 0:
            self.logger.warning(f"Removing {len(invalid_rows)} invalid rows")
            df = df[valid_mask]

        return df

    def _create_parent_file(self, df):
        """Create parent file from child data"""
        parent_processor = ParentProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return parent_processor.create_parent_file(df)

    def _process_child_calculations(self, df, parent_df):
        """Process all child calculations"""
        child_processor = ChildProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return child_processor.process_calculations(df, parent_df)

    def _update_parent_with_child_data(self, parent_df, child_df):
        """Update parent file with aggregated child data"""
        updater = ParentUpdater(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return updater.update_parent(parent_df, child_df)

    def _generate_summary(self, parent_df, child_df, ticker):
        """Generate summary statistics"""
        summary = SummaryGenerator()
        return summary.generate(parent_df, child_df, ticker)

    def _export_results(self, parent_df, child_df, ticker):
        """Export results to CSV files"""
        # Export child file
        child_path = f'output_child/{ticker}_child_{self.child_period}.csv'
        child_df.to_csv(child_path, index=False)

        # Export parent file
        parent_path = f'output_parent/{ticker}_parent_{self.parent_period}.csv'
        parent_df.to_csv(parent_path, index=False)

        self.logger.info(f"Results exported: {child_path}, {parent_path}")


class ParentProcessor:
    """Handles creation and processing of parent file"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def create_parent_file(self, df):
        """Create parent file by aggregating child data"""
        # Group by parent period
        grouped = self._group_by_parent_period(df)

        # Aggregate OHLC
        parent_df = grouped.agg({
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum'
        }).reset_index()

        # Add reference fields
        parent_df = self._add_reference_fields(parent_df)

        # Calculate basic indicators
        parent_df = self._calculate_basic_indicators(parent_df)

        return parent_df

    def _group_by_parent_period(self, df):
        """Group data by parent period"""
        if self.parent_period == 'W':
            return df.groupby(pd.Grouper(freq='W-FRI'))  # End on Friday
        elif self.parent_period == 'M':
            return df.groupby(pd.Grouper(freq='ME'))  # Month End
        elif self.parent_period == 'Q':
            return df.groupby(pd.Grouper(freq='Q-DEC'))  # Quarter End
        elif self.parent_period == 'Y':
            return df.groupby(pd.Grouper(freq='Y'))  # Year End
        else:
            raise ValueError(f"Invalid parent period: {self.parent_period}")

    def _add_reference_fields(self, df):
        """Add reference fields to parent dataframe"""
        df['serial_id'] = self._generate_serial_ids(df)
        df['model_type'] = 1  # bar/prior bar
        df['child_period'] = self.child_period
        df['parent_period'] = self.parent_period
        df['create_date'] = datetime.now().strftime('%Y-%m-%d')
        df['create_time'] = datetime.now().strftime('%H:%M:%S')
        return df

    def _generate_serial_ids(self, df):
        """Generate unique 13-digit serial IDs"""
        base = int(datetime.now().strftime('%Y%m%d%H%M'))
        return [f"{base}{i:03d}" for i in range(len(df))]

    def _calculate_basic_indicators(self, df):
        """Calculate basic technical indicators"""
        # Calculate range
        df['range'] = df['high'] - df['low']

        # Calculate percent_r
        df['percent_r'] = (df['close'] - df['low']) / df['range']

        # Add prior bar references
        df['ro'] = df['open'].shift(1)
        df['rh'] = df['high'].shift(1)
        df['rl'] = df['low'].shift(1)
        df['rc'] = df['close'].shift(1)

        return df


class ChildProcessor:
    """Handles child calculations including bar/prior bar and gel calculations"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def process_calculations(self, df, parent_df):
        """Process all child calculations"""
        # Add trading_bop
        df = self._add_trading_bop(df)

        # Calculate bar/prior bar
        df = self._calculate_bar_prior_bar(df)

        # Calculate gel values
        df = self._calculate_gel_values(df)

        # Calculate prior parent values
        df = self._calculate_prior_parent(df)

        return df

    def _add_trading_bop(self, df):
        """Add trading bar of parent field"""
        # Group by parent period and assign sequential numbers
        if self.parent_period == 'W':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='W')).cumcount() + 1
        elif self.parent_period == 'M':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='M')).cumcount() + 1
        elif self.parent_period == 'Q':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Q')).cumcount() + 1
        elif self.parent_period == 'Y':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Y')).cumcount() + 1

        return df

    def _calculate_bar_prior_bar(self, df):
        """Calculate bar/prior bar values"""
        # REU/RED calculations
        df['reu_value'] = df.apply(
            lambda x: x['high'] - x['high'].shift(1) if x['high'] > x['high'].shift(1) else 0
        )
        df['red_value'] = df.apply(
            lambda x: abs(x['low'] - x['low'].shift(1)) if x['low'] < x['low'].shift(1) else 0
        )

        # Flags
        df['reu_flag'] = (df['reu_value'] > 0).astype(int)
        df['red_flag'] = (df['red_value'] > 0).astype(int)
        df['re_value'] = df['reu_value'] + df['red_value']
        df['re_flag'] = ((df['reu_flag'] == 1) | (df['red_flag'] == 1)).astype(int)

        return df



class GelCalculator:
    """Handles Gel calculations including expansions and pattern recognition"""

    def __init__(self):
        pass

    def calculate_gel_values(self, df):
        """Calculate all gel-related values"""
        # Initialize gel OHLC
        df = self._initialize_gel_ohlc(df)

        # Calculate gel ranges and percentages
        df = self._calculate_gel_ranges(df)

        # Calculate gel expansions
        df = self._calculate_gel_expansions(df)

        # Calculate gel patterns
        df = self._calculate_gel_patterns(df)

        return df

    def _initialize_gel_ohlc(self, df):
        """Initialize gel OHLC values"""
        # First bar of parent sets initial values
        df['gel_open'] = df.apply(
            lambda x: x['open'] if x['trading_bop'] == 1
            else x['gel_open'].shift(1), axis=1
        )

        # Running high/low within parent
        df['gel_high'] = df.apply(
            lambda x: x['high'] if x['trading_bop'] == 1
            else max(x['high'], x['gel_high'].shift(1)), axis=1
        )

        df['gel_low'] = df.apply(
            lambda x: x['low'] if x['trading_bop'] == 1
            else min(x['low'], x['gel_low'].shift(1)), axis=1
        )

        df['gel_close'] = df['close']

        return df

    def _calculate_gel_ranges(self, df):
        """Calculate gel ranges and percentages"""
        df['gel_range'] = df['gel_high'] - df['gel_low']
        df['gel_percent_r'] = (df['gel_close'] - df['gel_low']) / df['gel_range']

        # Calculate gel ce_percent
        df['gel_ce_percent'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else 1 - x['gel_percent_r'].shift(1) if x['gel_percent_r'].shift(1) >= 0.5
            else x['gel_percent_r'].shift(1), axis=1
        )

        return df

    def _calculate_gel_expansions(self, df):
        """Calculate gel expansion values"""
        # Calculate REU/RED for gel
        df['gel_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['gel_high'] - x['gel_high'].shift(1) if x['gel_high'] > x['gel_high'].shift(1)
            else 0, axis=1
        )

        df['gel_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['gel_low'] - x['gel_low'].shift(1)) if x['gel_low'] < x['gel_low'].shift(1)
            else 0, axis=1
        )

        # Set expansion flags
        df['gel_reu_flag'] = (df['gel_reu_value'] > 0).astype(int)
        df['gel_red_flag'] = (df['gel_red_value'] > 0).astype(int)
        df['gel_re_value'] = df['gel_reu_value'] + df['gel_red_value']

        return df

    def _calculate_gel_patterns(self, df):
        """Calculate gel pattern indicators"""
        # Calculate EPC and related fields
        df['gel_epc'] = np.ceil(df['gel_ce_percent'] / 0.1).clip(1, 5)
        df['gel_epc_dir'] = (df['gel_percent_r'].shift(1) >= 0.5).astype(int)
        df['gel_epc_hp'] = (df['gel_ce_percent'] < 0.25).astype(int)

        # Calculate E1/E2 values
        df['gel_e1_value'] = df.apply(
            lambda x: x['gel_reu_value'] if x['gel_epc_dir'] == 1
            else x['gel_red_value'], axis=1
        )

        df['gel_e2_value'] = df.apply(
            lambda x: x['gel_red_value'] if x['gel_epc_dir'] == 1
            else x['gel_reu_value'], axis=1
        )

        return df


class PriorParentCalculator:
    """Handles calculations related to prior parent period relationships"""

    def __init__(self):
        pass

    def calculate_prior_parent(self, df):
        """Calculate all prior parent related values"""
        # Initialize parent reference values
        df = self._initialize_parent_refs(df)

        # Calculate ranges and percentages
        df = self._calculate_parent_ranges(df)

        # Calculate expansions against prior parent
        df = self._calculate_parent_expansions(df)

        # Calculate patterns and directions
        df = self._calculate_parent_patterns(df)

        return df

    def _initialize_parent_refs(self, df):
        """Initialize references to parent values"""
        df['gelp_open'] = df['parent_open']

        # Running high/low against parent
        df['gelp_high'] = df.apply(
            lambda x: x['parent_high'] if x['trading_bop'] == 1
            else max(x['parent_high'], x['gel_high'].shift(1)), axis=1
        )

        df['gelp_low'] = df.apply(
            lambda x: x['parent_low'] if x['trading_bop'] == 1
            else min(x['parent_low'], x['gel_low'].shift(1)), axis=1
        )

        df['gelp_close'] = df['close'].shift(1)

        return df

    def _calculate_parent_ranges(self, df):
        """Calculate ranges and percentages against parent"""
        df['gelp_range'] = df['gelp_high'] - df['gelp_low']
        df['gelp_percent_r'] = (df['gelp_close'] - df['gelp_low']) / df['gelp_range']

        df['gelp_ce_percent'] = df.apply(
            lambda x: 1 - x['gelp_percent_r'] if x['gelp_percent_r'] >= 0.5
            else x['gelp_percent_r'], axis=1
        )

        return df

    def _calculate_parent_expansions(self, df):
        """Calculate expansions against parent values"""
        # Calculate REU/RED against parent
        df['gelp_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['high'] - x['gelp_high'] if x['high'] > x['gelp_high']
            else 0, axis=1
        )

        df['gelp_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['low'] - x['gelp_low']) if x['low'] < x['gelp_low']
            else 0, axis=1
        )

        # Set expansion flags
        df['gelp_reu_flag'] = (df['gelp_reu_value'] > 0).astype(int)
        df['gelp_red_flag'] = (df['gelp_red_value'] > 0).astype(int)
        df['gelp_re_value'] = df['gelp_reu_value'] + df['gelp_red_value']

        return df

    def _calculate_parent_patterns(self, df):
        """Calculate pattern indicators against parent"""
        # Calculate EPC and related fields
        df['gelp_epc'] = np.ceil(df['gelp_ce_percent'] / 0.1).clip(1, 5)
        df['gelp_epc_dir'] = (df['gelp_percent_r'] >= 0.5).astype(int)
        df['gelp_epc_hp_flag'] = (df['gelp_ce_percent'] < 0.25).astype(int)

        # Calculate direction and RPC
        df['gelp_dir'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_dir'].shift(1) if x['gelp_re_flag'] == 0
            else 0 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == 1
            else 1 if x['gelp_reu_flag'] == 1
            else x['gelp_dir'].shift(1), axis=1
        )

        df['gelp_rpc'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_rpc'].shift(1) if x['gelp_dir'] == x['gelp_dir'].shift(1)
            else 2 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == x['gelp_dir'].shift(1)
            else 1, axis=1
        )

        return df


class SummaryGenerator:
    """Generates summary statistics and analysis"""

    def __init__(self):
        pass

    def generate(self, parent_df, child_df, ticker):
        """Generate complete summary statistics"""
        summary = {}

        # Parent level statistics
        summary.update(self._generate_parent_stats(parent_df))

        # Child level statistics
        summary.update(self._generate_child_stats(child_df))

        # Pattern analysis
        summary.update(self._analyze_patterns(parent_df, child_df))

        # Export summary
        self._export_summary(summary, ticker)

        return summary

    def _generate_parent_stats(self, df):
        """Generate parent level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        # E1 FRE flag by EPC HP
        e1_hp_stats = df[df['epc_hp'] == 1]['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_hp_counts'] = e1_hp_stats.to_dict()
        stats['e1_fre_flag_hp_percentages'] = (e1_hp_stats / len(df[df['epc_hp'] == 1]) * 100).to_dict()

        # Range histogram
        stats['range_histogram'] = df['range'].describe().to_dict()

        return stats

    def _generate_child_stats(self, df):
        """Generate child level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['child_e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['child_e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        return stats

    def _analyze_patterns(self, parent_df, child_df):
        """Analyze patterns across parent and child data"""
        patterns = {}

        # Analyze expansion patterns
        patterns['avg_expansion_by_period'] = child_df.groupby('trading_bop')['re_value'].mean().to_dict()

        # Analyze direction persistence
        patterns['direction_persistence'] = self._calculate_direction_persistence(child_df)

        # Analyze high/low positioning
        patterns['hl_position_stats'] = self._analyze_hl_positions(child_df)

        return patterns

    def _calculate_direction_persistence(self, df):
        """Calculate statistics about direction persistence"""
        return {
            'avg_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().mean(),
            'max_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().max()
        }

    def _analyze_hl_positions(self, df):
        """Analyze high/low position patterns"""
        return {
            'early_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) & (df['trading_bop'] <= 2)]),
            'middle_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                               (df['trading_bop'] > 2) & (df['trading_bop'] < df['parent_duration'] - 1)]),
            'late_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                             (df['trading_bop'] >= df['parent_duration'] - 1)])
        }

    def _export_summary(self, summary, ticker):
        """Export summary statistics to CSV"""
        # Convert nested dict to flat format for CSV
        flat_summary = self._flatten_dict(summary)

        # Create DataFrame and export
        summary_df = pd.DataFrame([flat_summary])
        summary_df.to_csv(f'output_gel_sum/{ticker}_summary.csv', index=False)

    def _flatten_dict(self, d, parent_key='', sep='_'):
        """Flatten nested dictionary for CSV export"""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

class ParentUpdater:
    """Updates parent file with aggregated child data"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def update_parent(self, parent_df, child_df):
        """Update parent with aggregated child data"""
        # Calculate intrabar counts
        intrabar_counts = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).agg({
            'reu_flag': 'sum',
            'red_flag': 'sum',
            'rpc': 'sum'
        }).rename(columns={
            'reu_flag': 'intrabar_reu_count',
            'red_flag': 'intrabar_red_count',
            'rpc': 'intrabar_rpc'
        })

        # Calculate priorbar counts
        priorbar_counts = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).agg({
            'gelp_reu_flag': 'sum',
            'gelp_red_flag': 'sum',
            'gelp_red_flag': 'sum'  # Using red_flag for rpc as per spec
        }).rename(columns={
            'gelp_reu_flag': 'priorbar_reu_count',
            'gelp_red_flag': 'priorbar_red_count',
            'gelp_red_flag': 'priorbar_rpc'
        })

        # Find first/last RE positions
        def get_positions(group):
            re_bars = group[group['gelp_re_flag'] == 1]['trading_bop']
            return pd.Series({
                'priorbar_first_re': re_bars.iloc[0] if len(re_bars) > 0 else None,
                'priorbar_last_re': re_bars.iloc[-1] if len(re_bars) > 0 else None
            })

        positions = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).apply(get_positions)

        # Merge all updates into parent
        parent_df = parent_df.join(intrabar_counts, how='left')
        parent_df = parent_df.join(priorbar_counts, how='left')
        parent_df = parent_df.join(positions, how='left')

        return parent_df


def main():
    # Initialize processor
    processor = GetSetsProcessor(child_period='D', parent_period='M')

    # Process all files in input directory
    input_dir = Path('/content/input')

    if not input_dir.exists():
        raise FileNotFoundError(f"Input directory not found: {input_dir}")

    # Process each CSV file
    for file_path in input_dir.glob('*.csv'):
        try:
            print(f"\nProcessing file: {file_path}")
            processor.process_file(file_path)
            print(f"Successfully processed: {file_path}")

        except Exception as e:
            print(f"Error processing {file_path}: {str(e)}")
            logging.error(f"Error processing {file_path}: {str(e)}", exc_info=True)

# Run the processor
main()

ERROR:GetSets:Error processing file /content/input/AAPL_D_1.csv: "Column(s) ['volume'] do not exist"
ERROR:root:Error processing /content/input/AAPL_D_1.csv: "Column(s) ['volume'] do not exist"
Traceback (most recent call last):
  File "<ipython-input-5-7cf191bc6a55>", line 668, in main
    processor.process_file(file_path)
  File "<ipython-input-5-7cf191bc6a55>", line 60, in process_file
    parent_df = self._create_parent_file(df)
  File "<ipython-input-5-7cf191bc6a55>", line 129, in _create_parent_file
    return parent_processor.create_parent_file(df)
  File "<ipython-input-5-7cf191bc6a55>", line 178, in create_parent_file
    parent_df = grouped.agg({
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/groupby/generic.py", line 1432, in aggregate
    result = op.agg()
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/apply.py", line 190, in agg
    return self.agg_dict_like()
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/apply.py", line 423, in agg_


Processing file: /content/input/AAPL_D_1.csv
Error processing /content/input/AAPL_D_1.csv: "Column(s) ['volume'] do not exist"

Processing file: /content/input/MMM_D_1.csv
Error processing /content/input/MMM_D_1.csv: "Column(s) ['volume'] do not exist"

Processing file: /content/input/XLB_D_1.csv
Error processing /content/input/XLB_D_1.csv: "Column(s) ['volume'] do not exist"

Processing file: /content/input/AFL_D_1.csv
Error processing /content/input/AFL_D_1.csv: "Column(s) ['volume'] do not exist"


In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import logging

class GetSetsProcessor:
    """Main processor for Get Sets analysis"""

    def __init__(self, child_period='D', parent_period='M', jobname='Gelset'):
        self.child_period = child_period
        self.parent_period = parent_period
        self.jobname = jobname
        self.setup_logging()

        # Validate period combinations
        self._validate_periods()

        # Setup paths
        self.setup_directories()

    def _validate_periods(self):
        """Validate period combinations"""
        valid_combinations = {
            ('D', 'W'), ('D', 'M'),
            ('M', 'Q'), ('M', 'Y'),
            ('Q', 'Y'), ('D', 'Q')
        }
        if (self.child_period, self.parent_period) not in valid_combinations:
            raise ValueError(f"Invalid period combination: {self.child_period}/{self.parent_period}")

    def setup_logging(self):
        """Initialize logging"""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(f'getsets_{datetime.now().strftime("%Y%m%d")}.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('GetSets')

    def setup_directories(self):
        """Create required output directories"""
        directories = ['output_child', 'output_parent', 'output_gel_sum']
        for dir_name in directories:
            Path(dir_name).mkdir(exist_ok=True)

    def process_file(self, filepath):
        """Process a single input file"""
        try:
            # Extract file information
            ticker, temporal_period, rolling_range = self._parse_filename(filepath)

            # Load and validate input data
            df = self._load_input_file(filepath)

            # Create parent file
            parent_df = self._create_parent_file(df)

            # Process child calculations
            child_df = self._process_child_calculations(df, parent_df)

            # Update parent with child data
            parent_df = self._update_parent_with_child_data(parent_df, child_df)

            # Generate summary statistics
            self._generate_summary(parent_df, child_df, ticker)

            # Export results
            self._export_results(parent_df, child_df, ticker)

            return True

        except Exception as e:
            self.logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    def _parse_filename(self, filepath):
        """Parse input filename for ticker and metadata"""
        filename = Path(filepath).stem
        parts = filename.split('_')
        if len(parts) != 3:
            raise ValueError(f"Invalid filename format: {filename}")
        return parts[0], parts[1], parts[2]

    def _load_input_file(self, filepath):
        """Load and validate input file"""
        # Read CSV with date parsing
        df = pd.read_csv(filepath)

        # Validate required columns
        required_cols = ['date', 'open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Convert date to datetime and set as index
        try:
            df['date'] = pd.to_datetime(df['date'])
            df.sort_values('date', inplace=True)
            df.set_index('date', inplace=True)
        except Exception as e:
            raise ValueError(f"Error processing date column: {str(e)}")

        # Validate OHLC relationships
        valid_mask = (
            (df['low'] <= df['open']) &
            (df['low'] <= df['close']) &
            (df['high'] >= df['open']) &
            (df['high'] >= df['close']) &
            (df['high'] - df['low'] > 0)
        )

        invalid_rows = df[~valid_mask]
        if len(invalid_rows) > 0:
            self.logger.warning(f"Removing {len(invalid_rows)} invalid rows")
            df = df[valid_mask]

        return df

    def _create_parent_file(self, df):
        """Create parent file from child data"""
        parent_processor = ParentProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return parent_processor.create_parent_file(df)

    def _process_child_calculations(self, df, parent_df):
        """Process all child calculations"""
        child_processor = ChildProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return child_processor.process_calculations(df, parent_df)

    def _update_parent_with_child_data(self, parent_df, child_df):
        """Update parent file with aggregated child data"""
        updater = ParentUpdater(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return updater.update_parent(parent_df, child_df)

    def _generate_summary(self, parent_df, child_df, ticker):
        """Generate summary statistics"""
        summary = SummaryGenerator()
        return summary.generate(parent_df, child_df, ticker)

    def _export_results(self, parent_df, child_df, ticker):
        """Export results to CSV files"""
        # Export child file
        child_path = f'output_child/{ticker}_child_{self.child_period}.csv'
        child_df.to_csv(child_path, index=False)

        # Export parent file
        parent_path = f'output_parent/{ticker}_parent_{self.parent_period}.csv'
        parent_df.to_csv(parent_path, index=False)

        self.logger.info(f"Results exported: {child_path}, {parent_path}")


class ParentProcessor:
    """Handles creation and processing of parent file"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def create_parent_file(self, df):
        """Create parent file by aggregating child data"""
        # Group by parent period
        grouped = self._group_by_parent_period(df)

        # Define base OHLC aggregation
        agg_dict = {
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last'
        }

        # Add volume if it exists
        if 'volume' in df.columns:
            agg_dict['volume'] = 'sum'

        # Aggregate using available columns
        parent_df = grouped.agg(agg_dict).reset_index()

        # Add reference fields
        parent_df = self._add_reference_fields(parent_df)

        # Calculate basic indicators
        parent_df = self._calculate_basic_indicators(parent_df)

        return parent_df

    def _group_by_parent_period(self, df):
        """Group data by parent period"""
        if self.parent_period == 'W':
            return df.groupby(pd.Grouper(freq='W-FRI'))  # End on Friday
        elif self.parent_period == 'M':
            return df.groupby(pd.Grouper(freq='ME'))  # Month End
        elif self.parent_period == 'Q':
            return df.groupby(pd.Grouper(freq='Q-DEC'))  # Quarter End
        elif self.parent_period == 'Y':
            return df.groupby(pd.Grouper(freq='Y'))  # Year End
        else:
            raise ValueError(f"Invalid parent period: {self.parent_period}")

    def _add_reference_fields(self, df):
        """Add reference fields to parent dataframe"""
        df['serial_id'] = self._generate_serial_ids(df)
        df['model_type'] = 1  # bar/prior bar
        df['child_period'] = self.child_period
        df['parent_period'] = self.parent_period
        df['create_date'] = datetime.now().strftime('%Y-%m-%d')
        df['create_time'] = datetime.now().strftime('%H:%M:%S')
        return df

    def _generate_serial_ids(self, df):
        """Generate unique 13-digit serial IDs"""
        base = int(datetime.now().strftime('%Y%m%d%H%M'))
        return [f"{base}{i:03d}" for i in range(len(df))]

    def _calculate_basic_indicators(self, df):
        """Calculate basic technical indicators"""
        # Calculate range
        df['range'] = df['high'] - df['low']

        # Calculate percent_r
        df['percent_r'] = (df['close'] - df['low']) / df['range']

        # Add prior bar references
        df['ro'] = df['open'].shift(1)
        df['rh'] = df['high'].shift(1)
        df['rl'] = df['low'].shift(1)
        df['rc'] = df['close'].shift(1)

        return df


class ChildProcessor:
    """Handles child calculations including bar/prior bar and gel calculations"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def process_calculations(self, df, parent_df):
        """Process all child calculations"""
        # Add trading_bop
        df = self._add_trading_bop(df)

        # Calculate bar/prior bar
        df = self._calculate_bar_prior_bar(df)

        # Calculate gel values
        df = self._calculate_gel_values(df)

        # Calculate prior parent values
        df = self._calculate_prior_parent(df)

        return df

    def _add_trading_bop(self, df):
        """Add trading bar of parent field"""
        # Group by parent period and assign sequential numbers
        if self.parent_period == 'W':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='W')).cumcount() + 1
        elif self.parent_period == 'M':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='M')).cumcount() + 1
        elif self.parent_period == 'Q':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Q')).cumcount() + 1
        elif self.parent_period == 'Y':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Y')).cumcount() + 1

        return df

    def _calculate_bar_prior_bar(self, df):
        """Calculate bar/prior bar values"""
        # REU/RED calculations
        df['reu_value'] = df.apply(
            lambda x: x['high'] - x['high'].shift(1) if x['high'] > x['high'].shift(1) else 0
        )
        df['red_value'] = df.apply(
            lambda x: abs(x['low'] - x['low'].shift(1)) if x['low'] < x['low'].shift(1) else 0
        )

        # Flags
        df['reu_flag'] = (df['reu_value'] > 0).astype(int)
        df['red_flag'] = (df['red_value'] > 0).astype(int)
        df['re_value'] = df['reu_value'] + df['red_value']
        df['re_flag'] = ((df['reu_flag'] == 1) | (df['red_flag'] == 1)).astype(int)

        return df



class GelCalculator:
    """Handles Gel calculations including expansions and pattern recognition"""

    def __init__(self):
        pass

    def calculate_gel_values(self, df):
        """Calculate all gel-related values"""
        # Initialize gel OHLC
        df = self._initialize_gel_ohlc(df)

        # Calculate gel ranges and percentages
        df = self._calculate_gel_ranges(df)

        # Calculate gel expansions
        df = self._calculate_gel_expansions(df)

        # Calculate gel patterns
        df = self._calculate_gel_patterns(df)

        return df

    def _initialize_gel_ohlc(self, df):
        """Initialize gel OHLC values"""
        # First bar of parent sets initial values
        df['gel_open'] = df.apply(
            lambda x: x['open'] if x['trading_bop'] == 1
            else x['gel_open'].shift(1), axis=1
        )

        # Running high/low within parent
        df['gel_high'] = df.apply(
            lambda x: x['high'] if x['trading_bop'] == 1
            else max(x['high'], x['gel_high'].shift(1)), axis=1
        )

        df['gel_low'] = df.apply(
            lambda x: x['low'] if x['trading_bop'] == 1
            else min(x['low'], x['gel_low'].shift(1)), axis=1
        )

        df['gel_close'] = df['close']

        return df

    def _calculate_gel_ranges(self, df):
        """Calculate gel ranges and percentages"""
        df['gel_range'] = df['gel_high'] - df['gel_low']
        df['gel_percent_r'] = (df['gel_close'] - df['gel_low']) / df['gel_range']

        # Calculate gel ce_percent
        df['gel_ce_percent'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else 1 - x['gel_percent_r'].shift(1) if x['gel_percent_r'].shift(1) >= 0.5
            else x['gel_percent_r'].shift(1), axis=1
        )

        return df

    def _calculate_gel_expansions(self, df):
        """Calculate gel expansion values"""
        # Calculate REU/RED for gel
        df['gel_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['gel_high'] - x['gel_high'].shift(1) if x['gel_high'] > x['gel_high'].shift(1)
            else 0, axis=1
        )

        df['gel_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['gel_low'] - x['gel_low'].shift(1)) if x['gel_low'] < x['gel_low'].shift(1)
            else 0, axis=1
        )

        # Set expansion flags
        df['gel_reu_flag'] = (df['gel_reu_value'] > 0).astype(int)
        df['gel_red_flag'] = (df['gel_red_value'] > 0).astype(int)
        df['gel_re_value'] = df['gel_reu_value'] + df['gel_red_value']

        return df

    def _calculate_gel_patterns(self, df):
        """Calculate gel pattern indicators"""
        # Calculate EPC and related fields
        df['gel_epc'] = np.ceil(df['gel_ce_percent'] / 0.1).clip(1, 5)
        df['gel_epc_dir'] = (df['gel_percent_r'].shift(1) >= 0.5).astype(int)
        df['gel_epc_hp'] = (df['gel_ce_percent'] < 0.25).astype(int)

        # Calculate E1/E2 values
        df['gel_e1_value'] = df.apply(
            lambda x: x['gel_reu_value'] if x['gel_epc_dir'] == 1
            else x['gel_red_value'], axis=1
        )

        df['gel_e2_value'] = df.apply(
            lambda x: x['gel_red_value'] if x['gel_epc_dir'] == 1
            else x['gel_reu_value'], axis=1
        )

        return df


class PriorParentCalculator:
    """Handles calculations related to prior parent period relationships"""

    def __init__(self):
        pass

    def calculate_prior_parent(self, df):
        """Calculate all prior parent related values"""
        # Initialize parent reference values
        df = self._initialize_parent_refs(df)

        # Calculate ranges and percentages
        df = self._calculate_parent_ranges(df)

        # Calculate expansions against prior parent
        df = self._calculate_parent_expansions(df)

        # Calculate patterns and directions
        df = self._calculate_parent_patterns(df)

        return df

    def _initialize_parent_refs(self, df):
        """Initialize references to parent values"""
        df['gelp_open'] = df['parent_open']

        # Running high/low against parent
        df['gelp_high'] = df.apply(
            lambda x: x['parent_high'] if x['trading_bop'] == 1
            else max(x['parent_high'], x['gel_high'].shift(1)), axis=1
        )

        df['gelp_low'] = df.apply(
            lambda x: x['parent_low'] if x['trading_bop'] == 1
            else min(x['parent_low'], x['gel_low'].shift(1)), axis=1
        )

        df['gelp_close'] = df['close'].shift(1)

        return df

    def _calculate_parent_ranges(self, df):
        """Calculate ranges and percentages against parent"""
        df['gelp_range'] = df['gelp_high'] - df['gelp_low']
        df['gelp_percent_r'] = (df['gelp_close'] - df['gelp_low']) / df['gelp_range']

        df['gelp_ce_percent'] = df.apply(
            lambda x: 1 - x['gelp_percent_r'] if x['gelp_percent_r'] >= 0.5
            else x['gelp_percent_r'], axis=1
        )

        return df

    def _calculate_parent_expansions(self, df):
        """Calculate expansions against parent values"""
        # Calculate REU/RED against parent
        df['gelp_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['high'] - x['gelp_high'] if x['high'] > x['gelp_high']
            else 0, axis=1
        )

        df['gelp_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['low'] - x['gelp_low']) if x['low'] < x['gelp_low']
            else 0, axis=1
        )

        # Set expansion flags
        df['gelp_reu_flag'] = (df['gelp_reu_value'] > 0).astype(int)
        df['gelp_red_flag'] = (df['gelp_red_value'] > 0).astype(int)
        df['gelp_re_value'] = df['gelp_reu_value'] + df['gelp_red_value']

        return df

    def _calculate_parent_patterns(self, df):
        """Calculate pattern indicators against parent"""
        # Calculate EPC and related fields
        df['gelp_epc'] = np.ceil(df['gelp_ce_percent'] / 0.1).clip(1, 5)
        df['gelp_epc_dir'] = (df['gelp_percent_r'] >= 0.5).astype(int)
        df['gelp_epc_hp_flag'] = (df['gelp_ce_percent'] < 0.25).astype(int)

        # Calculate direction and RPC
        df['gelp_dir'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_dir'].shift(1) if x['gelp_re_flag'] == 0
            else 0 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == 1
            else 1 if x['gelp_reu_flag'] == 1
            else x['gelp_dir'].shift(1), axis=1
        )

        df['gelp_rpc'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_rpc'].shift(1) if x['gelp_dir'] == x['gelp_dir'].shift(1)
            else 2 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == x['gelp_dir'].shift(1)
            else 1, axis=1
        )

        return df


class SummaryGenerator:
    """Generates summary statistics and analysis"""

    def __init__(self):
        pass

    def generate(self, parent_df, child_df, ticker):
        """Generate complete summary statistics"""
        summary = {}

        # Parent level statistics
        summary.update(self._generate_parent_stats(parent_df))

        # Child level statistics
        summary.update(self._generate_child_stats(child_df))

        # Pattern analysis
        summary.update(self._analyze_patterns(parent_df, child_df))

        # Export summary
        self._export_summary(summary, ticker)

        return summary

    def _generate_parent_stats(self, df):
        """Generate parent level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        # E1 FRE flag by EPC HP
        e1_hp_stats = df[df['epc_hp'] == 1]['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_hp_counts'] = e1_hp_stats.to_dict()
        stats['e1_fre_flag_hp_percentages'] = (e1_hp_stats / len(df[df['epc_hp'] == 1]) * 100).to_dict()

        # Range histogram
        stats['range_histogram'] = df['range'].describe().to_dict()

        return stats

    def _generate_child_stats(self, df):
        """Generate child level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['child_e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['child_e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        return stats

    def _analyze_patterns(self, parent_df, child_df):
        """Analyze patterns across parent and child data"""
        patterns = {}

        # Analyze expansion patterns
        patterns['avg_expansion_by_period'] = child_df.groupby('trading_bop')['re_value'].mean().to_dict()

        # Analyze direction persistence
        patterns['direction_persistence'] = self._calculate_direction_persistence(child_df)

        # Analyze high/low positioning
        patterns['hl_position_stats'] = self._analyze_hl_positions(child_df)

        return patterns

    def _calculate_direction_persistence(self, df):
        """Calculate statistics about direction persistence"""
        return {
            'avg_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().mean(),
            'max_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().max()
        }

    def _analyze_hl_positions(self, df):
        """Analyze high/low position patterns"""
        return {
            'early_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) & (df['trading_bop'] <= 2)]),
            'middle_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                               (df['trading_bop'] > 2) & (df['trading_bop'] < df['parent_duration'] - 1)]),
            'late_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                             (df['trading_bop'] >= df['parent_duration'] - 1)])
        }

    def _export_summary(self, summary, ticker):
        """Export summary statistics to CSV"""
        # Convert nested dict to flat format for CSV
        flat_summary = self._flatten_dict(summary)

        # Create DataFrame and export
        summary_df = pd.DataFrame([flat_summary])
        summary_df.to_csv(f'output_gel_sum/{ticker}_summary.csv', index=False)

    def _flatten_dict(self, d, parent_key='', sep='_'):
        """Flatten nested dictionary for CSV export"""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import logging

class GetSetsProcessor:
    """Main processor for Get Sets analysis"""

    def __init__(self, child_period='D', parent_period='M', jobname='Gelset'):
        self.child_period = child_period
        self.parent_period = parent_period
        self.jobname = jobname
        self.setup_logging()

        # Validate period combinations
        self._validate_periods()

        # Setup paths
        self.setup_directories()

    def _validate_periods(self):
        """Validate period combinations"""
        valid_combinations = {
            ('D', 'W'), ('D', 'M'),
            ('M', 'Q'), ('M', 'Y'),
            ('Q', 'Y'), ('D', 'Q')
        }
        if (self.child_period, self.parent_period) not in valid_combinations:
            raise ValueError(f"Invalid period combination: {self.child_period}/{self.parent_period}")

    def setup_logging(self):
        """Initialize logging"""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(f'getsets_{datetime.now().strftime("%Y%m%d")}.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('GetSets')

    def setup_directories(self):
        """Create required output directories"""
        directories = ['output_child', 'output_parent', 'output_gel_sum']
        for dir_name in directories:
            Path(dir_name).mkdir(exist_ok=True)

    def process_file(self, filepath):
        """Process a single input file"""
        try:
            # Extract file information
            ticker, temporal_period, rolling_range = self._parse_filename(filepath)

            # Load and validate input data
            df = self._load_input_file(filepath)

            # Create parent file
            parent_df = self._create_parent_file(df)

            # Process child calculations
            child_df = self._process_child_calculations(df, parent_df)

            # Update parent with child data
            parent_df = self._update_parent_with_child_data(parent_df, child_df)

            # Generate summary statistics
            self._generate_summary(parent_df, child_df, ticker)

            # Export results
            self._export_results(parent_df, child_df, ticker)

            return True

        except Exception as e:
            self.logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    def _parse_filename(self, filepath):
        """Parse input filename for ticker and metadata"""
        filename = Path(filepath).stem
        parts = filename.split('_')
        if len(parts) != 3:
            raise ValueError(f"Invalid filename format: {filename}")
        return parts[0], parts[1], parts[2]

    def _load_input_file(self, filepath):
        """Load and validate input file"""
        # Read CSV with date parsing
        df = pd.read_csv(filepath)

        # Validate required columns
        required_cols = ['date', 'open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Convert date to datetime and set as index
        try:
            df['date'] = pd.to_datetime(df['date'])
            df.sort_values('date', inplace=True)
            df.set_index('date', inplace=True)
        except Exception as e:
            raise ValueError(f"Error processing date column: {str(e)}")

        # Validate OHLC relationships
        valid_mask = (
            (df['low'] <= df['open']) &
            (df['low'] <= df['close']) &
            (df['high'] >= df['open']) &
            (df['high'] >= df['close']) &
            (df['high'] - df['low'] > 0)
        )

        invalid_rows = df[~valid_mask]
        if len(invalid_rows) > 0:
            self.logger.warning(f"Removing {len(invalid_rows)} invalid rows")
            df = df[valid_mask]

        return df

    def _create_parent_file(self, df):
        """Create parent file from child data"""
        parent_processor = ParentProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return parent_processor.create_parent_file(df)

    def _process_child_calculations(self, df, parent_df):
        """Process all child calculations"""
        child_processor = ChildProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return child_processor.process_calculations(df, parent_df)

    def _update_parent_with_child_data(self, parent_df, child_df):
        """Update parent file with aggregated child data"""
        updater = ParentUpdater(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return updater.update_parent(parent_df, child_df)

    def _generate_summary(self, parent_df, child_df, ticker):
        """Generate summary statistics"""
        summary = SummaryGenerator()
        return summary.generate(parent_df, child_df, ticker)

    def _export_results(self, parent_df, child_df, ticker):
        """Export results to CSV files"""
        # Export child file
        child_path = f'output_child/{ticker}_child_{self.child_period}.csv'
        child_df.to_csv(child_path, index=False)

        # Export parent file
        parent_path = f'output_parent/{ticker}_parent_{self.parent_period}.csv'
        parent_df.to_csv(parent_path, index=False)

        self.logger.info(f"Results exported: {child_path}, {parent_path}")


class ParentProcessor:
    """Handles creation and processing of parent file"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def create_parent_file(self, df):
        """Create parent file by aggregating child data"""
        # Group by parent period
        grouped = self._group_by_parent_period(df)

        # Define base OHLC aggregation
        agg_dict = {
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last'
        }

        # Add volume if it exists
        if 'volume' in df.columns:
            agg_dict['volume'] = 'sum'

        # Aggregate using available columns
        parent_df = grouped.agg(agg_dict).reset_index()

        # Add reference fields
        parent_df = self._add_reference_fields(parent_df)

        # Calculate basic indicators
        parent_df = self._calculate_basic_indicators(parent_df)

        return parent_df

    def _group_by_parent_period(self, df):
        """Group data by parent period"""
        if self.parent_period == 'W':
            return df.groupby(pd.Grouper(freq='W-FRI'))  # End on Friday
        elif self.parent_period == 'M':
            return df.groupby(pd.Grouper(freq='ME'))  # Month End
        elif self.parent_period == 'Q':
            return df.groupby(pd.Grouper(freq='Q-DEC'))  # Quarter End
        elif self.parent_period == 'Y':
            return df.groupby(pd.Grouper(freq='Y'))  # Year End
        else:
            raise ValueError(f"Invalid parent period: {self.parent_period}")

    def _add_reference_fields(self, df):
        """Add reference fields to parent dataframe"""
        df['serial_id'] = self._generate_serial_ids(df)
        df['model_type'] = 1  # bar/prior bar
        df['child_period'] = self.child_period
        df['parent_period'] = self.parent_period
        df['create_date'] = datetime.now().strftime('%Y-%m-%d')
        df['create_time'] = datetime.now().strftime('%H:%M:%S')
        return df

    def _generate_serial_ids(self, df):
        """Generate unique 13-digit serial IDs"""
        base = int(datetime.now().strftime('%Y%m%d%H%M'))
        return [f"{base}{i:03d}" for i in range(len(df))]

    def _calculate_basic_indicators(self, df):
        """Calculate basic technical indicators"""
        # Calculate range
        df['range'] = df['high'] - df['low']

        # Calculate percent_r
        df['percent_r'] = (df['close'] - df['low']) / df['range']

        # Add prior bar references
        df['ro'] = df['open'].shift(1)
        df['rh'] = df['high'].shift(1)
        df['rl'] = df['low'].shift(1)
        df['rc'] = df['close'].shift(1)

        return df


class ChildProcessor:
    """Handles child calculations including bar/prior bar and gel calculations"""

    def __init__(self, child_period, parent_period):
        self.child_period = child_period
        self.parent_period = parent_period

    def process_calculations(self, df, parent_df):
        """Process all child calculations"""
        # Add trading_bop
        df = self._add_trading_bop(df)

        # Calculate bar/prior bar
        df = self._calculate_bar_prior_bar(df)

        # Calculate gel values
        df = self._calculate_gel_values(df)

        # Calculate prior parent values
        df = self._calculate_prior_parent(df)

        return df

    def _add_trading_bop(self, df):
        """Add trading bar of parent field"""
        # Group by parent period and assign sequential numbers
        if self.parent_period == 'W':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='W')).cumcount() + 1
        elif self.parent_period == 'M':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='M')).cumcount() + 1
        elif self.parent_period == 'Q':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Q')).cumcount() + 1
        elif self.parent_period == 'Y':
            df['trading_bop'] = df.groupby(pd.Grouper(key='date', freq='Y')).cumcount() + 1

        return df

    def _calculate_bar_prior_bar(self, df):
        """Calculate bar/prior bar values"""
        # REU/RED calculations
        df['reu_value'] = df.apply(
            lambda x: x['high'] - x['high'].shift(1) if x['high'] > x['high'].shift(1) else 0
        )
        df['red_value'] = df.apply(
            lambda x: abs(x['low'] - x['low'].shift(1)) if x['low'] < x['low'].shift(1) else 0
        )

        # Flags
        df['reu_flag'] = (df['reu_value'] > 0).astype(int)
        df['red_flag'] = (df['red_value'] > 0).astype(int)
        df['re_value'] = df['reu_value'] + df['red_value']
        df['re_flag'] = ((df['reu_flag'] == 1) | (df['red_flag'] == 1)).astype(int)

        return df



class GelCalculator:
    """Handles Gel calculations including expansions and pattern recognition"""

    def __init__(self):
        pass

    def calculate_gel_values(self, df):
        """Calculate all gel-related values"""
        # Initialize gel OHLC
        df = self._initialize_gel_ohlc(df)

        # Calculate gel ranges and percentages
        df = self._calculate_gel_ranges(df)

        # Calculate gel expansions
        df = self._calculate_gel_expansions(df)

        # Calculate gel patterns
        df = self._calculate_gel_patterns(df)

        return df

    def _initialize_gel_ohlc(self, df):
        """Initialize gel OHLC values"""
        # First bar of parent sets initial values
        df['gel_open'] = df.apply(
            lambda x: x['open'] if x['trading_bop'] == 1
            else x['gel_open'].shift(1), axis=1
        )

        # Running high/low within parent
        df['gel_high'] = df.apply(
            lambda x: x['high'] if x['trading_bop'] == 1
            else max(x['high'], x['gel_high'].shift(1)), axis=1
        )

        df['gel_low'] = df.apply(
            lambda x: x['low'] if x['trading_bop'] == 1
            else min(x['low'], x['gel_low'].shift(1)), axis=1
        )

        df['gel_close'] = df['close']

        return df

    def _calculate_gel_ranges(self, df):
        """Calculate gel ranges and percentages"""
        df['gel_range'] = df['gel_high'] - df['gel_low']
        df['gel_percent_r'] = (df['gel_close'] - df['gel_low']) / df['gel_range']

        # Calculate gel ce_percent
        df['gel_ce_percent'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else 1 - x['gel_percent_r'].shift(1) if x['gel_percent_r'].shift(1) >= 0.5
            else x['gel_percent_r'].shift(1), axis=1
        )

        return df

    def _calculate_gel_expansions(self, df):
        """Calculate gel expansion values"""
        # Calculate REU/RED for gel
        df['gel_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['gel_high'] - x['gel_high'].shift(1) if x['gel_high'] > x['gel_high'].shift(1)
            else 0, axis=1
        )

        df['gel_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['gel_low'] - x['gel_low'].shift(1)) if x['gel_low'] < x['gel_low'].shift(1)
            else 0, axis=1
        )

        # Set expansion flags
        df['gel_reu_flag'] = (df['gel_reu_value'] > 0).astype(int)
        df['gel_red_flag'] = (df['gel_red_value'] > 0).astype(int)
        df['gel_re_value'] = df['gel_reu_value'] + df['gel_red_value']

        return df

    def _calculate_gel_patterns(self, df):
        """Calculate gel pattern indicators"""
        # Calculate EPC and related fields
        df['gel_epc'] = np.ceil(df['gel_ce_percent'] / 0.1).clip(1, 5)
        df['gel_epc_dir'] = (df['gel_percent_r'].shift(1) >= 0.5).astype(int)
        df['gel_epc_hp'] = (df['gel_ce_percent'] < 0.25).astype(int)

        # Calculate E1/E2 values
        df['gel_e1_value'] = df.apply(
            lambda x: x['gel_reu_value'] if x['gel_epc_dir'] == 1
            else x['gel_red_value'], axis=1
        )

        df['gel_e2_value'] = df.apply(
            lambda x: x['gel_red_value'] if x['gel_epc_dir'] == 1
            else x['gel_reu_value'], axis=1
        )

        return df


class PriorParentCalculator:
    """Handles calculations related to prior parent period relationships"""

    def __init__(self):
        pass

    def calculate_prior_parent(self, df):
        """Calculate all prior parent related values"""
        # Initialize parent reference values
        df = self._initialize_parent_refs(df)

        # Calculate ranges and percentages
        df = self._calculate_parent_ranges(df)

        # Calculate expansions against prior parent
        df = self._calculate_parent_expansions(df)

        # Calculate patterns and directions
        df = self._calculate_parent_patterns(df)

        return df

    def _initialize_parent_refs(self, df):
        """Initialize references to parent values"""
        df['gelp_open'] = df['parent_open']

        # Running high/low against parent
        df['gelp_high'] = df.apply(
            lambda x: x['parent_high'] if x['trading_bop'] == 1
            else max(x['parent_high'], x['gel_high'].shift(1)), axis=1
        )

        df['gelp_low'] = df.apply(
            lambda x: x['parent_low'] if x['trading_bop'] == 1
            else min(x['parent_low'], x['gel_low'].shift(1)), axis=1
        )

        df['gelp_close'] = df['close'].shift(1)

        return df

    def _calculate_parent_ranges(self, df):
        """Calculate ranges and percentages against parent"""
        df['gelp_range'] = df['gelp_high'] - df['gelp_low']
        df['gelp_percent_r'] = (df['gelp_close'] - df['gelp_low']) / df['gelp_range']

        df['gelp_ce_percent'] = df.apply(
            lambda x: 1 - x['gelp_percent_r'] if x['gelp_percent_r'] >= 0.5
            else x['gelp_percent_r'], axis=1
        )

        return df

    def _calculate_parent_expansions(self, df):
        """Calculate expansions against parent values"""
        # Calculate REU/RED against parent
        df['gelp_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['high'] - x['gelp_high'] if x['high'] > x['gelp_high']
            else 0, axis=1
        )

        df['gelp_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['low'] - x['gelp_low']) if x['low'] < x['gelp_low']
            else 0, axis=1
        )

        # Set expansion flags
        df['gelp_reu_flag'] = (df['gelp_reu_value'] > 0).astype(int)
        df['gelp_red_flag'] = (df['gelp_red_value'] > 0).astype(int)
        df['gelp_re_value'] = df['gelp_reu_value'] + df['gelp_red_value']

        return df

    def _calculate_parent_patterns(self, df):
        """Calculate pattern indicators against parent"""
        # Calculate EPC and related fields
        df['gelp_epc'] = np.ceil(df['gelp_ce_percent'] / 0.1).clip(1, 5)
        df['gelp_epc_dir'] = (df['gelp_percent_r'] >= 0.5).astype(int)
        df['gelp_epc_hp_flag'] = (df['gelp_ce_percent'] < 0.25).astype(int)

        # Calculate direction and RPC
        df['gelp_dir'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_dir'].shift(1) if x['gelp_re_flag'] == 0
            else 0 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == 1
            else 1 if x['gelp_reu_flag'] == 1
            else x['gelp_dir'].shift(1), axis=1
        )

        df['gelp_rpc'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else x['gelp_rpc'].shift(1) if x['gelp_dir'] == x['gelp_dir'].shift(1)
            else 2 if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == x['gelp_dir'].shift(1)
            else 1, axis=1
        )

        return df


class SummaryGenerator:
    """Generates summary statistics and analysis"""

    def __init__(self):
        pass

    def generate(self, parent_df, child_df, ticker):
        """Generate complete summary statistics"""
        summary = {}

        # Parent level statistics
        summary.update(self._generate_parent_stats(parent_df))

        # Child level statistics
        summary.update(self._generate_child_stats(child_df))

        # Pattern analysis
        summary.update(self._analyze_patterns(parent_df, child_df))

        # Export summary
        self._export_summary(summary, ticker)

        return summary

    def _generate_parent_stats(self, df):
        """Generate parent level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        # E1 FRE flag by EPC HP
        e1_hp_stats = df[df['epc_hp'] == 1]['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_hp_counts'] = e1_hp_stats.to_dict()
        stats['e1_fre_flag_hp_percentages'] = (e1_hp_stats / len(df[df['epc_hp'] == 1]) * 100).to_dict()

        # Range histogram
        stats['range_histogram'] = df['range'].describe().to_dict()

        return stats

    def _generate_child_stats(self, df):
        """Generate child level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['child_e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['child_e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        return stats

    def _analyze_patterns(self, parent_df, child_df):
        """Analyze patterns across parent and child data"""
        patterns = {}

        # Analyze expansion patterns
        patterns['avg_expansion_by_period'] = child_df.groupby('trading_bop')['re_value'].mean().to_dict()

        # Analyze direction persistence
        patterns['direction_persistence'] = self._calculate_direction_persistence(child_df)

        # Analyze high/low positioning
        patterns['hl_position_stats'] = self._analyze_hl_positions(child_df)

        return patterns

    def _calculate_direction_persistence(self, df):
        """Calculate statistics about direction persistence"""
        return {
            'avg_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().mean(),
            'max_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().max()
        }

    def _analyze_hl_positions(self, df):
        """Analyze high/low position patterns"""
        return {
            'early_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) & (df['trading_bop'] <= 2)]),
            'middle_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                               (df['trading_bop'] > 2) & (df['trading_bop'] < df['parent_duration'] - 1)]),
            'late_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                             (df['trading_bop'] >= df['parent_duration'] - 1)])
        }

    def _export_summary(self, summary, ticker):
        """Export summary statistics to CSV"""
        # Convert nested dict to flat format for CSV
        flat_summary = self._flatten_dict(summary)

        # Create DataFrame and export
        summary_df = pd.DataFrame([flat_summary])
        summary_df.to_csv(f'output_gel_sum/{ticker}_summary.csv', index=False)

    def _flatten_dict(self, d, parent_key='', sep='_'):
        """Flatten nested dictionary for CSV export"""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from pathlib import Path
import logging

class ProcessorBase:
    """Base class for all processors with common functionality"""

    def __init__(self):
        self.logger = logging.getLogger('GetSets')

class GetSetsProcessor(ProcessorBase):
    """Main processor for Get Sets analysis"""

    def __init__(self, child_period='D', parent_period='M', jobname='Gelset'):
        super().__init__()
        self.child_period = child_period
        self.parent_period = parent_period
        self.jobname = jobname
        self.setup_logging()

        # Validate period combinations
        self._validate_periods()

        # Setup paths
        self.setup_directories()

    def _validate_periods(self):
        """Validate period combinations"""
        valid_combinations = {
            ('D', 'W'), ('D', 'M'),
            ('M', 'Q'), ('M', 'Y'),
            ('Q', 'Y'), ('D', 'Q')
        }
        if (self.child_period, self.parent_period) not in valid_combinations:
            raise ValueError(f"Invalid period combination: {self.child_period}/{self.parent_period}")

    def setup_logging(self):
        """Initialize logging"""
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(f'getsets_{datetime.now().strftime("%Y%m%d")}.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('GetSets')

    def setup_directories(self):
        """Create required output directories"""
        directories = ['output_child', 'output_parent', 'output_gel_sum']
        for dir_name in directories:
            Path(dir_name).mkdir(exist_ok=True)

    def process_file(self, filepath):
        """Process a single input file"""
        try:
            # Extract file information
            ticker, temporal_period, rolling_range = self._parse_filename(filepath)

            # Load and validate input data
            df = self._load_input_file(filepath)

            # Create parent file
            parent_df = self._create_parent_file(df)

            # Process child calculations
            child_df = self._process_child_calculations(df, parent_df)

            # Update parent with child data
            parent_df = self._update_parent_with_child_data(parent_df, child_df)

            # Generate summary statistics
            self._generate_summary(parent_df, child_df, ticker)

            # Export results
            self._export_results(parent_df, child_df, ticker)

            return True

        except Exception as e:
            self.logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    def _parse_filename(self, filepath):
        """Parse input filename for ticker and metadata"""
        filename = Path(filepath).stem
        parts = filename.split('_')
        if len(parts) != 3:
            raise ValueError(f"Invalid filename format: {filename}")
        return parts[0], parts[1], parts[2]

    def _load_input_file(self, filepath):
        """Load and validate input file"""
        # Read CSV with date parsing
        df = pd.read_csv(filepath)

        self.logger.info(f"Initial columns: {df.columns.tolist()}")
        self.logger.info(f"Initial shape: {df.shape}")

        # Define required and optional columns
        required_cols = ['date', 'open', 'high', 'low', 'close']
        optional_cols = ['volume', 'fre_dir_given', 'rpc_given',
                        'timestamp_high', 'timestamp_low', 'bar_of_h', 'bar_of_l']

        # Validate required columns
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Convert date to datetime and set as index
        try:
            df['date'] = pd.to_datetime(df['date'])
            self.logger.info(f"Date range: {df['date'].min()} to {df['date'].max()}")
            df.sort_values('date', inplace=True)
            df.set_index('date', inplace=True)
            self.logger.info(f"Successfully set datetime index: {type(df.index)}")
            self.logger.info(f"First few index values: {df.index[:5]}")

        except Exception as e:
            self.logger.error(f"Error processing date: {str(e)}")
            self.logger.error(f"Date column sample: {df['date'].head()}")
            raise ValueError(f"Error processing date column: {str(e)}")

        # Validate OHLC relationships
        valid_mask = (
            (df['low'] <= df['open']) &
            (df['low'] <= df['close']) &
            (df['high'] >= df['open']) &
            (df['high'] >= df['close']) &
            (df['high'] - df['low'] > 0)
        )

        invalid_rows = df[~valid_mask]
        if len(invalid_rows) > 0:
            self.logger.warning(f"Removing {len(invalid_rows)} invalid rows")
            df = df[valid_mask]

        self.logger.info(f"Final shape: {df.shape}")
        return df

    def _create_parent_file(self, df):
        """Create parent file from child data"""
        parent_processor = ParentProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return parent_processor.create_parent_file(df)

    def _process_child_calculations(self, df, parent_df):
        """Process all child calculations"""
        child_processor = ChildProcessor(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return child_processor.process_calculations(df, parent_df)

    def _update_parent_with_child_data(self, parent_df, child_df):
        """Update parent file with aggregated child data"""
        updater = ParentUpdater(
            child_period=self.child_period,
            parent_period=self.parent_period
        )
        return updater.update_parent(parent_df, child_df)

    def _generate_summary(self, parent_df, child_df, ticker):
        """Generate summary statistics"""
        summary = SummaryGenerator()
        return summary.generate(parent_df, child_df, ticker)

    def _export_results(self, parent_df, child_df, ticker):
        """Export results to CSV files"""
        # Export child file
        child_path = f'output_child/{ticker}_child_{self.child_period}.csv'
        child_df.to_csv(child_path, index=False)

        # Export parent file
        parent_path = f'output_parent/{ticker}_parent_{self.parent_period}.csv'
        parent_df.to_csv(parent_path, index=False)

        self.logger.info(f"Results exported: {child_path}, {parent_path}")


class ParentProcessor(ProcessorBase):
    """Handles creation and processing of parent file"""

    def __init__(self, child_period, parent_period):
        super().__init__()
        self.child_period = child_period
        self.parent_period = parent_period

    def create_parent_file(self, df):
        """Create parent file by aggregating child data"""
        # Group by parent period
        grouped = self._group_by_parent_period(df)

        # Define base OHLC aggregation
        agg_dict = {
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last'
        }

        # Add volume if it exists
        if 'volume' in df.columns:
            agg_dict['volume'] = 'sum'

        # Aggregate using available columns
        parent_df = grouped.agg(agg_dict).reset_index()

        # Add reference fields
        parent_df = self._add_reference_fields(parent_df)

        # Calculate basic indicators
        parent_df = self._calculate_basic_indicators(parent_df)

        return parent_df

    def _group_by_parent_period(self, df):
        """Group data by parent period"""
        self.logger.info(f"Grouping by {self.parent_period}, index type: {type(df.index)}")

        try:
            if self.parent_period == 'W':
                return df.groupby(pd.Grouper(freq='W-FRI'))
            elif self.parent_period == 'M':
                return df.groupby(pd.Grouper(freq='ME'))
            elif self.parent_period == 'Q':
                return df.groupby(pd.Grouper(freq='Q-DEC'))
            elif self.parent_period == 'Y':
                return df.groupby(pd.Grouper(freq='Y'))
            else:
                raise ValueError(f"Invalid parent period: {self.parent_period}")

        except Exception as e:
            self.logger.error(f"Error in grouping: {str(e)}")
            self.logger.error(f"DataFrame info: {df.info()}")
            raise

    def _add_reference_fields(self, df):
        """Add reference fields to parent dataframe"""
        df['serial_id'] = self._generate_serial_ids(df)
        df['model_type'] = 1  # bar/prior bar
        df['child_period'] = self.child_period
        df['parent_period'] = self.parent_period
        df['create_date'] = datetime.now().strftime('%Y-%m-%d')
        df['create_time'] = datetime.now().strftime('%H:%M:%S')
        return df

    def _generate_serial_ids(self, df):
        """Generate unique 13-digit serial IDs"""
        base = int(datetime.now().strftime('%Y%m%d%H%M'))
        return [f"{base}{i:03d}" for i in range(len(df))]

    def _calculate_basic_indicators(self, df):
        """Calculate basic technical indicators"""
        # Calculate range
        df['range'] = df['high'] - df['low']

        # Calculate percent_r
        df['percent_r'] = (df['close'] - df['low']) / df['range']

        # Add prior bar references
        df['ro'] = df['open'].shift(1)
        df['rh'] = df['high'].shift(1)
        df['rl'] = df['low'].shift(1)
        df['rc'] = df['close'].shift(1)

        return df


class ChildProcessor(ProcessorBase):
    """Handles child calculations including bar/prior bar and gel calculations"""

    def __init__(self, child_period, parent_period):
        super().__init__()
        self.child_period = child_period
        self.parent_period = parent_period
        self.gel_calculator = GelCalculator()
        self.prior_parent_calculator = PriorParentCalculator()

    def process_calculations(self, df, parent_df):
        """Process all child calculations"""
        try:
            self.logger.info("Starting child calculations")

            # Add trading_bop
            df = self._add_trading_bop(df)
            self.logger.info("Added trading_bop")

            # Calculate bar/prior bar
            df = self._calculate_bar_prior_bar(df)
            self.logger.info("Calculated bar/prior bar")

            # Calculate gel values
            df = self.gel_calculator.calculate_gel_values(df)
            self.logger.info("Calculated gel values")

            # Map parent data
            df = self._map_parent_data(df, parent_df)
            self.logger.info("Mapped parent data")

            # Calculate prior parent values
            df = self.prior_parent_calculator.calculate_prior_parent(df)
            self.logger.info("Calculated prior parent values")

            return df

        except Exception as e:
            self.logger.error(f"Error in process_calculations: {str(e)}")
            self.logger.error(f"DataFrame info: {df.info()}")
            raise

    def _add_trading_bop(self, df):
        """Add trading bar of parent field"""
        try:
            # Get appropriate frequency for parent period
            if self.parent_period == 'W':
                freq = 'W-FRI'
            elif self.parent_period == 'M':
                freq = 'ME'
            elif self.parent_period == 'Q':
                freq = 'Q-DEC'
            elif self.parent_period == 'Y':
                freq = 'Y'
            else:
                raise ValueError(f"Invalid parent period: {self.parent_period}")

            # Calculate trading_bop
            df['trading_bop'] = df.groupby(pd.Grouper(freq=freq)).cumcount() + 1

            self.logger.info(f"Trading_bop range: {df['trading_bop'].min()} to {df['trading_bop'].max()}")
            return df

        except Exception as e:
            self.logger.error(f"Error in _add_trading_bop: {str(e)}")
            raise

    def _calculate_bar_prior_bar(self, df):
        """Calculate bar/prior bar values"""
        try:
            # REU/RED calculations
            high_diff = df['high'].diff().fillna(0.0)
            low_diff = df['low'].diff().fillna(0.0)

            # REU calculations
            df['reu_value'] = np.where(high_diff > 0, high_diff, 0).astype(float)
            df['reu_flag'] = (df['reu_value'] > 0).astype(int)

            # RED calculations
            df['red_value'] = np.where(low_diff < 0, abs(low_diff), 0).astype(float)
            df['red_flag'] = (df['red_value'] > 0).astype(int)

            # Combined calculations
            df['re_value'] = df['reu_value'] + df['red_value']
            df['re_flag'] = ((df['reu_flag'] == 1) | (df['red_flag'] == 1)).astype(int)

            self.logger.info("Bar/prior bar calculations completed")
            self.logger.info(f"REU flags: {df['reu_flag'].sum()}, RED flags: {df['red_flag'].sum()}")

            return df

        except Exception as e:
            self.logger.error(f"Error in _calculate_bar_prior_bar: {str(e)}")
            raise

    def _map_parent_data(self, df, parent_df):
        """Map parent data to child DataFrame"""
        try:
            # Create parent lookup date based on child date
            parent_df = parent_df.copy()
            df = df.copy()

            # Add parent OHLC data
            df['parent_open'] = self._lookup_parent_data(df, parent_df, 'open')
            df['parent_high'] = self._lookup_parent_data(df, parent_df, 'high')
            df['parent_low'] = self._lookup_parent_data(df, parent_df, 'low')
            df['parent_close'] = self._lookup_parent_data(df, parent_df, 'close')

            # Calculate parent derived fields
            df['parent_range'] = df['parent_high'] - df['parent_low']
            df['parent_percent_r'] = (df['parent_close'] - df['parent_low']) / df['parent_range']

            # Calculate parent ce_percent
            shifted_percent_r = df['parent_percent_r'].shift(1)
            df['parent_ce_percent'] = np.where(
                shifted_percent_r >= 0.5,
                1 - shifted_percent_r,
                shifted_percent_r
            )

            # Calculate parent EPC fields
            df['parent_epc'] = np.ceil(df['parent_ce_percent'] / 0.1).clip(1, 5)
            df['parent_epc_dir'] = (df['parent_percent_r'].shift(1) >= 0.5).astype(int)
            df['parent_epc_hp'] = (df['parent_ce_percent'] < 0.25).astype(int)

            self.logger.info("Parent data mapping completed")
            return df

        except Exception as e:
            self.logger.error(f"Error mapping parent data: {str(e)}")
            raise

    def _lookup_parent_data(self, child_df, parent_df, column):
        """Look up parent data for each child row"""
        try:
            # Get appropriate frequency for parent period
            if self.parent_period == 'W':
                freq = 'W-FRI'
            elif self.parent_period == 'M':
                freq = 'ME'
            elif self.parent_period == 'Q':
                freq = 'Q-DEC'
            elif self.parent_period == 'Y':
                freq = 'Y'
            else:
                raise ValueError(f"Invalid parent period: {self.parent_period}")

            # Create parent period index
            parent_dates = pd.date_range(
                start=child_df.index.min(),
                end=child_df.index.max(),
                freq=freq
            )

            # Create lookup series
            lookup = pd.Series(index=parent_dates, data=parent_df[column].values)

            # Forward fill to align with child dates
            return lookup.reindex(child_df.index, method='ffill')

        except Exception as e:
            self.logger.error(f"Error looking up parent data for column {column}: {str(e)}")
            raise

    def _validate_results(self, df):
        """Validate calculation results"""
        try:
            # Check for missing values
            missing_vals = df.isnull().sum()
            if missing_vals.any():
                self.logger.warning(f"Missing values found:\n{missing_vals[missing_vals > 0]}")

            # Validate numeric ranges
            if (df['reu_value'] < 0).any() or (df['red_value'] < 0).any():
                raise ValueError("Found negative REU/RED values")

            # Validate flags
            flag_cols = [col for col in df.columns if col.endswith('_flag')]
            for col in flag_cols:
                if not df[col].isin([0, 1, np.nan]).all():
                    raise ValueError(f"Invalid flag values in {col}")

            # Validate trading_bop sequence
            bop_gaps = df.groupby(pd.Grouper(freq=self._get_parent_freq()))['trading_bop'].apply(
                lambda x: not x.reset_index(drop=True).equals(pd.Series(range(1, len(x) + 1)))
            )
            if bop_gaps.any():
                self.logger.warning("Found gaps in trading_bop sequence")

            return True

        except Exception as e:
            self.logger.error(f"Validation error: {str(e)}")
            raise

    def _get_parent_freq(self):
        """Get pandas frequency string for parent period"""
        freq_map = {
            'W': 'W-FRI',
            'M': 'ME',
            'Q': 'Q-DEC',
            'Y': 'Y'
        }
        return freq_map.get(self.parent_period, 'ME')


class GelCalculator(ProcessorBase):
    """Handles Gel calculations including expansions and pattern recognition"""

    def __init__(self):
        super().__init__()

    def calculate_gel_values(self, df):
        """Calculate all gel-related values"""
        try:
            # Initialize gel OHLC
            df = self._initialize_gel_ohlc(df)

            # Calculate gel ranges and percentages
            df = self._calculate_gel_ranges(df)

            # Calculate gel expansions
            df = self._calculate_gel_expansions(df)

            # Calculate gel patterns
            df = self._calculate_gel_patterns(df)

            return df

        except Exception as e:
            self.logger.error(f"Error in calculate_gel_values: {str(e)}")
            self.logger.error(f"DataFrame info: {df.info()}")
            raise

    def _initialize_gel_ohlc(self, df):
        """Initialize gel OHLC values"""
        try:
            # Initialize gel_open
            df['gel_open'] = df['open'].copy()
            mask = (df['trading_bop'] != 1)
            df.loc[mask, 'gel_open'] = df['gel_open'].shift(1)

            # Initialize gel_high with high values
            df['gel_high'] = df['high'].copy()
            df.loc[mask, 'gel_high'] = df.groupby(pd.Grouper(freq='ME'))['high'].transform('cummax')

            # Initialize gel_low with low values
            df['gel_low'] = df['low'].copy()
            df.loc[mask, 'gel_low'] = df.groupby(pd.Grouper(freq='ME'))['low'].transform('cummin')

            # Initialize gel_close
            df['gel_close'] = df['close']

            self.logger.info("Gel OHLC initialization completed")
            return df

        except Exception as e:
            self.logger.error(f"Error in initialize_gel_ohlc: {str(e)}")
            self.logger.error(f"DataFrame columns: {df.columns.tolist()}")
            raise

    def _calculate_gel_ranges(self, df):
        """Calculate gel ranges and percentages"""
        try:
            df['gel_range'] = df['gel_high'] - df['gel_low']
            df['gel_percent_r'] = (df['gel_close'] - df['gel_low']) / df['gel_range']

            # Calculate gel ce_percent
            mask = (df['trading_bop'] != 1)
            shifted_percent_r = df['gel_percent_r'].shift(1)
            df['gel_ce_percent'] = np.nan
            df.loc[mask & (shifted_percent_r >= 0.5), 'gel_ce_percent'] = 1 - shifted_percent_r[mask & (shifted_percent_r >= 0.5)]
            df.loc[mask & (shifted_percent_r < 0.5), 'gel_ce_percent'] = shifted_percent_r[mask & (shifted_percent_r < 0.5)]

            return df

        except Exception as e:
            self.logger.error(f"Error in calculate_gel_ranges: {str(e)}")
            raise

    def _calculate_gel_expansions(self, df):
        """Calculate gel expansion values"""
        try:
            # Initialize columns with zeros
            df['gel_reu_value'] = 0.0
            df['gel_red_value'] = 0.0

            mask = (df['trading_bop'] != 1)

            # Calculate differences
            high_diff = df['gel_high'].diff().fillna(0.0)
            low_diff = df['gel_low'].diff().fillna(0.0)

            # Update values using numpy where conditions
            df.loc[mask & (high_diff > 0), 'gel_reu_value'] = high_diff[mask & (high_diff > 0)].astype(float)
            df.loc[mask & (low_diff < 0), 'gel_red_value'] = abs(low_diff[mask & (low_diff < 0)]).astype(float)

            # Set expansion flags
            df['gel_reu_flag'] = (df['gel_reu_value'] > 0).astype(int)
            df['gel_red_flag'] = (df['gel_red_value'] > 0).astype(int)
            df['gel_re_value'] = df['gel_reu_value'] + df['gel_red_value']

            return df

        except Exception as e:
            self.logger.error(f"Error in calculate_gel_expansions: {str(e)}")
            raise

    def _calculate_gel_patterns(self, df):
        """Calculate gel pattern indicators"""
        try:
            # Calculate EPC and related fields
            df['gel_epc'] = np.ceil(df['gel_ce_percent'] / 0.1).clip(1, 5)
            df['gel_epc_dir'] = (df['gel_percent_r'].shift(1) >= 0.5).astype(int)
            df['gel_epc_hp'] = (df['gel_ce_percent'] < 0.25).astype(int)

            # Calculate E1/E2 values
            df['gel_e1_value'] = np.where(df['gel_epc_dir'] == 1,
                                      df['gel_reu_value'],
                                      df['gel_red_value'])

            df['gel_e2_value'] = np.where(df['gel_epc_dir'] == 1,
                                      df['gel_red_value'],
                                      df['gel_reu_value'])

            # Calculate flags
            df['gel_e1_flag'] = (df['gel_e1_value'] > 0).astype(int)
            df['gel_e2_flag'] = (df['gel_e2_value'] > 0).astype(int)
            df['gel_e1_fre_flag'] = df['e1_fre_flag']  # Map from input

            return df

        except Exception as e:
            self.logger.error(f"Error in calculate_gel_patterns: {str(e)}")
            raise

    def _initialize_gel_ohlc(self, df):
        """Initialize gel OHLC values"""
        try:
            # Initialize gel_open
            df['gel_open'] = df['open'].copy()
            mask = (df['trading_bop'] != 1)
            df.loc[mask, 'gel_open'] = df['gel_open'].shift(1)

            # Initialize gel_high with high values
            df['gel_high'] = df['high'].copy()
            df.loc[mask, 'gel_high'] = np.maximum(df['high'], df['gel_high'].shift(1))

            # Initialize gel_low with low values
            df['gel_low'] = df['low'].copy()
            df.loc[mask, 'gel_low'] = np.minimum(df['low'], df['gel_low'].shift(1))

            # Initialize gel_close
            df['gel_close'] = df['close']

            self.logger.info("Gel OHLC initialization completed")
            return df

        except Exception as e:
            self.logger.error(f"Error in initialize_gel_ohlc: {str(e)}")
            self.logger.error(f"DataFrame columns: {df.columns.tolist()}")
            raise

    def _calculate_gel_ranges(self, df):
        """Calculate gel ranges and percentages"""
        try:
            df['gel_range'] = df['gel_high'] - df['gel_low']
            df['gel_percent_r'] = (df['gel_close'] - df['gel_low']) / df['gel_range']

            # Calculate gel ce_percent
            mask = (df['trading_bop'] != 1)
            shifted_percent_r = df['gel_percent_r'].shift(1)
            df['gel_ce_percent'] = np.nan
            df.loc[mask & (shifted_percent_r >= 0.5), 'gel_ce_percent'] = 1 - shifted_percent_r
            df.loc[mask & (shifted_percent_r < 0.5), 'gel_ce_percent'] = shifted_percent_r

            return df

        except Exception as e:
            self.logger.error(f"Error in calculate_gel_ranges: {str(e)}")
            raise

    def _calculate_gel_expansions(self, df):
        """Calculate gel expansion values"""
        try:
            # Calculate REU/RED for gel
            mask = (df['trading_bop'] != 1)

            # Initialize with zeros
            df['gel_reu_value'] = 0
            df['gel_red_value'] = 0

            # Calculate expansions where needed
            high_diff = df['gel_high'] - df['gel_high'].shift(1)
            low_diff = df['gel_low'] - df['gel_low'].shift(1)

            df.loc[mask & (high_diff > 0), 'gel_reu_value'] = high_diff
            df.loc[mask & (low_diff < 0), 'gel_red_value'] = abs(low_diff)

            # Set expansion flags
            df['gel_reu_flag'] = (df['gel_reu_value'] > 0).astype(int)
            df['gel_red_flag'] = (df['gel_red_value'] > 0).astype(int)
            df['gel_re_value'] = df['gel_reu_value'] + df['gel_red_value']

            return df

        except Exception as e:
            self.logger.error(f"Error in calculate_gel_expansions: {str(e)}")
            raise

    def _calculate_gel_patterns(self, df):
        """Calculate gel pattern indicators"""
        try:
            # Calculate EPC and related fields
            df['gel_epc'] = np.ceil(df['gel_ce_percent'] / 0.1).clip(1, 5)
            df['gel_epc_dir'] = (df['gel_percent_r'].shift(1) >= 0.5).astype(int)
            df['gel_epc_hp'] = (df['gel_ce_percent'] < 0.25).astype(int)

            # Calculate E1/E2 values
            df['gel_e1_value'] = np.where(df['gel_epc_dir'] == 1,
                                        df['gel_reu_value'],
                                        df['gel_red_value'])

            df['gel_e2_value'] = np.where(df['gel_epc_dir'] == 1,
                                        df['gel_red_value'],
                                        df['gel_reu_value'])

            return df

        except Exception as e:
            self.logger.error(f"Error in calculate_gel_patterns: {str(e)}")
            raise

class PriorParentCalculator:
    """Handles calculations related to prior parent period relationships"""

    def __init__(self):
        pass

    def calculate_prior_parent(self, df):
        """Calculate all prior parent related values"""
        # Initialize parent reference values
        df = self._initialize_parent_refs(df)

        # Calculate ranges and percentages
        df = self._calculate_parent_ranges(df)

        # Calculate expansions against prior parent
        df = self._calculate_parent_expansions(df)

        # Calculate patterns and directions
        df = self._calculate_parent_patterns(df)

        return df

    def _initialize_parent_refs(self, df):
        """Initialize references to parent values"""
        df['gelp_open'] = df['parent_open']

        # Running high/low against parent
        df['gelp_high'] = df.apply(
            lambda x: x['parent_high'] if x['trading_bop'] == 1
            else max(x['parent_high'], x['gel_high'].shift(1)), axis=1
        )

        df['gelp_low'] = df.apply(
            lambda x: x['parent_low'] if x['trading_bop'] == 1
            else min(x['parent_low'], x['gel_low'].shift(1)), axis=1
        )

        df['gelp_close'] = df['close'].shift(1)

        return df

    def _calculate_parent_ranges(self, df):
        """Calculate ranges and percentages against parent"""
        df['gelp_range'] = df['gelp_high'] - df['gelp_low']
        df['gelp_percent_r'] = (df['gelp_close'] - df['gelp_low']) / df['gelp_range']

        df['gelp_ce_percent'] = df.apply(
            lambda x: 1 - x['gelp_percent_r'] if x['gelp_percent_r'] >= 0.5
            else x['gelp_percent_r'], axis=1
        )

        return df

    def _calculate_parent_expansions(self, df):
        """Calculate expansions against parent values"""
        # Calculate REU/RED against parent
        df['gelp_reu_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else x['high'] - x['gelp_high'] if x['high'] > x['gelp_high']
            else 0, axis=1
        )

        df['gelp_red_value'] = df.apply(
            lambda x: np.nan if x['trading_bop'] == 1
            else abs(x['low'] - x['gelp_low']) if x['low'] < x['gelp_low']
            else 0, axis=1
        )

        # Set expansion flags
        df['gelp_reu_flag'] = (df['gelp_reu_value'] > 0).astype(int)
        df['gelp_red_flag'] = (df['gelp_red_value'] > 0).astype(int)
        df['gelp_re_value'] = df['gelp_reu_value'] + df['gelp_red_value']

        return df

    def _calculate_parent_patterns(self, df):
        """Calculate pattern indicators against parent"""
        # Calculate EPC and related fields
        df['gelp_epc'] = np.ceil(df['gelp_ce_percent'] / 0.1).clip(1, 5).fillna(0).astype(int)
        df['gelp_epc_dir'] = (df['gelp_percent_r'] >= 0.5).astype(int).fillna(0).astype(int)
        df['gelp_epc_hp_flag'] = (df['gelp_ce_percent'] < 0.25).astype(int).fillna(0).astype(int)

        # Initialize 'gelp_rpc' as string type
        df['gelp_rpc'] = ''

        # Ensure 'gelp_dir' is string before shifting
        df['gelp_dir'] = df['gelp_dir'].astype(str)

        # Calculate direction and RPC
        df['gelp_dir'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else str(df['gelp_dir'].shift(1).fillna("").loc[x.name]) if x['gelp_re_flag'] == 0
            else "0" if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == 1
            else "1" if x['gelp_reu_flag'] == 1
            else str(df['gelp_dir'].shift(1).fillna("").loc[x.name]), axis=1
        )

        df['gelp_rpc'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else str(df['gelp_rpc'].shift(1).fillna("").loc[x.name]) if str(df['gelp_dir'].shift(1).fillna("").loc[x.name]) == str(df['gelp_dir'].shift(1).fillna("").loc[x.name])
            else "2" if x['gelp_twoway'] == 1 and str(df['gelp_fre_dir'].fillna("").loc[x.name]) == str(df['gelp_dir'].shift(1).fillna("").loc[x.name])
            else "1", axis=1
        )

        return df

    def _calculate_parent_patterns(self, df):
        """Calculate pattern indicators against parent"""
        # Calculate EPC and related fields
        df['gelp_epc'] = np.ceil(df['gelp_ce_percent'] / 0.1).clip(1, 5)
        df['gelp_epc_dir'] = (df['gelp_percent_r'] >= 0.5).astype(int)
        df['gelp_epc_hp_flag'] = (df['gelp_ce_percent'] < 0.25).astype(int)

        # Initialize 'gelp_rpc' as string type
        df['gelp_rpc'] = ''

        # Ensure 'gelp_dir' is string before shifting
        df['gelp_dir'] = df['gelp_dir'].astype(str)

        # Calculate direction and RPC
        df['gelp_dir'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else str(df['gelp_dir'].shift(1).fillna("").loc[x.name]) if x['gelp_re_flag'] == 0
            else "0" if x['gelp_twoway'] == 1 and x['gelp_fre_dir'] == 1
            else "1" if x['gelp_reu_flag'] == 1
            else str(df['gelp_dir'].shift(1).fillna("").loc[x.name]), axis=1
        )

        df['gelp_rpc'] = df.apply(
            lambda x: "" if x['trading_bop'] == 1
            else str(df['gelp_rpc'].shift(1).fillna("").loc[x.name]) if str(df['gelp_dir'].shift(1).fillna("").loc[x.name]) == str(df['gelp_dir'].shift(1).fillna("").loc[x.name])
            else "2" if x['gelp_twoway'] == 1 and str(df['gelp_fre_dir'].fillna("").loc[x.name]) == str(df['gelp_dir'].shift(1).fillna("").loc[x.name])
            else "1", axis=1
        )

        return df


class ParentUpdater(ProcessorBase):
    """Updates parent file with aggregated child data"""

    def __init__(self, child_period, parent_period):
        super().__init__()
        self.child_period = child_period
        self.parent_period = parent_period

    def update_parent(self, parent_df, child_df):
        """Update parent with aggregated child data"""
        # Calculate intrabar counts
        intrabar_counts = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).agg({
            'reu_flag': 'sum',
            'red_flag': 'sum',
            'rpc': 'sum'
        }).rename(columns={
            'reu_flag': 'intrabar_reu_count',
            'red_flag': 'intrabar_red_count',
            'rpc': 'intrabar_rpc'
        })

        # Calculate priorbar counts
        priorbar_counts = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).agg({
            'gelp_reu_flag': 'sum',
            'gelp_red_flag': 'sum',
            'gelp_red_flag': 'sum'  # Using red_flag for rpc as per spec
        }).rename(columns={
            'gelp_reu_flag': 'priorbar_reu_count',
            'gelp_red_flag': 'priorbar_red_count',
            'gelp_red_flag': 'priorbar_rpc'
        })

        # Find first/last RE positions
        def get_positions(group):
            re_bars = group[group['gelp_re_flag'] == 1]['trading_bop']
            return pd.Series({
                'priorbar_first_re': re_bars.iloc[0] if len(re_bars) > 0 else None,
                'priorbar_last_re': re_bars.iloc[-1] if len(re_bars) > 0 else None
            })

        positions = child_df.groupby(pd.Grouper(key='date', freq=self.parent_period)).apply(get_positions)

        # Merge all updates into parent
        parent_df = parent_df.join(intrabar_counts, how='left')
        parent_df = parent_df.join(priorbar_counts, how='left')
        parent_df = parent_df.join(positions, how='left')

        return parent_df

class SummaryGenerator(ProcessorBase):
    """Generates summary statistics and analysis"""

    def __init__(self):
        super().__init__()

    def generate(self, parent_df, child_df, ticker):
        """Generate complete summary statistics"""
        summary = {}

        # Parent level statistics
        summary.update(self._generate_parent_stats(parent_df))

        # Child level statistics
        summary.update(self._generate_child_stats(child_df))

        # Pattern analysis
        summary.update(self._analyze_patterns(parent_df, child_df))

        # Export summary
        self._export_summary(summary, ticker)

        return summary

    def _generate_parent_stats(self, df):
        """Generate parent level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        # E1 FRE flag by EPC HP
        e1_hp_stats = df[df['epc_hp'] == 1]['e1_fre_flag'].value_counts()
        stats['e1_fre_flag_hp_counts'] = e1_hp_stats.to_dict()
        stats['e1_fre_flag_hp_percentages'] = (e1_hp_stats / len(df[df['epc_hp'] == 1]) * 100).to_dict()

        # Range histogram
        stats['range_histogram'] = df['range'].describe().to_dict()

        return stats

    def _generate_child_stats(self, df):
        """Generate child level statistics"""
        stats = {}

        # E1 FRE flag analysis
        e1_stats = df['e1_fre_flag'].value_counts()
        stats['child_e1_fre_flag_counts'] = e1_stats.to_dict()
        stats['child_e1_fre_flag_percentages'] = (e1_stats / len(df) * 100).to_dict()

        return stats

    def _analyze_patterns(self, parent_df, child_df):
        """Analyze patterns across parent and child data"""
        patterns = {}

        # Analyze expansion patterns
        patterns['avg_expansion_by_period'] = child_df.groupby('trading_bop')['re_value'].mean().to_dict()

        # Analyze direction persistence
        patterns['direction_persistence'] = self._calculate_direction_persistence(child_df)

        # Analyze high/low positioning
        patterns['hl_position_stats'] = self._analyze_hl_positions(child_df)

        return patterns

    def _calculate_direction_persistence(self, df):
        """Calculate statistics about direction persistence"""
        return {
            'avg_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().mean(),
            'max_streak_length': df.groupby((df['gelp_dir'] != df['gelp_dir'].shift(1)).cumsum())['gelp_dir'].count().max()
        }

    def _analyze_hl_positions(self, df):
        """Analyze high/low position patterns"""
        return {
            'early_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) & (df['trading_bop'] <= 2)]),
            'middle_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                               (df['trading_bop'] > 2) & (df['trading_bop'] < df['parent_duration'] - 1)]),
            'late_hl': len(df[(df['bar_of_h'].notna() | df['bar_of_l'].notna()) &
                             (df['trading_bop'] >= df['parent_duration'] - 1)])
        }

    def _export_summary(self, summary, ticker):
        """Export summary statistics to CSV"""
        # Convert nested dict to flat format for CSV
        flat_summary = self._flatten_dict(summary)

        # Create DataFrame and export
        summary_df = pd.DataFrame([flat_summary])
        summary_df.to_csv(f'output_gel_sum/{ticker}_summary.csv', index=False)

    def _flatten_dict(self, d, parent_key='', sep='_'):
        """Flatten nested dictionary for CSV export"""
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(self._flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

def main():
    # Initialize processor
    processor = GetSetsProcessor(child_period='D', parent_period='M')

    # Process all files in input directory
    input_dir = Path('/content/input')

    if not input_dir.exists():
        raise FileNotFoundError(f"Input directory not found: {input_dir}")

    # Process each CSV file
    for file_path in input_dir.glob('*.csv'):
        try:
            print(f"\nProcessing file: {file_path}")
            processor.process_file(file_path)
            print(f"Successfully processed: {file_path}")

        except Exception as e:
            print(f"Error processing {file_path}: {str(e)}")
            logging.error(f"Error processing {file_path}: {str(e)}", exc_info=True)

# Run the processor
main()

 1.74999200e-02 1.77499770e-01 3.24993100e-02 1.22501380e-01
 1.02499010e-01 5.32499310e-01 3.40000150e-01 1.70000080e-01
 6.50005300e-02 3.14998630e-01 5.25016800e-02 3.00006900e-02
 2.04999920e-01 5.10000230e-01 1.19998940e-01 1.00002200e-02
 1.57501230e-01 5.67499160e-01 2.82499310e-01 4.00009200e-02
 7.49969000e-03 2.87500380e-01 3.85000230e-01 6.57499310e-01
 5.47500610e-01 4.75006100e-02 4.32498930e-01 1.25007700e-02
 4.14999010e-01 4.00009100e-02 4.02500160e-01 7.74993900e-02
 1.27500530e-01 5.50003100e-02 4.40000530e-01 2.24998470e-01
 4.12500390e-01 2.47501370e-01 9.05000690e-01 2.99987800e-02
 7.99999300e-02 2.22499840e-01 3.04998400e-01 2.09999080e-01
 9.74998500e-02 1.90000530e-01 1.24988600e-02 3.22500230e-01
 7.75012900e-02 2.07498550e-01 8.25004600e-02 1.35000230e-01
 1.37750054e+00 2.49996200e-02 2.50015200e-02 4.12500380e-01
 1.80000310e-01 1.42499920e-01 1.52500150e-01 1.72500610e-01
 2.25009900e-02 2.99999240e-01 7.50007600e-02 1.14999770e-01
 7.67499920e-01 1.059999


Processing file: /content/input/AAPL_D_1.csv
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1005 entries, 2016-01-04 to 2019-12-30
Data columns (total 28 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   open            1005 non-null   float64
 1   high            1005 non-null   float64
 2   low             1005 non-null   float64
 3   close           1005 non-null   float64
 4   trading_bop     1005 non-null   int64  
 5   reu_value       1005 non-null   float64
 6   reu_flag        1005 non-null   int64  
 7   red_value       1005 non-null   float64
 8   red_flag        1005 non-null   int64  
 9   re_value        1005 non-null   float64
 10  re_flag         1005 non-null   int64  
 11  gel_open        1005 non-null   float64
 12  gel_high        1005 non-null   float64
 13  gel_low         1005 non-null   float64
 14  gel_close       1005 non-null   float64
 15  gel_range       1005 non-null   float64
 16  gel_percent_r 

  df.loc[mask & (high_diff > 0), 'gel_reu_value'] = high_diff
  df.loc[mask & (low_diff < 0), 'gel_red_value'] = abs(low_diff)
ERROR:GetSets:Error looking up parent data for column open: Length of values (313) does not match length of index (312)
ERROR:GetSets:Error mapping parent data: Length of values (313) does not match length of index (312)
ERROR:GetSets:Error in process_calculations: Length of values (313) does not match length of index (312)
ERROR:GetSets:DataFrame info: None
ERROR:GetSets:Error processing file /content/input/XLB_D_1.csv: Length of values (313) does not match length of index (312)
ERROR:root:Error processing /content/input/XLB_D_1.csv: Length of values (313) does not match length of index (312)
Traceback (most recent call last):
  File "<ipython-input-25-3de07a088b74>", line 1015, in main
    processor.process_file(file_path)
  File "<ipython-input-25-3de07a088b74>", line 70, in process_file
    child_df = self._process_child_calculations(df, parent_df)
  File "

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6540 entries, 1998-12-22 to 2024-12-18
Data columns (total 28 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   open            6540 non-null   float64
 1   high            6540 non-null   float64
 2   low             6540 non-null   float64
 3   close           6540 non-null   float64
 4   trading_bop     6540 non-null   int64  
 5   reu_value       6540 non-null   float64
 6   reu_flag        6540 non-null   int64  
 7   red_value       6540 non-null   float64
 8   red_flag        6540 non-null   int64  
 9   re_value        6540 non-null   float64
 10  re_flag         6540 non-null   int64  
 11  gel_open        6540 non-null   float64
 12  gel_high        6540 non-null   float64
 13  gel_low         6540 non-null   float64
 14  gel_close       6540 non-null   float64
 15  gel_range       6540 non-null   float64
 16  gel_percent_r   6540 non-null   float64
 17  gel_ce_percent 