<a href="https://colab.research.google.com/github/BaronVonBussin/NewTransit/blob/main/domains_goodbeta_20241230.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Domains

Steps:
1.   Determine rolling ranges up to N depth.
2.   Identify domains, which are established when a rolling range fails to expand.
3.   Assign a domain type to each domain: primary, nested, inside.
4.   Assign "true" domains by rolling range duration.
5.   Track time-to-expand (TTE) and continuation.


In [None]:
import pandas as pd
import os
from dataclasses import dataclass
from typing import List, Dict, Optional
import logging

# Configuration settings
# These module-level values are the defaults for DomainProcessor and
# process_input_directory below; edit them before running the cell.
INPUT_DIRECTORY = '/content/input'  # Set your input directory path here
FILE_FORMAT = '{TICKER}_{PERIOD}.csv'    # Expected format of input files
VALID_PERIODS = ['D', 'W', 'M', 'Q', 'Y']  # Valid period values
# Rolling-range durations 1..DOMAIN_DEPTH (inclusive) are scanned for each file.
DOMAIN_DEPTH = 12  # Set your desired depth
# Maximum row distance for an open domain to be considered when typing a new one.
DOMAIN_PRIMARY_CHECK_DURATION = 20
# Number of rows, starting at the expansion bar, in the first/second expansion window.
FIRST_EXPANSION_DISTANCE = 6
SECOND_EXPANSION_DISTANCE = 12

# Root logger is configured at INFO so progress messages are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class Domain:
    """One domain: a rolling range that the following bar failed to expand.

    The required fields are fixed when the domain is created; the optional
    fields are filled in later while the domain is tracked bar by bar.
    Field order matters: it becomes the column order of the exported DataFrame.
    """
    domain_id: int  # sequential id assigned by the processor
    ticker: str  # ticker parsed from the input file name
    domain_rolling_range_duration: int  # number of bars in the rolling range
    domain_captive_date: str  # date of the bar that created the domain
    domain_row: int  # 1-based row number of that bar
    domain_forecast_bars: int  # bars remaining in the file after that bar
    domain_type: int  # 1=Primary, 2=Nested, 3=Inside
    domain_open: float  # open of the first bar of the rolling range
    domain_high: float  # highest high of the rolling range
    domain_low: float  # lowest low of the rolling range
    domain_close_original: float  # close of the last bar of the rolling range
    domain_close_first_captive: float  # close of the bar that created the domain
    domain_range: float  # domain_high - domain_low
    domain_percentr_original: float  # (close_original - low) / range
    domain_bias_dir_original: int  # 1=Up, 2=Down
    domain_expansion_dir: Optional[int] = None  # 1=Up, 2=Down
    domain_bias_ftt: Optional[int] = None  # 1 if expansion dir equals original bias, else 0
    first_expansion_reu: Optional[float] = None  # first window: max high above domain_high (>= 0)
    first_expansion_red: Optional[float] = None  # first window: min low below domain_low (>= 0)
    second_expansion_reu: Optional[float] = None  # same, second window
    second_expansion_red: Optional[float] = None  # same, second window
    domain_captive_count: int = 1
    # The fields below default to None until the first update, so they are
    # Optional (the original annotations omitted this).
    domain_captive_high: Optional[float] = None  # replaced by -inf in __post_init__
    domain_captive_low: Optional[float] = None  # replaced by +inf in __post_init__
    domain_percentr_active: Optional[float] = None  # (last close - low) / range
    domain_bias_active: Optional[int] = None  # 1=Up, 2=Down, from percentr_active
    domain_close_last: Optional[float] = None  # close of the latest captive bar
    domain_true_flag: Optional[int] = None  # 1=true domain, 0=duplicate range
    domain_true_duration: Optional[int] = None
    status: str = 'open'  # 'open' until a bar expands the range, then 'closed'

    def __post_init__(self):
        """Seed the running captive extremes so the first max()/min() update wins."""
        if self.domain_captive_high is None:
            self.domain_captive_high = float('-inf')
        if self.domain_captive_low is None:
            self.domain_captive_low = float('inf')

class DomainProcessor:
    """Scan one OHLC price file for domains over several rolling-range durations.

    For every duration from 1 to ``domain_depth`` the bars are walked in order.
    A bar whose high/low stay inside the high/low of the preceding ``duration``
    bars creates a ``Domain``; each bar then either extends the open domains of
    that duration or expands (closes) them.

    ``domains`` and ``domain_counter`` are never reset, so results accumulate
    across ``process_file`` calls; use a fresh instance per file.
    """

    def __init__(self,
                 domain_depth: int = DOMAIN_DEPTH,
                 domain_primary_check_duration: int = DOMAIN_PRIMARY_CHECK_DURATION,
                 first_expansion_distance: int = FIRST_EXPANSION_DISTANCE,
                 second_expansion_distance: int = SECOND_EXPANSION_DISTANCE):
        self.domain_depth = domain_depth
        self.domain_primary_check_duration = domain_primary_check_duration
        self.first_expansion_distance = first_expansion_distance
        self.second_expansion_distance = second_expansion_distance
        self.domain_counter = 100000  # first domain_id handed out
        self.domains: List[Domain] = []
        self.ticker = None

    def process_file(self, filepath: str) -> pd.DataFrame:
        """Process a single input file and return domains DataFrame.

        Args:
            filepath: CSV with date/open/high/low/close columns (any letter
                case); the ticker is the file-name part before the first ``_``.

        Returns:
            One row per detected domain, columns in ``Domain`` field order.

        Raises:
            ValueError: if a required column is missing. Any error is logged
            and re-raised.
        """
        try:
            # Extract ticker from filepath
            filename = os.path.basename(filepath)
            self.ticker = filename.replace('.csv', '').split('_')[0]

            df = pd.read_csv(filepath)

            # Normalise header case BEFORE validating; the original validated
            # first, so files with e.g. "Date,Open,..." headers were rejected.
            df.columns = df.columns.str.lower()
            required_cols = ['date', 'open', 'high', 'low', 'close']
            if any(col not in df.columns for col in required_cols):
                raise ValueError(f"Missing required columns. Required: {required_cols}")

            df['row_num'] = range(1, len(df) + 1)

            for duration in range(1, self.domain_depth + 1):
                self._process_rolling_range(df, duration)

            self._reduce_overlapping_domains()

            return pd.DataFrame([vars(d) for d in self.domains])

        except Exception as e:
            logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    @staticmethod
    def _percent_r(close, low, high):
        """Return where ``close`` sits in [low, high] as a fraction; NaN if high == low."""
        span = high - low
        if span == 0:
            # Flat range: the position is undefined; avoid dividing by zero.
            return float('nan')
        return (close - low) / span

    def _process_rolling_range(self, df: pd.DataFrame, duration: int):
        """Create domains for one duration and update the open ones bar by bar."""
        total_rows = len(df)

        for i in range(duration, total_rows):
            rolling_range = df.iloc[i - duration:i]  # the `duration` bars before bar i
            current_bar = df.iloc[i]

            rolling_high = rolling_range['high'].max()
            rolling_low = rolling_range['low'].min()
            rolling_open = rolling_range.iloc[0]['open']
            rolling_close = rolling_range.iloc[-1]['close']

            # The bar failed to expand the rolling range -> a domain is established.
            if (current_bar['high'] <= rolling_high and
                    current_bar['low'] >= rolling_low):

                domain_type = self._determine_domain_type(current_bar['row_num'], duration)
                percent_r = self._percent_r(rolling_close, rolling_low, rolling_high)

                new_domain = Domain(
                    domain_id=self.domain_counter,
                    ticker=self.ticker,
                    domain_rolling_range_duration=duration,
                    domain_captive_date=str(current_bar['date']),
                    domain_row=current_bar['row_num'],
                    domain_forecast_bars=total_rows - i - 1,
                    domain_type=domain_type,
                    domain_open=rolling_open,
                    domain_high=rolling_high,
                    domain_low=rolling_low,
                    domain_close_original=rolling_close,
                    domain_close_first_captive=current_bar['close'],
                    domain_range=rolling_high - rolling_low,
                    domain_percentr_original=percent_r,
                    # NaN compares False, so a flat range is classified as 2.
                    domain_bias_dir_original=1 if percent_r >= 0.5 else 2,
                )

                self.domains.append(new_domain)
                self.domain_counter += 1

            # Runs for every bar, including the one that just created a domain.
            self._update_open_domains(df, current_bar, i, duration)

    def _determine_domain_type(self, current_row: int, duration: int) -> int:
        """Determine domain type (1=Primary, 2=Nested, 3=Inside).

        Only open domains of the same duration created at most
        ``domain_primary_check_duration`` rows earlier are considered.
        """
        relevant_domains = [d for d in self.domains
                            if d.status == 'open' and
                            d.domain_rolling_range_duration == duration and
                            current_row - d.domain_row <= self.domain_primary_check_duration]

        if not relevant_domains:
            return 1  # Primary

        last_domain = relevant_domains[-1]
        if current_row - last_domain.domain_row == 1:
            return 3  # Inside: the previous row also created a domain

        # NOTE(review): containment is tested against ``last_domain`` (the most
        # recent relevant domain), not against the range of the domain being
        # created, so an open primary ``last_domain`` always matches itself.
        # Confirm this is the intended definition of "nested".
        for domain in relevant_domains:
            if (domain.domain_type == 1 and
                    domain.domain_high >= last_domain.domain_high and
                    domain.domain_low <= last_domain.domain_low):
                return 2  # Nested

        return 1  # Primary

    def _update_open_domains(self, df: pd.DataFrame, current_bar: pd.Series,
                           current_idx: int, duration: int):
        """Update metrics for open domains and calculate expansions.

        NOTE(review): this also runs on the bar that created a domain, so a new
        domain's ``domain_captive_count`` is already 2 after its first bar.
        Confirm whether that double count is intended.
        """
        bar_high = current_bar['high']
        bar_low = current_bar['low']
        bar_close = current_bar['close']
        min_forecast = max(self.first_expansion_distance, self.second_expansion_distance)

        for domain in [d for d in self.domains if d.status == 'open' and
                       d.domain_rolling_range_duration == duration]:

            expanded_up = bar_high > domain.domain_high
            expanded_down = bar_low < domain.domain_low

            if expanded_up or expanded_down:
                domain.status = 'closed'
                # A bar breaking both sides is recorded as an upward expansion.
                domain.domain_expansion_dir = 1 if expanded_up else 2
                domain.domain_bias_ftt = (
                    1 if domain.domain_expansion_dir == domain.domain_bias_dir_original else 0)

                # NOTE(review): forecast bars are counted from the domain's
                # creation row, not from this expansion bar, so the windows
                # below can still be shorter than requested near the file end.
                if domain.domain_forecast_bars >= min_forecast:
                    self._calculate_expansions(df, domain, current_idx)
            else:
                # Bar stayed inside the domain: update captive metrics.
                domain.domain_captive_count += 1
                domain.domain_captive_high = max(domain.domain_captive_high, bar_high)
                domain.domain_captive_low = min(domain.domain_captive_low, bar_low)
                domain.domain_close_last = bar_close
                domain.domain_percentr_active = self._percent_r(
                    bar_close, domain.domain_low, domain.domain_high)
                domain.domain_bias_active = 1 if domain.domain_percentr_active >= 0.5 else 2

    def _calculate_expansions(self, df: pd.DataFrame, domain: Domain, current_idx: int):
        """Calculate first and second expansion metrics.

        Each window starts at the expansion bar (``current_idx``); ``reu`` is how
        far the window's highest high exceeds the domain high and ``red`` how far
        its lowest low undercuts the domain low, both floored at 0.
        """
        # First expansion window
        first_window = df.iloc[current_idx:current_idx + self.first_expansion_distance]
        domain.first_expansion_reu = max(0, first_window['high'].max() - domain.domain_high)
        domain.first_expansion_red = max(0, domain.domain_low - first_window['low'].min())

        # Second expansion window
        if domain.domain_forecast_bars >= self.second_expansion_distance:
            second_window = df.iloc[current_idx:current_idx + self.second_expansion_distance]
            domain.second_expansion_reu = max(0, second_window['high'].max() - domain.domain_high)
            domain.second_expansion_red = max(0, domain.domain_low - second_window['low'].min())

    def _reduce_overlapping_domains(self):
        """Identify true domains and update related fields.

        Domains created on the same date are visited by ascending duration. A
        domain whose high/low equal those of the current true domain is a
        duplicate (flag 0) and records that true domain's duration; any other
        domain becomes the new true domain (flag 1) and records its own
        duration (previously left unset for true domains).
        """
        domains_by_date = {}
        for domain in self.domains:
            domains_by_date.setdefault(domain.domain_captive_date, []).append(domain)

        for date_domains in domains_by_date.values():
            date_domains.sort(key=lambda x: x.domain_rolling_range_duration)

            true_domain = None
            for domain in date_domains:
                if (true_domain is not None and
                        domain.domain_high == true_domain.domain_high and
                        domain.domain_low == true_domain.domain_low):
                    domain.domain_true_flag = 0
                    domain.domain_true_duration = true_domain.domain_rolling_range_duration
                else:
                    true_domain = domain
                    domain.domain_true_flag = 1
                    domain.domain_true_duration = domain.domain_rolling_range_duration

def process_input_directory(input_dir: str = INPUT_DIRECTORY,
                          domain_depth: int = DOMAIN_DEPTH,
                          domain_primary_check_duration: int = DOMAIN_PRIMARY_CHECK_DURATION,
                          first_expansion_distance: int = FIRST_EXPANSION_DISTANCE,
                          second_expansion_distance: int = SECOND_EXPANSION_DISTANCE):
    """Run the domain analysis on every ``.csv`` file found in ``input_dir``.

    For each file a detailed domain table is written under ``domain_output/``
    and a summary table under ``domain_summary/`` (both relative to the current
    working directory). Any exception is logged and re-raised, which stops the
    remaining files from being processed.
    """
    try:
        for out_dir in ('domain_output', 'domain_summary'):
            os.makedirs(out_dir, exist_ok=True)

        for filename in os.listdir(input_dir):
            if not filename.endswith('.csv'):
                continue

            logger.info(f"Processing {filename}")

            # A new processor per file keeps domains from leaking between files.
            processor = DomainProcessor(
                domain_depth=domain_depth,
                domain_primary_check_duration=domain_primary_check_duration,
                first_expansion_distance=first_expansion_distance,
                second_expansion_distance=second_expansion_distance
            )
            domains_df = processor.process_file(os.path.join(input_dir, filename))

            # File names are expected to look like <ticker>_<period>.csv.
            ticker, temporal = filename.replace('.csv', '').split('_')

            domains_df.to_csv(f"domain_output/{ticker}_Domains_{temporal}.csv", index=False)

            summary_df = create_summary(domains_df)
            summary_df.to_csv(f"domain_summary/{ticker}_DomainSum_{temporal}.csv", index=False)

            logger.info(f"Completed processing {filename}")

    except Exception as exc:
        logger.error(f"Error in process_input_directory: {str(exc)}")
        raise

def create_summary(domains_df: pd.DataFrame) -> pd.DataFrame:
    """Create summary statistics for domains.

    Args:
        domains_df: one row per domain; must provide the columns
            ``domain_rolling_range_duration``, ``domain_type`` and
            ``domain_bias_ftt`` unless it is empty.

    Returns:
        One row per (duration, type) pair with the domain count and the number
        and percentage of domains whose ``domain_bias_ftt`` is 1. Missing
        ``domain_bias_ftt`` values (domains never expanded) are ignored by the
        sum but still included in ``count``. An empty input yields an empty
        frame with the same columns instead of raising ``KeyError``.
    """
    columns = ['rolling_range_duration', 'domain_type', 'count',
               'edge_bias_follow_count', 'edge_bias_follow_pct']

    # A file without any domain produces a frame with no columns at all,
    # which groupby() cannot handle.
    if domains_df.empty:
        return pd.DataFrame(columns=columns)

    grouped = domains_df.groupby(['domain_rolling_range_duration', 'domain_type'])

    summary_data = []
    for (duration, domain_type), group in grouped:
        total_count = len(group)
        follow_thru_count = group['domain_bias_ftt'].sum()
        follow_thru_pct = (follow_thru_count / total_count * 100) if total_count > 0 else 0

        summary_data.append({
            'rolling_range_duration': duration,
            'domain_type': domain_type,
            'count': total_count,
            'edge_bias_follow_count': follow_thru_count,
            'edge_bias_follow_pct': follow_thru_pct
        })

    return pd.DataFrame(summary_data, columns=columns)

if __name__ == "__main__":
    # Script entry point: process the default input directory. Failures are
    # logged here rather than propagated, so no traceback is printed.
    try:
        process_input_directory()
        logger.info(f"Completed processing all files in {INPUT_DIRECTORY}")
    except Exception as err:
        logger.error(f"Program terminated with error: {err!s}")

In [None]:
import pandas as pd
import os
from dataclasses import dataclass
from typing import List, Dict, Optional
import logging

# Configuration settings
# These module-level values are the defaults for DomainProcessor and
# process_input_directory below; edit them before running the cell.
INPUT_DIRECTORY = '/content/input'  # Set your input directory path here
FILE_FORMAT = '{TICKER}_{PERIOD}.csv'    # Expected format of input files
VALID_PERIODS = ['D', 'W', 'M', 'Q', 'Y']  # Valid period values
# This variant scans a single rolling-range duration instead of a depth range.
RRDURATION = 4  # Set your desired rolling range duration
# Maximum row distance for an open domain to be considered when typing a new one.
DOMAIN_PRIMARY_CHECK_DURATION = 20
# Number of rows, starting at the expansion bar, in the first/second expansion window.
FIRST_EXPANSION_DISTANCE = 6
SECOND_EXPANSION_DISTANCE = 12

# Root logger is configured at INFO so progress messages are shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class Domain:
    """One domain: a rolling range that the following bar failed to expand.

    The required fields are fixed when the domain is created; the optional
    fields are filled in later while the domain is tracked bar by bar.
    Field order matters: it becomes the column order of the exported DataFrame.
    """
    domain_id: int  # sequential id assigned by the processor
    ticker: str  # ticker parsed from the input file name
    domain_rolling_range_duration: int  # number of bars in the rolling range
    domain_captive_date: str  # date of the bar that created the domain
    domain_row: int  # 1-based row number of that bar
    domain_forecast_bars: int  # bars remaining in the file after that bar
    domain_type: int  # 1=Primary, 2=Nested, 3=Inside
    domain_open: float  # open of the first bar of the rolling range
    domain_high: float  # highest high of the rolling range
    domain_low: float  # lowest low of the rolling range
    domain_close_original: float  # close of the last bar of the rolling range
    domain_close_first_captive: float  # close of the bar that created the domain
    domain_range: float  # domain_high - domain_low
    domain_percentr_original: float  # (close_original - low) / range
    domain_bias_dir_original: int  # 1=Up, 2=Down
    domain_expansion_dir: Optional[int] = None  # 1=Up, 2=Down
    domain_bias_ftt: Optional[int] = None  # 1 if expansion dir equals original bias, else 0
    first_expansion_reu: Optional[float] = None  # first window: max high above domain_high (>= 0)
    first_expansion_red: Optional[float] = None  # first window: min low below domain_low (>= 0)
    second_expansion_reu: Optional[float] = None  # same, second window
    second_expansion_red: Optional[float] = None  # same, second window
    domain_captive_count: int = 1
    # The fields below default to None until the first update, so they are
    # Optional (the original annotations omitted this).
    domain_captive_high: Optional[float] = None  # replaced by -inf in __post_init__
    domain_captive_low: Optional[float] = None  # replaced by +inf in __post_init__
    domain_percentr_active: Optional[float] = None  # (last close - low) / range
    domain_bias_active: Optional[int] = None  # 1=Up, 2=Down, from percentr_active
    domain_close_last: Optional[float] = None  # close of the latest captive bar
    domain_true_flag: Optional[int] = None  # set to 1 for every domain in this variant
    domain_true_duration: Optional[int] = None  # set to the processor's rrduration
    status: str = 'open'  # 'open' until a bar expands the range, then 'closed'

    def __post_init__(self):
        """Seed the running captive extremes so the first max()/min() update wins."""
        if self.domain_captive_high is None:
            self.domain_captive_high = float('-inf')
        if self.domain_captive_low is None:
            self.domain_captive_low = float('inf')

class DomainProcessor:
    """Scan one OHLC price file for domains of a single rolling-range duration.

    A bar whose high/low stay inside the high/low of the preceding
    ``rrduration`` bars creates a ``Domain``; each bar then either extends the
    open domains or expands (closes) them.

    ``domains`` and ``domain_counter`` are never reset, so results accumulate
    across ``process_file`` calls; use a fresh instance per file.
    """

    def __init__(self,
                 rrduration: int = RRDURATION,
                 domain_primary_check_duration: int = DOMAIN_PRIMARY_CHECK_DURATION,
                 first_expansion_distance: int = FIRST_EXPANSION_DISTANCE,
                 second_expansion_distance: int = SECOND_EXPANSION_DISTANCE):
        self.rrduration = rrduration
        self.domain_primary_check_duration = domain_primary_check_duration
        self.first_expansion_distance = first_expansion_distance
        self.second_expansion_distance = second_expansion_distance
        self.domain_counter = 100000  # first domain_id handed out
        self.domains: List[Domain] = []
        self.ticker = None

    def process_file(self, filepath: str) -> pd.DataFrame:
        """Process a single input file and return domains DataFrame.

        Args:
            filepath: CSV with date/open/high/low/close columns (any letter
                case); the ticker is the file-name part before the first ``_``.

        Returns:
            One row per detected domain, columns in ``Domain`` field order.

        Raises:
            ValueError: if a required column is missing. Any error is logged
            and re-raised.
        """
        try:
            # Extract ticker from filepath
            filename = os.path.basename(filepath)
            self.ticker = filename.replace('.csv', '').split('_')[0]

            df = pd.read_csv(filepath)

            # Normalise header case BEFORE validating; the original validated
            # first, so files with e.g. "Date,Open,..." headers were rejected.
            df.columns = df.columns.str.lower()
            required_cols = ['date', 'open', 'high', 'low', 'close']
            if any(col not in df.columns for col in required_cols):
                raise ValueError(f"Missing required columns. Required: {required_cols}")

            df['row_num'] = range(1, len(df) + 1)

            self._process_rolling_range(df)
            self._reduce_overlapping_domains()

            return pd.DataFrame([vars(d) for d in self.domains])

        except Exception as e:
            logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    @staticmethod
    def _percent_r(close, low, high):
        """Return where ``close`` sits in [low, high] as a fraction; NaN if high == low."""
        span = high - low
        if span == 0:
            # Flat range: the position is undefined; avoid dividing by zero.
            return float('nan')
        return (close - low) / span

    def _process_rolling_range(self, df: pd.DataFrame):
        """Create domains for ``rrduration`` and update the open ones bar by bar."""
        total_rows = len(df)
        duration = self.rrduration

        for i in range(duration, total_rows):
            rolling_range = df.iloc[i - duration:i]  # the `duration` bars before bar i
            current_bar = df.iloc[i]

            rolling_high = rolling_range['high'].max()
            rolling_low = rolling_range['low'].min()
            rolling_open = rolling_range.iloc[0]['open']
            rolling_close = rolling_range.iloc[-1]['close']

            # The bar failed to expand the rolling range -> a domain is established.
            if (current_bar['high'] <= rolling_high and
                    current_bar['low'] >= rolling_low):

                domain_type = self._determine_domain_type(current_bar['row_num'])
                percent_r = self._percent_r(rolling_close, rolling_low, rolling_high)

                new_domain = Domain(
                    domain_id=self.domain_counter,
                    ticker=self.ticker,
                    domain_rolling_range_duration=duration,
                    domain_captive_date=str(current_bar['date']),
                    domain_row=current_bar['row_num'],
                    domain_forecast_bars=total_rows - i - 1,
                    domain_type=domain_type,
                    domain_open=rolling_open,
                    domain_high=rolling_high,
                    domain_low=rolling_low,
                    domain_close_original=rolling_close,
                    domain_close_first_captive=current_bar['close'],
                    domain_range=rolling_high - rolling_low,
                    domain_percentr_original=percent_r,
                    # NaN compares False, so a flat range is classified as 2.
                    domain_bias_dir_original=1 if percent_r >= 0.5 else 2,
                )

                self.domains.append(new_domain)
                self.domain_counter += 1

            # Runs for every bar, including the one that just created a domain.
            self._update_open_domains(df, current_bar, i)

    def _determine_domain_type(self, current_row: int) -> int:
        """Determine domain type (1=Primary, 2=Nested, 3=Inside).

        Only open domains created at most ``domain_primary_check_duration``
        rows earlier are considered.
        """
        relevant_domains = [d for d in self.domains
                            if d.status == 'open' and
                            current_row - d.domain_row <= self.domain_primary_check_duration]

        if not relevant_domains:
            return 1  # Primary

        last_domain = relevant_domains[-1]
        if current_row - last_domain.domain_row == 1:
            return 3  # Inside: the previous row also created a domain

        # NOTE(review): containment is tested against ``last_domain`` (the most
        # recent relevant domain), not against the range of the domain being
        # created, so an open primary ``last_domain`` always matches itself.
        # Confirm this is the intended definition of "nested".
        for domain in relevant_domains:
            if (domain.domain_type == 1 and
                    domain.domain_high >= last_domain.domain_high and
                    domain.domain_low <= last_domain.domain_low):
                return 2  # Nested

        return 1  # Primary

    def _update_open_domains(self, df: pd.DataFrame, current_bar: pd.Series,
                           current_idx: int):
        """Update metrics for open domains and calculate expansions.

        NOTE(review): this also runs on the bar that created a domain, so a new
        domain's ``domain_captive_count`` is already 2 after its first bar.
        Confirm whether that double count is intended.
        """
        bar_high = current_bar['high']
        bar_low = current_bar['low']
        bar_close = current_bar['close']
        min_forecast = max(self.first_expansion_distance, self.second_expansion_distance)

        for domain in [d for d in self.domains if d.status == 'open']:
            expanded_up = bar_high > domain.domain_high
            expanded_down = bar_low < domain.domain_low

            if expanded_up or expanded_down:
                domain.status = 'closed'
                # A bar breaking both sides is recorded as an upward expansion.
                domain.domain_expansion_dir = 1 if expanded_up else 2
                domain.domain_bias_ftt = (
                    1 if domain.domain_expansion_dir == domain.domain_bias_dir_original else 0)

                # NOTE(review): forecast bars are counted from the domain's
                # creation row, not from this expansion bar, so the windows
                # below can still be shorter than requested near the file end.
                if domain.domain_forecast_bars >= min_forecast:
                    self._calculate_expansions(df, domain, current_idx)
            else:
                # Bar stayed inside the domain: update captive metrics.
                domain.domain_captive_count += 1
                domain.domain_captive_high = max(domain.domain_captive_high, bar_high)
                domain.domain_captive_low = min(domain.domain_captive_low, bar_low)
                domain.domain_close_last = bar_close
                domain.domain_percentr_active = self._percent_r(
                    bar_close, domain.domain_low, domain.domain_high)
                domain.domain_bias_active = 1 if domain.domain_percentr_active >= 0.5 else 2

    def _calculate_expansions(self, df: pd.DataFrame, domain: Domain, current_idx: int):
        """Calculate first and second expansion metrics.

        Each window starts at the expansion bar (``current_idx``); ``reu`` is how
        far the window's highest high exceeds the domain high and ``red`` how far
        its lowest low undercuts the domain low, both floored at 0.
        """
        # First expansion window
        first_window = df.iloc[current_idx:current_idx + self.first_expansion_distance]
        domain.first_expansion_reu = max(0, first_window['high'].max() - domain.domain_high)
        domain.first_expansion_red = max(0, domain.domain_low - first_window['low'].min())

        # Second expansion window
        if domain.domain_forecast_bars >= self.second_expansion_distance:
            second_window = df.iloc[current_idx:current_idx + self.second_expansion_distance]
            domain.second_expansion_reu = max(0, second_window['high'].max() - domain.domain_high)
            domain.second_expansion_red = max(0, domain.domain_low - second_window['low'].min())

    def _reduce_overlapping_domains(self):
        """Identify true domains and update related fields.

        With a single duration there is nothing to de-duplicate, so every
        domain is flagged true with ``rrduration`` as its true duration. (The
        former grouping by captive date did not affect the result.)
        """
        for domain in self.domains:
            domain.domain_true_flag = 1
            domain.domain_true_duration = self.rrduration

def process_input_directory(input_dir: str = INPUT_DIRECTORY,
                          rrduration: int = RRDURATION,
                          domain_primary_check_duration: int = DOMAIN_PRIMARY_CHECK_DURATION,
                          first_expansion_distance: int = FIRST_EXPANSION_DISTANCE,
                          second_expansion_distance: int = SECOND_EXPANSION_DISTANCE):
    """Process all files in the input directory.

    Every ``.csv`` file named ``<ticker>_<period>.csv`` with a period listed in
    ``VALID_PERIODS`` is analysed; a detailed table is written under
    ``domain_output/`` and a summary under ``domain_summary/`` (relative to the
    current working directory). Files whose name does not match are logged and
    skipped *before* any processing is done. Any other exception is logged and
    re-raised, which stops the remaining files from being processed.
    """
    try:
        os.makedirs('domain_output', exist_ok=True)
        os.makedirs('domain_summary', exist_ok=True)

        for filename in os.listdir(input_dir):
            if not filename.endswith('.csv'):
                continue

            logger.info(f"Processing {filename}")

            # Validate the name first: previously the whole file was processed
            # before a bad period was noticed, and a name without exactly one
            # '_' aborted the run with an unpacking error.
            name_parts = filename.replace('.csv', '').split('_')
            if len(name_parts) != 2:
                logger.error(f"Invalid file name {filename}. Expected format {FILE_FORMAT}")
                continue
            ticker, period = name_parts

            # Validate period
            if period not in VALID_PERIODS:
                logger.error(f"Invalid period {period} in file {filename}. Must be one of {VALID_PERIODS}")
                continue

            # A new processor per file keeps domains from leaking between files.
            processor = DomainProcessor(
                rrduration=rrduration,
                domain_primary_check_duration=domain_primary_check_duration,
                first_expansion_distance=first_expansion_distance,
                second_expansion_distance=second_expansion_distance
            )

            filepath = os.path.join(input_dir, filename)
            domains_df = processor.process_file(filepath)

            detailed_output = f"domain_output/{ticker}_Domains_{period}.csv"
            domains_df.to_csv(detailed_output, index=False)

            summary_df = create_summary(domains_df)
            summary_output = f"domain_summary/{ticker}_DomainSum_{period}.csv"
            summary_df.to_csv(summary_output, index=False)

            logger.info(f"Completed processing {filename}")

    except Exception as e:
        logger.error(f"Error in process_input_directory: {str(e)}")
        raise

def create_summary(domains_df: pd.DataFrame) -> pd.DataFrame:
    """Create summary statistics for domains.

    Args:
        domains_df: one row per domain; must provide the columns
            ``domain_rolling_range_duration``, ``domain_type`` and
            ``domain_bias_ftt`` unless it is empty.

    Returns:
        One row per (duration, type) pair with the domain count and the number
        and percentage of domains whose ``domain_bias_ftt`` is 1. Missing
        ``domain_bias_ftt`` values (domains never expanded) are ignored by the
        sum but still included in ``count``. An empty input yields an empty
        frame with the same columns instead of raising ``KeyError``.
    """
    columns = ['rolling_range_duration', 'domain_type', 'count',
               'edge_bias_follow_count', 'edge_bias_follow_pct']

    # A file without any domain produces a frame with no columns at all,
    # which groupby() cannot handle.
    if domains_df.empty:
        return pd.DataFrame(columns=columns)

    # The duration is taken from the data rather than the module default, so
    # the summary stays correct when the processor ran with another rrduration.
    grouped = domains_df.groupby(['domain_rolling_range_duration', 'domain_type'])

    summary_data = []
    for (duration, domain_type), group in grouped:
        total_count = len(group)
        follow_thru_count = group['domain_bias_ftt'].sum()
        follow_thru_pct = (follow_thru_count / total_count * 100) if total_count > 0 else 0

        summary_data.append({
            'rolling_range_duration': duration,
            'domain_type': domain_type,
            'count': total_count,
            'edge_bias_follow_count': follow_thru_count,
            'edge_bias_follow_pct': follow_thru_pct
        })

    return pd.DataFrame(summary_data, columns=columns)

if __name__ == "__main__":
    # Script entry point: process the default input directory. Failures are
    # logged here rather than propagated, so no traceback is printed.
    try:
        process_input_directory()
        logger.info(f"Completed processing all files in {INPUT_DIRECTORY}")
    except Exception as err:
        logger.error(f"Program terminated with error: {err!s}")

In [None]:
import pandas as pd
import os
from dataclasses import dataclass
from typing import List, Dict, Optional
import logging

# Configure logging
# The root logger is set to INFO so progress messages are shown; `logger` is
# the module-level logger used by the functions and classes below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@dataclass
class Domain:
    """One domain: a rolling range that the following bar failed to expand.

    The required fields are fixed when the domain is created; the optional
    fields are filled in later while the domain is tracked bar by bar.
    Field order matters: it becomes the column order of the exported DataFrame.
    """
    domain_id: int  # sequential id assigned by the processor
    domain_rolling_range_duration: int  # number of bars in the rolling range
    domain_captive_date: str  # date of the bar that created the domain
    domain_row: int  # 1-based row number of that bar
    domain_type: int  # 1=Primary, 2=Nested, 3=Inside
    domain_open: float  # open of the first bar of the rolling range
    domain_high: float  # highest high of the rolling range
    domain_low: float  # lowest low of the rolling range
    domain_close_original: float  # close of the last bar of the rolling range
    domain_range: float  # domain_high - domain_low
    domain_percentr_original: float  # (close_original - low) / range
    domain_bias_dir_original: int  # 1 if percentr_original >= 0.5, else 0
    domain_expansion_dir: Optional[int] = None  # 1 if the high was exceeded, else 0
    domain_bias_ftt: Optional[int] = None  # 1 if expansion dir equals original bias, else 0
    domain_captive_count: int = 1
    # The fields below default to None until the first update, so they are
    # Optional (the original annotations omitted this).
    domain_captive_high: Optional[float] = None  # replaced by -inf in __post_init__
    domain_captive_low: Optional[float] = None  # replaced by +inf in __post_init__
    domain_percentr_active: Optional[float] = None  # (last close - low) / range
    domain_bias_active: Optional[int] = None  # 1 if percentr_active >= 0.5, else 0
    domain_close_last: Optional[float] = None  # close of the latest captive bar
    domain_true_flag: Optional[int] = None  # 1=true domain, 0=duplicate range
    domain_true_duration: Optional[int] = None
    status: str = 'open'  # 'open' until a bar expands the range, then 'closed'

    def __post_init__(self):
        """Seed the running captive extremes so the first max()/min() update wins."""
        if self.domain_captive_high is None:
            self.domain_captive_high = float('-inf')
        if self.domain_captive_low is None:
            self.domain_captive_low = float('inf')

class DomainProcessor:
    """Scan one OHLC price file for domains over several rolling-range durations.

    For every duration from 1 to ``domain_depth`` the bars are walked in order.
    A bar whose high/low stay inside the high/low of the preceding ``duration``
    bars creates a ``Domain``; each bar then either extends the open domains of
    that duration or expands (closes) them.

    ``domains`` and ``domain_counter`` are never reset, so results accumulate
    across ``process_file`` calls; use a fresh instance per file.
    """

    def __init__(self, domain_depth: int = 12, domain_primary_check_duration: int = 20):
        self.domain_depth = domain_depth
        self.domain_primary_check_duration = domain_primary_check_duration
        self.domain_counter = 100000  # first domain_id handed out
        self.domains: List[Domain] = []

    def process_file(self, filepath: str) -> pd.DataFrame:
        """Process a single input file and return domains DataFrame.

        Args:
            filepath: CSV with date/open/high/low/close columns (any letter case).

        Returns:
            One row per detected domain, columns in ``Domain`` field order.

        Raises:
            ValueError: if a required column is missing. Any error is logged
            and re-raised.
        """
        try:
            df = pd.read_csv(filepath)

            # Standardize column names BEFORE validating; the original validated
            # first, so files with e.g. "Date,Open,..." headers were rejected.
            df.columns = df.columns.str.lower()
            required_cols = ['date', 'open', 'high', 'low', 'close']
            if any(col not in df.columns for col in required_cols):
                raise ValueError(f"Missing required columns. Required: {required_cols}")

            # Add row numbers
            df['row_num'] = range(1, len(df) + 1)

            # Process each rolling range duration
            for duration in range(1, self.domain_depth + 1):
                self._process_rolling_range(df, duration)

            # Reduce overlapping domains
            self._reduce_overlapping_domains()

            # Convert domains to DataFrame
            return pd.DataFrame([vars(d) for d in self.domains])

        except Exception as e:
            logger.error(f"Error processing file {filepath}: {str(e)}")
            raise

    @staticmethod
    def _percent_r(close, low, high):
        """Return where ``close`` sits in [low, high] as a fraction; NaN if high == low."""
        span = high - low
        if span == 0:
            # Flat range: the position is undefined; avoid dividing by zero.
            return float('nan')
        return (close - low) / span

    def _process_rolling_range(self, df: pd.DataFrame, duration: int):
        """Process a single rolling range duration."""
        for i in range(duration, len(df)):
            rolling_range = df.iloc[i - duration:i]  # the `duration` bars before bar i
            current_bar = df.iloc[i]

            # Calculate rolling range metrics
            rolling_high = rolling_range['high'].max()
            rolling_low = rolling_range['low'].min()
            rolling_open = rolling_range.iloc[0]['open']
            rolling_close = rolling_range.iloc[-1]['close']

            # Check if current bar is inside the rolling range
            if (current_bar['high'] <= rolling_high and
                    current_bar['low'] >= rolling_low):
                domain_type = self._determine_domain_type(current_bar['row_num'], duration)
                percent_r = self._percent_r(rolling_close, rolling_low, rolling_high)

                new_domain = Domain(
                    domain_id=self.domain_counter,
                    domain_rolling_range_duration=duration,
                    domain_captive_date=str(current_bar['date']),
                    domain_row=current_bar['row_num'],
                    domain_type=domain_type,
                    domain_open=rolling_open,
                    domain_high=rolling_high,
                    domain_low=rolling_low,
                    domain_close_original=rolling_close,
                    domain_range=rolling_high - rolling_low,
                    domain_percentr_original=percent_r,
                    # NaN compares False, so a flat range is classified as 0.
                    domain_bias_dir_original=1 if percent_r >= 0.5 else 0,
                )

                self.domains.append(new_domain)
                self.domain_counter += 1

            # Update existing open domains; this also runs for the bar that
            # just created a domain.
            self._update_open_domains(current_bar, duration)

    def _determine_domain_type(self, current_row: int, duration: int) -> int:
        """Determine domain type (1=Primary, 2=Nested, 3=Inside).

        Only open domains of the same duration created at most
        ``domain_primary_check_duration`` rows earlier are considered.
        """
        relevant_domains = [d for d in self.domains
                            if d.status == 'open' and
                            d.domain_rolling_range_duration == duration and
                            current_row - d.domain_row <= self.domain_primary_check_duration]

        if not relevant_domains:
            return 1  # Primary

        last_domain = relevant_domains[-1]
        if current_row - last_domain.domain_row == 1:
            return 3  # Inside: the previous row also created a domain

        # NOTE(review): containment is tested against ``last_domain`` (the most
        # recent relevant domain), not against the range of the domain being
        # created, so an open primary ``last_domain`` always matches itself.
        # Confirm this is the intended definition of "nested".
        for domain in relevant_domains:
            if (domain.domain_type == 1 and
                    domain.domain_high >= last_domain.domain_high and
                    domain.domain_low <= last_domain.domain_low):
                return 2  # Nested

        return 1  # Primary

    def _update_open_domains(self, current_bar: pd.Series, duration: int):
        """Update metrics for open domains.

        NOTE(review): this also runs on the bar that created a domain, so a new
        domain's ``domain_captive_count`` is already 2 after its first bar.
        Confirm whether that double count is intended.
        """
        bar_high = current_bar['high']
        bar_low = current_bar['low']
        bar_close = current_bar['close']

        for domain in [d for d in self.domains if d.status == 'open' and
                       d.domain_rolling_range_duration == duration]:
            expanded_up = bar_high > domain.domain_high
            expanded_down = bar_low < domain.domain_low

            if expanded_up or expanded_down:
                # Close domain and update expansion metrics. A bar breaking
                # both sides is recorded as an upward expansion.
                domain.status = 'closed'
                domain.domain_expansion_dir = 1 if expanded_up else 0
                domain.domain_bias_ftt = (
                    1 if domain.domain_expansion_dir == domain.domain_bias_dir_original else 0)
            else:
                # Update captive metrics
                domain.domain_captive_count += 1
                domain.domain_captive_high = max(domain.domain_captive_high, bar_high)
                domain.domain_captive_low = min(domain.domain_captive_low, bar_low)
                domain.domain_close_last = bar_close
                domain.domain_percentr_active = self._percent_r(
                    bar_close, domain.domain_low, domain.domain_high)
                domain.domain_bias_active = 1 if domain.domain_percentr_active >= 0.5 else 0

    def _reduce_overlapping_domains(self):
        """Identify true domains and update related fields.

        Domains created on the same date are visited by ascending duration. A
        domain whose high/low equal those of the current true domain is a
        duplicate (flag 0) and records that true domain's duration; any other
        domain becomes the new true domain (flag 1) and records its own
        duration (previously left unset for true domains).
        """
        # Group domains by captive date
        domains_by_date = {}
        for domain in self.domains:
            domains_by_date.setdefault(domain.domain_captive_date, []).append(domain)

        # Process each date group
        for date_domains in domains_by_date.values():
            # Sort by rolling range duration
            date_domains.sort(key=lambda x: x.domain_rolling_range_duration)

            true_domain = None
            for domain in date_domains:
                if (true_domain is not None and
                        domain.domain_high == true_domain.domain_high and
                        domain.domain_low == true_domain.domain_low):
                    # Same range as previous true domain
                    domain.domain_true_flag = 0
                    domain.domain_true_duration = true_domain.domain_rolling_range_duration
                else:
                    # First domain in group, or a new range
                    true_domain = domain
                    domain.domain_true_flag = 1
                    domain.domain_true_duration = domain.domain_rolling_range_duration

def process_input_directory(input_dir: str = '/content/input',
                          domain_depth: int = 12,
                          domain_primary_check_duration: int = 20):
    """Run domain detection on every CSV in *input_dir* and write the results.

    For each ``<ticker>_<period>.csv`` file a fresh ``DomainProcessor`` is
    built, ``process_file`` is called on the file, and two CSVs are written
    relative to the current working directory:

    * ``domain_output/<ticker>_Domains_<period>.csv`` -- the frame returned
      by ``process_file``;
    * ``domain_summary/<ticker>_DomainSum_<period>.csv`` -- the result of
      ``create_summary`` on that frame.

    The ticker/period split is taken at the LAST underscore of the file
    stem, so tickers that themselves contain underscores are handled. CSV
    files whose name contains no underscore are skipped with a warning
    instead of aborting the whole batch.

    Args:
        input_dir: Directory scanned (non-recursively) for ``.csv`` files.
        domain_depth: First argument passed to ``DomainProcessor``.
        domain_primary_check_duration: Second argument passed to
            ``DomainProcessor``.

    Raises:
        Exception: Any error raised while scanning, processing or writing is
            logged and then re-raised unchanged.
    """
    csv_suffix = '.csv'
    try:
        # Create output directories
        os.makedirs('domain_output', exist_ok=True)
        os.makedirs('domain_summary', exist_ok=True)

        # os.listdir returns entries in arbitrary order; sort so that runs
        # (and their logs) are reproducible.
        for filename in sorted(os.listdir(input_dir)):
            if not filename.endswith(csv_suffix):
                continue

            # Parse the name BEFORE doing any work, so a badly named file
            # costs nothing. Only the trailing suffix is stripped, and the
            # split happens at the last underscore.
            stem = filename[:-len(csv_suffix)]
            ticker, separator, temporal = stem.rpartition('_')
            if not separator:
                logger.warning(
                    "Skipping %s: name does not match <ticker>_<period>.csv",
                    filename,
                )
                continue

            logger.info("Processing %s", filename)

            # A new processor per file keeps state from leaking between files.
            processor = DomainProcessor(domain_depth, domain_primary_check_duration)

            filepath = os.path.join(input_dir, filename)
            domains_df = processor.process_file(filepath)

            # Save detailed domain file
            detailed_output = f"domain_output/{ticker}_Domains_{temporal}.csv"
            domains_df.to_csv(detailed_output, index=False)

            # Create and save summary
            summary_df = create_summary(domains_df)
            summary_output = f"domain_summary/{ticker}_DomainSum_{temporal}.csv"
            summary_df.to_csv(summary_output, index=False)

            logger.info("Completed processing %s", filename)

    except Exception as e:
        logger.error("Error in process_input_directory: %s", e)
        raise

def create_summary(domains_df: pd.DataFrame) -> pd.DataFrame:
    """Create summary statistics for domains.

    Rows of *domains_df* are grouped by ``domain_rolling_range_duration`` and
    ``domain_type``; each group produces one output row.

    Args:
        domains_df: Frame containing at least the columns
            ``domain_rolling_range_duration``, ``domain_type`` and
            ``domain_bias_ftt``.

    Returns:
        A frame with the columns ``rolling_range_duration``, ``domain_type``,
        ``count`` (rows in the group), ``edge_bias_follow_count`` (sum of the
        group's ``domain_bias_ftt``) and ``edge_bias_follow_pct`` (that sum as
        a percentage of ``count``). The columns are present even when there
        are no groups, so an empty summary still has a header when saved.
    """
    summary_columns = [
        'rolling_range_duration',
        'domain_type',
        'count',
        'edge_bias_follow_count',
        'edge_bias_follow_pct',
    ]

    # Group by rolling range duration and type
    grouped = domains_df.groupby(['domain_rolling_range_duration', 'domain_type'])

    summary_data = []
    for (duration, domain_type), group in grouped:
        total_count = len(group)
        follow_thru_count = group['domain_bias_ftt'].sum()
        follow_thru_pct = (follow_thru_count / total_count * 100) if total_count > 0 else 0

        summary_data.append({
            'rolling_range_duration': duration,
            'domain_type': domain_type,
            'count': total_count,
            'edge_bias_follow_count': follow_thru_count,
            'edge_bias_follow_pct': follow_thru_pct
        })

    # Passing the columns explicitly pins the schema; without it an empty
    # summary_data list yields a frame with no columns at all.
    return pd.DataFrame(summary_data, columns=summary_columns)

if __name__ == "__main__":
    # Drive the run from the configuration constants defined at the top of
    # the file, so that editing them actually changes what gets processed
    # (a bare call would silently fall back to the function's own defaults).
    try:
        process_input_directory(
            input_dir=INPUT_DIRECTORY,
            domain_depth=DOMAIN_DEPTH,
            domain_primary_check_duration=DOMAIN_PRIMARY_CHECK_DURATION,
        )
    except Exception as e:
        # Top-level boundary: report the failure instead of dumping a traceback.
        logger.error(f"Program terminated with error: {str(e)}")