<a href="https://colab.research.google.com/github/BaronVonBussin/NewTransit/blob/main/Untitled57.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Run configuration
child_period, parent_period = "D", "M"  # Defaults: daily ("D") children, monthly ("M") parents
jobname = "BPB_20250105"  # Job label for this run

import os
import pandas as pd
from datetime import datetime

# Folder layout: one input folder plus one output folder per export type
input_path, output_path, parent_output_path = (
    "/content/input",
    "/content/output_gel",
    "/content/output_parent",
)

# Create both output folders; exist_ok leaves already-present folders untouched
os.makedirs(parent_output_path, exist_ok=True)
os.makedirs(output_path, exist_ok=True)

# Define the parent period lookup logic
def assign_parent_lookup_date(row, child_period, parent_period):
    """
    Return the key of the parent period that contains ``row['date']``.

    Args:
        row: Mapping-like object (for example a DataFrame row) whose
            ``'date'`` entry is a date/datetime-like value.
        child_period: Period code of the input bars ("D", "M" or "Q").
        parent_period: Period code of the bucket the bars roll up into
            ("W", "M", "Q" or "Y").

    Returns:
        str: ``"YYYY/WW"`` (ISO year and week) for D->W, ``"YYYY/MM"`` for
        D->M, ``"YYYY/Qn"`` for M->Q, and ``"YYYY"`` for M->Y and Q->Y.

    Raises:
        ValueError: If the child/parent combination is not supported.
    """
    if child_period == "D" and parent_period == "W":  # Daily to Weekly
        # isocalendar() yields the ISO year, which can differ from the
        # calendar year for days adjacent to New Year.
        year, week, _ = row['date'].isocalendar()
        return f"{year}/{week:02d}"
    if child_period == "D" and parent_period == "M":  # Daily to Monthly
        return row['date'].strftime('%Y/%m')
    if child_period == "M" and parent_period == "Q":  # Monthly to Quarterly
        quarter = (row['date'].month - 1) // 3 + 1
        return f"{row['date'].year}/Q{quarter}"
    if parent_period == "Y" and child_period in ("M", "Q"):  # Monthly/Quarterly to Yearly
        # The parent of a month or of a quarter is the calendar year.  Keying
        # quarters by "YYYY/Qn" would put every quarter in its own group.
        return f"{row['date'].year}"
    raise ValueError(f"Unsupported child-parent combination: {child_period}:{parent_period}")


# Process each file in the input directory.
# For every "<ticker>_<period>_<rolling range>.csv" file: load the bars, key
# each bar by its parent period, flag bars that extend the running high/low of
# their parent period, then export the bar-level file and a per-parent summary.
for file_name in os.listdir(input_path):
    # Anything that is not a CSV is ignored silently
    if not file_name.endswith('.csv'):
        continue

    # Extract metadata from file name
    parts = file_name.split('_')
    if len(parts) < 3:
        print(f"Skipping invalid file: {file_name}")
        continue

    ticker, temporal_period, rolling_range_dur = parts[:3]
    temporal_period = temporal_period.upper()

    # Load the file
    input_file = os.path.join(input_path, file_name)
    data = pd.read_csv(input_file)

    # Validate mandatory fields
    required_columns = ['date', 'open', 'high', 'low', 'close']
    for col in required_columns:
        if col not in data.columns:
            raise ValueError(f"Mandatory field '{col}' missing in {file_name}")

    # Convert date column to datetime
    data['date'] = pd.to_datetime(data['date'])

    # Assign parent_lookup_date.  The helper takes (row, child, parent): the
    # child period comes from the file name, the parent from the run parameter.
    data['parent_lookup_date'] = data.apply(
        lambda row: assign_parent_lookup_date(row, temporal_period, parent_period), axis=1
    )

    # Sort so bars are visited chronologically inside each parent period
    data = data.sort_values(by=['parent_lookup_date', 'date'])

    # Initialize the bar-level result columns
    data['gel_reu_value'] = 0.0
    data['gel_red_value'] = 0.0
    data['gel_reu_flag'] = False
    data['gel_red_flag'] = False
    data['gel_re_flag'] = False

    # Bar-by-bar processing: the running high/low restarts with every parent period
    for name, group in data.groupby('parent_lookup_date'):
        previous_high = None
        previous_low = None

        for idx, row in group.iterrows():
            high = row['high']
            low = row['low']

            # A bar extends the range up (reu) / down (red) when it exceeds the
            # running extreme; the first bar of a period has nothing to exceed.
            reu_flag = previous_high is not None and bool(high > previous_high)
            red_flag = previous_low is not None and bool(low < previous_low)

            if reu_flag:
                data.loc[idx, 'gel_reu_value'] = high - previous_high
            if red_flag:
                data.loc[idx, 'gel_red_value'] = previous_low - low

            data.loc[idx, 'gel_reu_flag'] = reu_flag
            data.loc[idx, 'gel_red_flag'] = red_flag
            data.loc[idx, 'gel_re_flag'] = reu_flag or red_flag

            # Update the running extremes
            previous_high = max(previous_high, high) if previous_high is not None else high
            previous_low = min(previous_low, low) if previous_low is not None else low

    # Generate summary metrics.  Group only now, after the flag columns have
    # been filled, so the aggregation cannot see a stale view of the frame.
    grouped = data.groupby('parent_lookup_date')
    summary = grouped.agg(
        start_date=('date', 'min'),
        end_date=('date', 'max'),
        child_count=('date', 'count'),
        reu_count=('gel_reu_flag', 'sum'),
        red_count=('gel_red_flag', 'sum'),
        total_rpc=('gel_re_flag', 'sum')
    ).reset_index()

    # Add metadata
    summary['create_date'] = datetime.now().strftime('%Y-%m-%d')
    summary['create_time'] = datetime.now().strftime('%H:%M:%S')
    summary['jobname'] = f"{ticker}_output_parent_{temporal_period}"

    # Export files
    gel_output_file = os.path.join(output_path, f"{ticker}_Gel_{temporal_period}.csv")
    parent_output_file = os.path.join(parent_output_path, f"{ticker}_output_parent_{temporal_period}.csv")

    data.to_csv(gel_output_file, index=False)
    summary.to_csv(parent_output_file, index=False)

    print(f"Processed and exported: {file_name}")


Processed and exported: AAPL_D_1.csv
Processed and exported: MMM_D_1.csv
Processed and exported: AFL_D_1.csv


In [None]:
# Run configuration
child_period, parent_period = "D", "M"  # Defaults: daily ("D") children, monthly ("M") parents
jobname = "BPB_20250105"  # Job name to include in output files

# Folder layout
input_path, output_path, parent_output_path = (
    "/content/input",
    "/content/output_gel",
    "/content/output_parent",
)

import os
import pandas as pd
from datetime import datetime

# Create both output folders; exist_ok leaves already-present folders untouched
os.makedirs(parent_output_path, exist_ok=True)
os.makedirs(output_path, exist_ok=True)

# Parent lookup logic
def assign_parent_lookup_date(row, child_period, parent_period):
    """
    Return the key of the parent period that contains ``row['date']``.

    Args:
        row: Mapping-like object (for example a DataFrame row) whose
            ``'date'`` entry is a date/datetime-like value.
        child_period: Period code of the input bars ("D", "M" or "Q").
        parent_period: Period code of the bucket the bars roll up into
            ("W", "M", "Q" or "Y").

    Returns:
        str: ``"YYYY/WW"`` (ISO year and week) for D->W, ``"YYYY/MM"`` for
        D->M, ``"YYYY/Qn"`` for M->Q, and ``"YYYY"`` for M->Y and Q->Y.

    Raises:
        ValueError: If the child/parent combination is not supported.
    """
    if child_period == "D" and parent_period == "W":
        # isocalendar() yields the ISO year, which can differ from the
        # calendar year for days adjacent to New Year.
        year, week, _ = row['date'].isocalendar()
        return f"{year}/{week:02d}"
    if child_period == "D" and parent_period == "M":
        return row['date'].strftime('%Y/%m')
    if child_period == "M" and parent_period == "Q":
        quarter = (row['date'].month - 1) // 3 + 1
        return f"{row['date'].year}/Q{quarter}"
    if parent_period == "Y" and child_period in ("M", "Q"):
        # The parent of a month or of a quarter is the calendar year.  Keying
        # quarters by "YYYY/Qn" would put every quarter in its own group.
        return f"{row['date'].year}"
    raise ValueError(f"Unsupported child-parent combination: {child_period}:{parent_period}")

# Process each input file.
# This revision only builds the output layout: it keys every bar by parent
# period, makes sure the expected columns exist, and exports both files.
for file_name in os.listdir(input_path):
    # Anything that is not a CSV is ignored silently
    if not file_name.endswith('.csv'):
        continue

    parts = file_name.split('_')
    if len(parts) < 3:
        print(f"Skipping invalid file: {file_name}")
        continue

    ticker = parts[0]  # Extract Ticker
    print(f"Processing {file_name} for Ticker: {ticker}")

    # Load and validate data.  Validation runs before any column is touched so
    # that a missing 'date' column is reported with the ValueError below rather
    # than a bare KeyError from the datetime conversion.
    input_file = os.path.join(input_path, file_name)
    data = pd.read_csv(input_file)
    required_columns = ['date', 'open', 'high', 'low', 'close']
    for col in required_columns:
        if col not in data.columns:
            raise ValueError(f"Missing required column: {col} in {file_name}")
    data['date'] = pd.to_datetime(data['date'])

    # Assign parent_lookup_date
    data['parent_lookup_date'] = data.apply(
        lambda row: assign_parent_lookup_date(row, child_period, parent_period), axis=1
    )

    # Ensure mandatory and calculated columns.  Calculated columns that the
    # input does not already provide are added as empty placeholders; nothing
    # in this loop computes them.
    mandatory_columns = ['date', 'open', 'high', 'low', 'close', 'parent_lookup_date']
    calculated_columns = [
        'gel_reu_value', 'gel_red_value', 'gel_reu_flag', 'gel_red_flag',
        'gel_re_flag', 'gel_range', 'gel_percent_r', 'gel_ce_percent', 'gel_epc_dir'
    ]
    for col in mandatory_columns + calculated_columns:
        if col not in data.columns:
            data[col] = None

    # Export gel file
    gel_output_file = os.path.join(output_path, f"{ticker}_Gel_{child_period}.csv")
    data.to_csv(gel_output_file, index=False)

    # Generate summary metrics for parent export
    grouped = data.groupby('parent_lookup_date')
    summary = grouped.agg(
        start_date=('date', 'min'),
        end_date=('date', 'max'),
        child_count=('date', 'count'),
        reu_count=('gel_reu_flag', 'sum'),
        red_count=('gel_red_flag', 'sum'),
        total_rpc=('gel_re_flag', 'sum')
    ).reset_index()

    # Add metadata to summary
    summary['create_date'] = datetime.now().strftime('%Y-%m-%d')
    summary['create_time'] = datetime.now().strftime('%H:%M:%S')
    summary['jobname'] = jobname

    # Export parent summary file
    parent_output_file = os.path.join(parent_output_path, f"{ticker}_output_parent_{parent_period}.csv")
    summary.to_csv(parent_output_file, index=False)

    print(f"Exported Gel File: {gel_output_file}")
    print(f"Exported Parent Summary: {parent_output_file}")


Processing AAPL_D_1.csv for Ticker: AAPL
Exported Gel File: /content/output_gel/AAPL_Gel_D.csv
Exported Parent Summary: /content/output_parent/AAPL_output_parent_M.csv
Processing MMM_D_1.csv for Ticker: MMM
Exported Gel File: /content/output_gel/MMM_Gel_D.csv
Exported Parent Summary: /content/output_parent/MMM_output_parent_M.csv
Processing AFL_D_1.csv for Ticker: AFL
Exported Gel File: /content/output_gel/AFL_Gel_D.csv
Exported Parent Summary: /content/output_parent/AFL_output_parent_M.csv


In [None]:
import os
import pandas as pd
from datetime import datetime

# Run configuration
child_period, parent_period = "D", "M"  # Defaults: daily ("D") children, monthly ("M") parents
jobname = "BPB_20250105"  # Job name to include in output files

# Folder layout
input_path, output_path, parent_output_path = (
    "/content/input",
    "/content/output_gel",
    "/content/output_parent",
)

# Create both output folders; exist_ok leaves already-present folders untouched
os.makedirs(parent_output_path, exist_ok=True)
os.makedirs(output_path, exist_ok=True)

# Parent lookup logic
def assign_parent_lookup_date(row, child_period, parent_period):
    """
    Return the key of the parent period that contains ``row['date']``.

    Args:
        row: Mapping-like object (for example a DataFrame row) whose
            ``'date'`` entry is a date/datetime-like value.
        child_period: Period code of the input bars ("D", "M" or "Q").
        parent_period: Period code of the bucket the bars roll up into
            ("W", "M", "Q" or "Y").

    Returns:
        str: ``"YYYY/WW"`` (ISO year and week) for D->W, ``"YYYY/MM"`` for
        D->M, ``"YYYY/Qn"`` for M->Q, and ``"YYYY"`` for M->Y and Q->Y.

    Raises:
        ValueError: If the child/parent combination is not supported.
    """
    if child_period == "D" and parent_period == "W":
        # isocalendar() yields the ISO year, which can differ from the
        # calendar year for days adjacent to New Year.
        year, week, _ = row['date'].isocalendar()
        return f"{year}/{week:02d}"
    if child_period == "D" and parent_period == "M":
        return row['date'].strftime('%Y/%m')
    if child_period == "M" and parent_period == "Q":
        quarter = (row['date'].month - 1) // 3 + 1
        return f"{row['date'].year}/Q{quarter}"
    if parent_period == "Y" and child_period in ("M", "Q"):
        # The parent of a month or of a quarter is the calendar year.  Keying
        # quarters by "YYYY/Qn" would put every quarter in its own group.
        return f"{row['date'].year}"
    raise ValueError(f"Unsupported child-parent combination: {child_period}:{parent_period}")

# Process each input file.
# For every CSV: key each bar by its parent period, measure how far each bar
# extends the running high/low of that period, then export the bar-level file
# and a per-parent summary.
for file_name in os.listdir(input_path):
    # Anything that is not a CSV is ignored silently
    if not file_name.endswith('.csv'):
        continue

    parts = file_name.split('_')
    if len(parts) < 3:
        print(f"Skipping invalid file: {file_name}")
        continue

    ticker = parts[0]  # Extract Ticker
    print(f"Processing {file_name} for Ticker: {ticker}")

    # Load and validate data.  Validation runs before any column is touched so
    # that a missing 'date' column is reported with the ValueError below rather
    # than a bare KeyError from the datetime conversion.
    input_file = os.path.join(input_path, file_name)
    data = pd.read_csv(input_file)
    required_columns = ['date', 'open', 'high', 'low', 'close']
    for col in required_columns:
        if col not in data.columns:
            raise ValueError(f"Missing required column: {col} in {file_name}")
    data['date'] = pd.to_datetime(data['date'])

    # Assign parent_lookup_date
    data['parent_lookup_date'] = data.apply(
        lambda row: assign_parent_lookup_date(row, child_period, parent_period), axis=1
    )

    # Sort so bars are visited chronologically inside each parent period
    data = data.sort_values(by=['parent_lookup_date', 'date'])

    # Initialize calculated fields.  Only the first five are filled in below;
    # the rest stay empty placeholders in the export.
    calculated_fields = [
        'gel_reu_value', 'gel_red_value', 'gel_reu_flag', 'gel_red_flag', 'gel_re_flag',
        'gel_range', 'gel_percent_r', 'gel_ce_percent', 'gel_epc_dir', 'gel_epc',
        'gel_epc_hp', 'gel_rpc', 'gel_e1_value', 'gel_e2_value', 'gel_fre_dir'
    ]
    for field in calculated_fields:
        data[field] = None

    # Process each parent group: the running high/low restarts with every period
    for parent, group in data.groupby('parent_lookup_date'):
        previous_high = None
        previous_low = None

        for idx, row in group.iterrows():
            high = row['high']
            low = row['low']

            # Extension beyond the running extreme; 0 for the first bar of a
            # period and for bars that stay inside the range seen so far.
            gel_reu_value = max(0, high - previous_high) if previous_high is not None else 0
            gel_red_value = max(0, previous_low - low) if previous_low is not None else 0

            # Update calculated fields
            data.at[idx, 'gel_reu_value'] = gel_reu_value
            data.at[idx, 'gel_red_value'] = gel_red_value
            data.at[idx, 'gel_reu_flag'] = gel_reu_value > 0
            data.at[idx, 'gel_red_flag'] = gel_red_value > 0
            data.at[idx, 'gel_re_flag'] = (gel_reu_value > 0) or (gel_red_value > 0)

            # Update the running extremes
            previous_high = max(previous_high, high) if previous_high is not None else high
            previous_low = min(previous_low, low) if previous_low is not None else low

    # Export Gel File
    gel_output_file = os.path.join(output_path, f"{ticker}_Gel_{child_period}.csv")
    data.to_csv(gel_output_file, index=False)

    # Generate summary metrics for parent export.  Group only now, after the
    # flag columns have been filled, so the aggregation sees the final values.
    grouped = data.groupby('parent_lookup_date')
    summary = grouped.agg(
        start_date=('date', 'min'),
        end_date=('date', 'max'),
        child_count=('date', 'count'),
        reu_count=('gel_reu_flag', 'sum'),
        red_count=('gel_red_flag', 'sum'),
        total_rpc=('gel_re_flag', 'sum')
    ).reset_index()

    # Add metadata to summary
    summary['create_date'] = datetime.now().strftime('%Y-%m-%d')
    summary['create_time'] = datetime.now().strftime('%H:%M:%S')
    summary['jobname'] = jobname

    # Export parent summary file
    parent_output_file = os.path.join(parent_output_path, f"{ticker}_output_parent_{parent_period}.csv")
    summary.to_csv(parent_output_file, index=False)

    print(f"Exported Gel File: {gel_output_file}")
    print(f"Exported Parent Summary: {parent_output_file}")


Processing AAPL_D_1.csv for Ticker: AAPL
Exported Gel File: /content/output_gel/AAPL_Gel_D.csv
Exported Parent Summary: /content/output_parent/AAPL_output_parent_M.csv
Processing MMM_D_1.csv for Ticker: MMM
Exported Gel File: /content/output_gel/MMM_Gel_D.csv
Exported Parent Summary: /content/output_parent/MMM_output_parent_M.csv
Processing AFL_D_1.csv for Ticker: AFL
Exported Gel File: /content/output_gel/AFL_Gel_D.csv
Exported Parent Summary: /content/output_parent/AFL_output_parent_M.csv


In [None]:
import os
import pandas as pd
from datetime import datetime

# Run configuration
child_period, parent_period = "D", "M"  # Defaults: daily ("D") children, monthly ("M") parents
jobname = "BPB_20250105"  # Job name to include in output files

# Folder layout
input_path, output_path, parent_output_path = (
    "/content/input",
    "/content/output_gel",
    "/content/output_parent",
)

# Create both output folders; exist_ok leaves already-present folders untouched
os.makedirs(parent_output_path, exist_ok=True)
os.makedirs(output_path, exist_ok=True)

# Parent lookup logic
def assign_parent_lookup_date(row, child_period, parent_period):
    """
    Return the key of the parent period that contains ``row['date']``.

    Args:
        row: Mapping-like object (for example a DataFrame row) whose
            ``'date'`` entry is a date/datetime-like value.
        child_period: Period code of the input bars ("D", "M" or "Q").
        parent_period: Period code of the bucket the bars roll up into
            ("W", "M", "Q" or "Y").

    Returns:
        str: ``"YYYY/WW"`` (ISO year and week) for D->W, ``"YYYY/MM"`` for
        D->M, ``"YYYY/Qn"`` for M->Q, and ``"YYYY"`` for M->Y and Q->Y.

    Raises:
        ValueError: If the child/parent combination is not supported.
    """
    if child_period == "D" and parent_period == "W":
        # isocalendar() yields the ISO year, which can differ from the
        # calendar year for days adjacent to New Year.
        year, week, _ = row['date'].isocalendar()
        return f"{year}/{week:02d}"
    if child_period == "D" and parent_period == "M":
        return row['date'].strftime('%Y/%m')
    if child_period == "M" and parent_period == "Q":
        quarter = (row['date'].month - 1) // 3 + 1
        return f"{row['date'].year}/Q{quarter}"
    if parent_period == "Y" and child_period in ("M", "Q"):
        # The parent of a month or of a quarter is the calendar year.  Keying
        # quarters by "YYYY/Qn" would put every quarter in its own group.
        return f"{row['date'].year}"
    raise ValueError(f"Unsupported child-parent combination: {child_period}:{parent_period}")

# Process each input file.
# For every "<ticker>_<period>_<rolling range>.csv" file: key each bar by its
# parent period, build the developing open/high/low/close of that period bar
# by bar, then export the bar-level file and a per-parent summary.
for file_name in os.listdir(input_path):
    # Anything that is not a CSV is ignored silently
    if not file_name.endswith('.csv'):
        continue

    parts = file_name.split('_')
    if len(parts) < 3:
        print(f"Skipping invalid file: {file_name}")
        continue

    ticker = parts[0]  # Extract Ticker
    # The third name part still carries the extension ("1.csv"), so strip it
    # before converting; a non-numeric duration marks the file as invalid.
    try:
        rolling_range_dur = int(parts[2].split('.')[0])
    except ValueError:
        print(f"Skipping invalid file: {file_name}")
        continue
    print(f"Processing {file_name} for Ticker: {ticker}")

    # Load and validate data.  Validation runs before any column is touched so
    # that a missing 'date' column is reported with the ValueError below rather
    # than a bare KeyError from the datetime conversion.
    input_file = os.path.join(input_path, file_name)
    data = pd.read_csv(input_file)
    required_columns = ['date', 'open', 'high', 'low', 'close']
    for col in required_columns:
        if col not in data.columns:
            raise ValueError(f"Missing required column: {col} in {file_name}")
    data['date'] = pd.to_datetime(data['date'])

    # Add metadata fields (numbered in file order, before the sort below)
    data['serial_id'] = data.index + 1  # Simple row-based serial ID
    data['row_number'] = data.index + 1
    data['ticker'] = ticker
    data['rolling_range_dur'] = rolling_range_dur
    data['temporal_period'] = child_period

    # Assign parent_lookup_date
    data['parent_lookup_date'] = data.apply(
        lambda row: assign_parent_lookup_date(row, child_period, parent_period), axis=1
    )

    # Sort so bars are visited chronologically inside each parent period
    data = data.sort_values(by=['parent_lookup_date', 'date'])

    # Initialize calculated fields.  Columns not filled in below stay empty
    # placeholders in the export.
    calculated_fields = [
        'gel_reu_value', 'gel_red_value', 'gel_reu_flag', 'gel_red_flag', 'gel_re_flag',
        'gel_range', 'gel_percent_r', 'gel_ce_percent', 'gel_epc_dir', 'gel_epc',
        'gel_epc_hp', 'gel_rpc', 'gel_e1_value', 'gel_e2_value', 'gel_fre_dir', 'gelo',
        'gelh', 'gell', 'gelc', 'gel_total_rpc', 'range', 'gel_period_percent_r',
        'gel_twoway', 'gel_dir_count', 'gel_e1_flag', 'gel_e2_flag', 're_flag',
        're_value', 'twoway', 'fre_dir_input', 'ro', 'rh', 'rl', 'rc'
    ]
    for field in calculated_fields:
        data[field] = None

    # Process each parent group: the developing bar restarts with every period
    for parent, group in data.groupby('parent_lookup_date'):
        period_open = None
        previous_high = None
        previous_low = None

        for idx, row in group.iterrows():
            high = row['high']
            low = row['low']
            close = row['close']
            open_ = row['open']

            # Extension beyond the running extreme; 0 for the first bar of a
            # period and for bars that stay inside the range seen so far.
            gel_reu_value = max(0, high - previous_high) if previous_high is not None else 0
            gel_red_value = max(0, previous_low - low) if previous_low is not None else 0

            # Developing bar of the parent period: the open is fixed by the
            # first child bar, the high/low are running extremes (the low must
            # be allowed to fall), and the close follows the latest child bar.
            gelo = open_ if period_open is None else period_open
            gelh = high if previous_high is None else max(previous_high, high)
            gell = low if previous_low is None else min(previous_low, low)
            gelc = close

            # Update calculated fields
            data.at[idx, 'gel_reu_value'] = gel_reu_value
            data.at[idx, 'gel_red_value'] = gel_red_value
            data.at[idx, 'gel_reu_flag'] = gel_reu_value > 0
            data.at[idx, 'gel_red_flag'] = gel_red_value > 0
            data.at[idx, 'gel_re_flag'] = (gel_reu_value > 0) or (gel_red_value > 0)
            data.at[idx, 'gelo'] = gelo
            data.at[idx, 'gell'] = gell
            data.at[idx, 'gelh'] = gelh
            data.at[idx, 'gelc'] = gelc
            data.at[idx, 'range'] = high - low  # range of this child bar alone
            data.at[idx, 'gel_range'] = gelh - gell  # range of the developing parent bar

            # Carry the developing bar forward to the next child bar
            period_open = gelo
            previous_high = gelh
            previous_low = gell

    # Add metadata
    data['create_date'] = datetime.now().strftime('%Y-%m-%d')
    data['create_time'] = datetime.now().strftime('%H:%M:%S')
    data['jobname'] = jobname

    # Export Gel File
    gel_output_file = os.path.join(output_path, f"{ticker}_Gel_{child_period}.csv")
    data.to_csv(gel_output_file, index=False)

    # Generate summary metrics for parent export.  Group only now, after the
    # flag columns have been filled, so the aggregation sees the final values.
    grouped = data.groupby('parent_lookup_date')
    summary = grouped.agg(
        start_date=('date', 'min'),
        end_date=('date', 'max'),
        child_count=('date', 'count'),
        reu_count=('gel_reu_flag', 'sum'),
        red_count=('gel_red_flag', 'sum'),
        total_rpc=('gel_re_flag', 'sum')
    ).reset_index()

    # Add metadata to summary
    summary['create_date'] = datetime.now().strftime('%Y-%m-%d')
    summary['create_time'] = datetime.now().strftime('%H:%M:%S')
    summary['jobname'] = jobname

    # Export parent summary file
    parent_output_file = os.path.join(parent_output_path, f"{ticker}_output_parent_{parent_period}.csv")
    summary.to_csv(parent_output_file, index=False)

    print(f"Exported Gel File: {gel_output_file}")
    print(f"Exported Parent Summary: {parent_output_file}")


ValueError: invalid literal for int() with base 10: '1.csv'

In [None]:
import os
import pandas as pd
from datetime import datetime

# Run configuration
child_period, parent_period = "D", "M"  # Defaults: daily ("D") children, monthly ("M") parents
jobname = "BPB_20250105"  # Job name to include in output files

# Folder layout
input_path, output_path, parent_output_path = (
    "/content/input",
    "/content/output_gel",
    "/content/output_parent",
)

# Create both output folders; exist_ok leaves already-present folders untouched
os.makedirs(parent_output_path, exist_ok=True)
os.makedirs(output_path, exist_ok=True)

# Parent lookup logic
def assign_parent_lookup_date(row, child_period, parent_period):
    """
    Return the key of the parent period that contains ``row['date']``.

    Args:
        row: Mapping-like object (for example a DataFrame row) whose
            ``'date'`` entry is a date/datetime-like value.
        child_period: Period code of the input bars ("D", "M" or "Q").
        parent_period: Period code of the bucket the bars roll up into
            ("W", "M", "Q" or "Y").

    Returns:
        str: ``"YYYY/WW"`` (ISO year and week) for D->W, ``"YYYY/MM"`` for
        D->M, ``"YYYY/Qn"`` for M->Q, and ``"YYYY"`` for M->Y and Q->Y.

    Raises:
        ValueError: If the child/parent combination is not supported.
    """
    if child_period == "D" and parent_period == "W":
        # isocalendar() yields the ISO year, which can differ from the
        # calendar year for days adjacent to New Year.
        year, week, _ = row['date'].isocalendar()
        return f"{year}/{week:02d}"
    if child_period == "D" and parent_period == "M":
        return row['date'].strftime('%Y/%m')
    if child_period == "M" and parent_period == "Q":
        quarter = (row['date'].month - 1) // 3 + 1
        return f"{row['date'].year}/Q{quarter}"
    if parent_period == "Y" and child_period in ("M", "Q"):
        # The parent of a month or of a quarter is the calendar year.  Keying
        # quarters by "YYYY/Qn" would put every quarter in its own group.
        return f"{row['date'].year}"
    raise ValueError(f"Unsupported child-parent combination: {child_period}:{parent_period}")

# Process each input file.
# For every "<ticker>_<period>_<rolling range>.csv" file: key each bar by its
# parent period, build the developing open/high/low/close of that period bar
# by bar, derive the range-extension metrics from it, then export the
# bar-level file and a per-parent summary.
for file_name in os.listdir(input_path):
    # Anything that is not a CSV is ignored silently
    if not file_name.endswith('.csv'):
        continue

    parts = file_name.split('_')
    if len(parts) < 3:
        print(f"Skipping invalid file: {file_name}")
        continue

    ticker = parts[0]
    rolling_range_dur = int(parts[2].split('.')[0])
    print(f"Processing {file_name} for Ticker: {ticker}")

    # Load and validate data.  Validation runs before any column is touched so
    # that a missing 'date' column is reported with the ValueError below rather
    # than a bare KeyError from the datetime conversion.
    input_file = os.path.join(input_path, file_name)
    data = pd.read_csv(input_file)
    required_columns = ['date', 'open', 'high', 'low', 'close']
    for col in required_columns:
        if col not in data.columns:
            raise ValueError(f"Missing required column: {col} in {file_name}")
    data['date'] = pd.to_datetime(data['date'])

    # Add metadata fields (numbered in file order, before the sort below)
    data['serial_id'] = data.index + 1
    data['row_number'] = data.index + 1
    data['ticker'] = ticker
    data['rolling_range_dur'] = rolling_range_dur
    data['temporal_period'] = child_period

    # Assign parent_lookup_date
    data['parent_lookup_date'] = data.apply(
        lambda row: assign_parent_lookup_date(row, child_period, parent_period), axis=1
    )

    # Sort so bars are visited chronologically inside each parent period
    data = data.sort_values(by=['parent_lookup_date', 'date'])

    # Initialize calculated fields.  The second list holds every value the
    # loop below computes, including the three flag columns the summary
    # aggregates; the first list's unfilled columns stay empty placeholders.
    calculated_fields = [
        'gel_rpc', 'gel_e1_value', 'gel_e2_value', 'gel_fre_dir',
        'gel_total_rpc', 'range', 'gel_period_percent_r', 'gel_twoway',
        'gel_dir_count', 'gel_e1_flag', 'gel_e2_flag', 're_flag', 're_value',
        'twoway', 'fre_dir_input'
    ]
    calculated_fields += [
        'gel_reu_value', 'gel_red_value', 'gel_reu_flag', 'gel_red_flag', 'gel_re_flag',
        'gelo', 'gelh', 'gell', 'gelc', 'gel_range', 'gel_percent_r',
        'gel_ce_percent', 'gel_epc', 'gel_epc_dir', 'gel_epc_hp',
        'ro', 'rh', 'rl', 'rc'
    ]
    for field in calculated_fields:
        data[field] = None

    # Running count of range-extending bars over the whole file, in processing
    # (chronological) order.  It is deliberately not reset per parent period,
    # matching the previous-row accumulation this replaces.
    total_rpc_so_far = 0

    # Process each parent group: the developing bar restarts with every period
    for parent, group in data.groupby('parent_lookup_date'):
        previous_gelo = None
        previous_gelh = None
        previous_gell = None
        previous_gelc = None

        for idx, row in group.sort_values('date').iterrows():
            high = row['high']
            low = row['low']
            close = row['close']
            open_ = row['open']

            if previous_gelh is None:
                # First bar of the parent period: it defines the developing
                # bar and cannot extend anything.
                gelo = open_
                gelh = high
                gell = low
                gel_reu_value = 0
                gel_red_value = 0
            else:
                # Later bars: measure the extension beyond the developing
                # high/low, then widen the developing bar.
                gel_reu_value = max(0, high - previous_gelh)
                gel_red_value = max(0, previous_gell - low)
                gelo = previous_gelo
                gelh = max(previous_gelh, high)
                gell = min(previous_gell, low)
            gelc = close

            # Calculate basic metrics
            range_ = high - low
            gel_range = gelh - gell
            # Position of the close inside the developing range (0 = low, 1 = high)
            gel_percent_r = (gelc - gell) / gel_range if gel_range > 0 else 0
            # Distance of the close from the nearer edge of the range
            gel_ce_percent = (1 - gel_percent_r) if gel_percent_r >= 0.5 else gel_percent_r
            gel_epc = round(gel_ce_percent / 0.1, 0)
            gel_epc_dir = "U" if gel_percent_r >= 0.5 else "D"
            gel_epc_hp = gel_ce_percent >= 0.25

            # Calculate RPC-related metrics
            gel_reu_flag = bool(gel_reu_value > 0)
            gel_red_flag = bool(gel_red_value > 0)
            gel_re_flag = gel_reu_flag or gel_red_flag
            gel_rpc = 1 if gel_re_flag else 0
            total_rpc_so_far += gel_rpc
            gel_e1_value = gel_reu_value if gel_epc_dir == "U" else gel_red_value
            gel_e2_value = gel_red_value if gel_epc_dir == "U" else gel_reu_value
            gel_fre_dir = "U" if gel_reu_flag else "D" if gel_red_flag else "N"

            # Write every computed value back to the bar's row.  ro/rh/rl/rc
            # are the developing bar as it stood before this child bar.
            bar_values = {
                'range': range_,
                'gel_range': gel_range,
                'gel_percent_r': gel_percent_r,
                'gel_period_percent_r': gel_percent_r,
                'gel_ce_percent': gel_ce_percent,
                'gel_epc': gel_epc,
                'gel_epc_dir': gel_epc_dir,
                'gel_epc_hp': gel_epc_hp,
                'gel_reu_value': gel_reu_value,
                'gel_red_value': gel_red_value,
                'gel_reu_flag': gel_reu_flag,
                'gel_red_flag': gel_red_flag,
                'gel_re_flag': gel_re_flag,
                'gel_rpc': gel_rpc,
                'gel_total_rpc': total_rpc_so_far,
                'gel_e1_value': gel_e1_value,
                'gel_e2_value': gel_e2_value,
                'gel_fre_dir': gel_fre_dir,
                're_flag': gel_re_flag,
                'twoway': gel_reu_flag and gel_red_flag,
                'gelo': gelo,
                'gelh': gelh,
                'gell': gell,
                'gelc': gelc,
                'ro': previous_gelo,
                'rh': previous_gelh,
                'rl': previous_gell,
                'rc': previous_gelc,
            }
            for column, value in bar_values.items():
                data.at[idx, column] = value

            # Carry the developing bar forward to the next child bar
            previous_gelo = gelo
            previous_gelh = gelh
            previous_gell = gell
            previous_gelc = gelc

    # Add metadata
    data['create_date'] = datetime.now().strftime('%Y-%m-%d')
    data['create_time'] = datetime.now().strftime('%H:%M:%S')
    data['jobname'] = jobname

    # Export Gel File
    gel_output_file = os.path.join(output_path, f"{ticker}_Gel_{child_period}.csv")
    data.to_csv(gel_output_file, index=False)

    # Generate summary metrics for parent export.  Group only now, after the
    # flag columns have been filled, so the aggregation sees the final values.
    grouped = data.groupby('parent_lookup_date')
    summary = grouped.agg(
        start_date=('date', 'min'),
        end_date=('date', 'max'),
        child_count=('date', 'count'),
        reu_count=('gel_reu_flag', 'sum'),
        red_count=('gel_red_flag', 'sum'),
        total_rpc=('gel_re_flag', 'sum')
    ).reset_index()

    # Add metadata to summary
    summary['create_date'] = datetime.now().strftime('%Y-%m-%d')
    summary['create_time'] = datetime.now().strftime('%H:%M:%S')
    summary['jobname'] = jobname

    # Export parent summary file
    parent_output_file = os.path.join(parent_output_path, f"{ticker}_output_parent_{parent_period}.csv")
    summary.to_csv(parent_output_file, index=False)

    print(f"Exported Gel File: {gel_output_file}")
    print(f"Exported Parent Summary: {parent_output_file}")


Processing AAPL_D_1.csv for Ticker: AAPL


KeyError: "Column(s) ['gel_re_flag', 'gel_red_flag', 'gel_reu_flag'] do not exist"

In [None]:
import os
import pandas as pd
from datetime import datetime

# Run configuration
child_period, parent_period = "D", "M"  # Defaults: daily ("D") children, monthly ("M") parents
jobname = "BPB_20250105"  # Job name to include in output files

# Folder layout
input_path, output_path, parent_output_path = (
    "/content/input",
    "/content/output_gel",
    "/content/output_parent",
)

# Create both output folders; exist_ok leaves already-present folders untouched
os.makedirs(parent_output_path, exist_ok=True)
os.makedirs(output_path, exist_ok=True)

# Parent lookup logic
def assign_parent_lookup_date(row, child_period, parent_period):
    """
    Return the key of the parent period that contains ``row['date']``.

    Args:
        row: Mapping-like object (for example a DataFrame row) whose
            ``'date'`` entry is a date/datetime-like value.
        child_period: Period code of the input bars ("D", "M" or "Q").
        parent_period: Period code of the bucket the bars roll up into
            ("W", "M", "Q" or "Y").

    Returns:
        str: ``"YYYY/WW"`` (ISO year and week) for D->W, ``"YYYY/MM"`` for
        D->M, ``"YYYY/Qn"`` for M->Q, and ``"YYYY"`` for M->Y and Q->Y.

    Raises:
        ValueError: If the child/parent combination is not supported.
    """
    if child_period == "D" and parent_period == "W":
        # isocalendar() yields the ISO year, which can differ from the
        # calendar year for days adjacent to New Year.
        year, week, _ = row['date'].isocalendar()
        return f"{year}/{week:02d}"
    if child_period == "D" and parent_period == "M":
        return row['date'].strftime('%Y/%m')
    if child_period == "M" and parent_period == "Q":
        quarter = (row['date'].month - 1) // 3 + 1
        return f"{row['date'].year}/Q{quarter}"
    if parent_period == "Y" and child_period in ("M", "Q"):
        # The parent of a month or of a quarter is the calendar year.  Keying
        # quarters by "YYYY/Qn" would put every quarter in its own group.
        return f"{row['date'].year}"
    raise ValueError(f"Unsupported child-parent combination: {child_period}:{parent_period}")

# For every "<ticker>_<period>_<rolling range>.csv" file: key each bar by its
# parent period, flag the bars that extend the developing high/low of that
# period, then export the bar-level file and a per-parent summary.
for file_name in os.listdir(input_path):
    # Anything that is not a CSV is ignored silently
    if not file_name.endswith('.csv'):
        continue

    parts = file_name.split('_')
    if len(parts) < 3:
        print(f"Skipping invalid file: {file_name}")
        continue

    ticker = parts[0]
    rolling_range_dur = int(parts[2].split('.')[0])
    print(f"Processing {file_name} for Ticker: {ticker}")

    # Load and validate data.  Validation runs before any column is touched so
    # that a missing 'date' column is reported with the ValueError below rather
    # than a bare KeyError from the datetime conversion.
    input_file = os.path.join(input_path, file_name)
    data = pd.read_csv(input_file)
    required_columns = ['date', 'open', 'high', 'low', 'close']
    for col in required_columns:
        if col not in data.columns:
            raise ValueError(f"Missing required column: {col} in {file_name}")
    data['date'] = pd.to_datetime(data['date'])

    required_columns_for_summary = ['gel_re_flag', 'gel_red_flag', 'gel_reu_flag']

    # Add metadata fields (numbered in file order, before the sort below)
    data['serial_id'] = data.index + 1
    data['row_number'] = data.index + 1
    data['ticker'] = ticker
    data['rolling_range_dur'] = rolling_range_dur
    data['temporal_period'] = child_period

    # Assign parent_lookup_date
    data['parent_lookup_date'] = data.apply(
        lambda row: assign_parent_lookup_date(row, child_period, parent_period), axis=1
    )

    # Sort so bars are visited chronologically inside each parent period
    data = data.sort_values(by=['parent_lookup_date', 'date'])

    # Initialize calculated fields.  Columns not filled in below stay empty
    # placeholders in the export.
    calculated_fields = [
        'gel_rpc', 'gel_e1_value', 'gel_e2_value', 'gel_fre_dir',
        'gel_total_rpc', 'range', 'gel_period_percent_r', 'gel_twoway',
        'gel_dir_count', 'gel_e1_flag', 'gel_e2_flag', 're_flag', 'gel_re_flag',
        'gel_red_flag', 'gel_reu_flag', 're_value', 'twoway', 'fre_dir_input'
    ]
    for field in calculated_fields:
        data[field] = None

    # Process each parent group: the developing bar restarts with every period
    for parent, group in data.groupby('parent_lookup_date'):
        previous_gelo = None
        previous_gelh = None
        previous_gell = None
        previous_gelc = None

        for idx, row in group.sort_values('date').iterrows():
            high = row['high']
            low = row['low']
            close = row['close']
            open_ = row['open']

            if previous_gelh is None:
                # First bar of the parent period: it defines the developing
                # bar and cannot extend anything.
                gelo = open_
                gelh = high
                gell = low
                gel_reu_value = 0
                gel_red_value = 0
            else:
                # Later bars: measure the extension beyond the developing
                # high/low, then widen the developing bar.
                gel_reu_value = max(0, high - previous_gelh)
                gel_red_value = max(0, previous_gell - low)
                gelo = previous_gelo
                gelh = max(previous_gelh, high)
                gell = min(previous_gell, low)
            gelc = close

            # Calculate basic metrics
            range_ = high - low
            gel_range = gelh - gell
            # Position of the close inside the developing range (0 = low, 1 = high)
            gel_percent_r = (gelc - gell) / gel_range if gel_range > 0 else 0

            # Calculate flags
            gel_reu_flag = bool(gel_reu_value > 0)
            gel_red_flag = bool(gel_red_value > 0)
            gel_re_flag = gel_reu_flag or gel_red_flag

            # Update fields
            data.at[idx, 'range'] = range_
            data.at[idx, 'gel_period_percent_r'] = gel_percent_r
            data.at[idx, 'gel_reu_flag'] = gel_reu_flag
            data.at[idx, 'gel_red_flag'] = gel_red_flag
            data.at[idx, 'gel_re_flag'] = gel_re_flag

            # Carry the developing bar forward to the next child bar
            previous_gelo = gelo
            previous_gelh = gelh
            previous_gell = gell
            previous_gelc = gelc

    # Ensure fields for aggregation
    for col in required_columns_for_summary:
        if col not in data.columns:
            data[col] = False

    # Generate summary metrics.  Group only now, after the flag columns have
    # been filled, so the aggregation sees the final values.
    grouped = data.groupby('parent_lookup_date')
    summary = grouped.agg(
        start_date=('date', 'min'),
        end_date=('date', 'max'),
        child_count=('date', 'count'),
        reu_count=('gel_reu_flag', 'sum'),
        red_count=('gel_red_flag', 'sum'),
        total_rpc=('gel_re_flag', 'sum')
    ).reset_index()

    # Export files
    gel_output_file = os.path.join(output_path, f"{ticker}_Gel_{child_period}.csv")
    data.to_csv(gel_output_file, index=False)

    parent_output_file = os.path.join(parent_output_path, f"{ticker}_output_parent_{parent_period}.csv")
    summary.to_csv(parent_output_file, index=False)


Processing AAPL_D_1.csv for Ticker: AAPL
Processing MMM_D_1.csv for Ticker: MMM
Processing AFL_D_1.csv for Ticker: AFL
