<a href="https://colab.research.google.com/github/BaronVonBussin/NewTransit/blob/main/nested_core.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

SCROLL TO BOTTOM CELL FOR BEST


In [1]:
NONONONONONOONONONON
import pandas as pd
import os

class ConnectTemporalTimeframes:
    def __init__(self, lower_period="D", higher_period="ME"):
        self.lower_period = lower_period
        self.higher_period = higher_period

    def read_csv_files(self, folder_path):
        """
        Reads all CSV files in the specified folder, skipping the second row.
        Assumes the files are named with the ticker (e.g., AAPL.csv).
        """
        data = {}
        for file in os.listdir(folder_path):
            if file.endswith(".csv"):
                ticker = os.path.splitext(file)[0]
                df = pd.read_csv(
                    os.path.join(folder_path, file),
                    #skiprows=[1],  # Skip the second row
                    parse_dates=["date"]
                )
                data[ticker] = df
        return data

    def process_ticker_data(self, df):
        """
        Processes data for a single ticker to calculate the requested metrics.
        """
        # Add fields for intra-period calculations
        df["intra_period_count"] = 0
        df["intra_period_high"] = 0
        df["intra_period_low"] = 0
        df["intra_period_bar_of_h"] = 0
        df["intra_period_bar_of_l"] = 0
        df["intra_period_rpc_direction"] = "N"
        df["intra_period_rpc"] = 0
        df["intra_period_reu"] = 0
        df["intra_period_red"] = 0
        df["bar_rpc_direction"] = "N"
        df["bar_rpc"] = 0

        # Group by higher period
        grouped = df.groupby(pd.Grouper(key="date", freq=self.higher_period))
        results = []

        for group_name, group in grouped:
            group = group.sort_values(by="date").reset_index(drop=True)
            intra_period_high = group.iloc[0]["high"]
            intra_period_low = group.iloc[0]["low"]
            intra_period_bar = 1
            intra_period_rpc = 0
            intra_period_rpc_direction = "N"
            higher_period_rpc_count = 0
            intra_period_bar_of_h = 0
            intra_period_bar_of_l = 0

            for idx, row in group.iterrows():
                if idx == 0:
                    # Initialize for the first row in the group
                    row["intra_period_rpc_direction"] = "N"
                    row["intra_period_rpc"] = 0
                else:
                    # Update intra-period high/low
                    intra_period_high = max(intra_period_high, row["high"])
                    intra_period_low = min(intra_period_low, row["low"])

                    # Check for range expansions
                    if row["high"] > intra_period_high:
                        row["intra_period_reu"] = row["high"] - intra_period_high
                        intra_period_high = row["high"]
                        intra_period_bar_of_h = intra_period_bar
                    else:
                        row["intra_period_reu"] = 0

                    if row["low"] < intra_period_low:
                        row["intra_period_red"] = intra_period_low - row["low"]
                        intra_period_low = row["low"]
                        intra_period_bar_of_l = intra_period_bar
                    else:
                        row["intra_period_red"] = 0

                    # Update bar_rpc and rpc_direction
                    if row["intra_period_reu"] > 0 and row["intra_period_red"] > 0:
                        row["bar_rpc"] = 2
                        row["bar_rpc_direction"] = "U" if (row["close"] - row["low"]) / (row["high"] - row["low"]) >= 0.5 else "D"
                    elif row["intra_period_reu"] > 0:
                        row["bar_rpc"] = 1
                        row["bar_rpc_direction"] = "U"
                    elif row["intra_period_red"] > 0:
                        row["bar_rpc"] = 1
                        row["bar_rpc_direction"] = "D"
                    else:
                        row["bar_rpc"] = 0
                        row["bar_rpc_direction"] = intra_period_rpc_direction

                    # Update higher period rpc
                    if row["bar_rpc"] > 0:
                        if row["bar_rpc_direction"] != intra_period_rpc_direction:
                            higher_period_rpc_count += 1
                        intra_period_rpc_direction = row["bar_rpc_direction"]

                # Increment the intra-period bar count
                row["intra_period_count"] = intra_period_bar
                intra_period_bar += 1

                # Append to results
                results.append(row)

        return pd.DataFrame(results)

    def generate_report(self, data, output_folder):
        """
        Generates a summary report for each ticker and writes the processed data to CSV.
        """
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        summary = []
        for ticker, df in data.items():
            processed_data = self.process_ticker_data(df)
            processed_data.to_csv(os.path.join(output_folder, f"{ticker}_processed.csv"), index=False)

            # Generate summary stats
            grouped = processed_data.groupby(pd.Grouper(key="date", freq=self.higher_period))
            for group_name, group in grouped:
                summary.append({
                    "Ticker": ticker,
                    "Higher_Period": group_name,
                    "Duration": group["intra_period_count"].max(),
                    "Number_Of_Directions": group["intra_period_rpc"].max(),
                    "Max_intra_period_bar_of_h": group["intra_period_bar_of_h"].max(),
                    "Max_intra_period_bar_of_l": group["intra_period_bar_of_l"].max(),
                    "Number_of_intra_period_expansions": group["bar_rpc"].sum()
                })

        # Save summary
        summary_df = pd.DataFrame(summary)
        summary_df.to_csv(os.path.join(output_folder, "summary_report.csv"), index=False)

# Example Usage
folder_path = "/content/input"  # Replace with your folder path
output_folder = "processed_output"
connector = ConnectTemporalTimeframes()
data = connector.read_csv_files(folder_path)
connector.generate_report(data, output_folder)


In [6]:
import pandas as pd
import os


class ConnectTemporalTimeframes:
    def __init__(self, lower_period="D", higher_period="ME"):
        self.lower_period = lower_period
        self.higher_period = higher_period

    def read_csv_files(self, folder_path):
        """
        Reads all CSV files in the specified folder, skipping the second row.
        Assumes the files are named with the ticker (e.g., AAPL.csv).
        """
        data = {}
        for file in os.listdir(folder_path):
            if file.endswith(".csv"):
                ticker = os.path.splitext(file)[0]
                df = pd.read_csv(
                    os.path.join(folder_path, file),
                    skiprows=[1],  # Skip the second row
                    parse_dates=["date"]
                )
                data[ticker] = df
        return data

    def process_ticker_data(self, df):
        """
        Processes data for a single ticker to calculate the requested metrics.
        """
        # Add fields for intra-period calculations
        df["intra_period_count"] = 0
        df["intra_period_high"] = 0
        df["intra_period_low"] = 0
        df["intra_period_bar_of_h"] = 0
        df["intra_period_bar_of_l"] = 0
        df["intra_period_rpc_direction"] = "N"
        df["intra_period_rpc"] = 0
        df["intra_period_reu"] = 0
        df["intra_period_red"] = 0
        df["bar_rpc_direction"] = "N"
        df["bar_rpc"] = 0

        # Group by higher period
        grouped = df.groupby(pd.Grouper(key="date", freq=self.higher_period))
        results = []

        for group_name, group in grouped:
            group = group.sort_values(by="date").reset_index(drop=True)
            intra_period_high = group.iloc[0]["high"]
            intra_period_low = group.iloc[0]["low"]
            intra_period_bar = 1
            intra_period_rpc = 0
            intra_period_rpc_direction = "N"
            higher_period_rpc_count = 0
            intra_period_bar_of_h = 0
            intra_period_bar_of_l = 0

            for idx, row in group.iterrows():
                if idx == 0:
                    # Initialize for the first row in the group
                    row["intra_period_high"] = row["high"]
                    row["intra_period_low"] = row["low"]
                    row["intra_period_rpc_direction"] = "N"
                    row["intra_period_rpc"] = 0
                else:
                    # Calculate intra-period high and low
                    prior_high = intra_period_high
                    prior_low = intra_period_low
                    intra_period_high = max(row["high"], prior_high)
                    intra_period_low = min(row["low"], prior_low)

                    # Check for range expansions
                    if row["high"] > prior_high:
                        row["intra_period_reu"] = row["high"] - prior_high
                        intra_period_bar_of_h = intra_period_bar
                    else:
                        row["intra_period_reu"] = 0

                    if row["low"] < prior_low:
                        row["intra_period_red"] = prior_low - row["low"]
                        intra_period_bar_of_l = intra_period_bar
                    else:
                        row["intra_period_red"] = 0

                    # Update bar_rpc and rpc_direction
                    if row["intra_period_reu"] > 0 and row["intra_period_red"] > 0:
                        row["bar_rpc"] = 2
                        row["bar_rpc_direction"] = "U" if (row["close"] - row["low"]) / (row["high"] - row["low"]) >= 0.5 else "D"
                    elif row["intra_period_reu"] > 0:
                        row["bar_rpc"] = 1
                        row["bar_rpc_direction"] = "U"
                    elif row["intra_period_red"] > 0:
                        row["bar_rpc"] = 1
                        row["bar_rpc_direction"] = "D"
                    else:
                        row["bar_rpc"] = 0
                        row["bar_rpc_direction"] = intra_period_rpc_direction

                    # Update higher period rpc
                    if row["bar_rpc"] > 0:
                        if row["bar_rpc_direction"] != intra_period_rpc_direction:
                            higher_period_rpc_count += 1
                        intra_period_rpc_direction = row["bar_rpc_direction"]

                # Increment the intra-period bar count
                row["intra_period_count"] = intra_period_bar
                intra_period_bar += 1

                # Append to results
                results.append(row)

        return pd.DataFrame(results)

    def generate_report(self, data, output_folder):
        """
        Generates a summary report for each ticker and writes the processed data to CSV.
        """
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        summary = []
        for ticker, df in data.items():
            processed_data = self.process_ticker_data(df)
            processed_data.to_csv(os.path.join(output_folder, f"{ticker}_processed.csv"), index=False)

            # Generate summary stats
            grouped = processed_data.groupby(pd.Grouper(key="date", freq=self.higher_period))
            for group_name, group in grouped:
                summary.append({
                    "Ticker": ticker,
                    "Higher_Period": group_name,
                    "Duration": group["intra_period_count"].max(),
                    "Number_Of_Directions": group["intra_period_rpc"].max(),
                    "Max_intra_period_bar_of_h": group["intra_period_bar_of_h"].max(),
                    "Max_intra_period_bar_of_l": group["intra_period_bar_of_l"].max(),
                    "Number_of_intra_period_expansions": group["bar_rpc"].sum()
                })

        # Save summary
        summary_df = pd.DataFrame(summary)
        summary_df.to_csv(os.path.join(output_folder, "summary_report.csv"), index=False)


# Example Usage
folder_path = "input"  # Replace with your folder path
output_folder = "processed_output"
connector = ConnectTemporalTimeframes()
data = connector.read_csv_files(folder_path)
connector.generate_report(data, output_folder)


In [7]:
import pandas as pd
import os


class ConnectTemporalTimeframes:
    def __init__(self, lower_period="D", higher_period="ME"):
        self.lower_period = lower_period
        self.higher_period = higher_period

    def read_csv_files(self, folder_path):
        """
        Reads all CSV files in the specified folder, skipping the second row.
        Assumes the files are named with the ticker (e.g., AAPL.csv).
        """
        data = {}
        for file in os.listdir(folder_path):
            if file.endswith(".csv"):
                ticker = os.path.splitext(file)[0]
                df = pd.read_csv(
                    os.path.join(folder_path, file),
                    skiprows=[1],  # Skip the second row
                    parse_dates=["date"]
                )
                data[ticker] = df
        return data

    def process_ticker_data(self, df):
        """
        Processes data for a single ticker to calculate the requested metrics.
        """
        # Add fields for intra-period calculations
        df["intra_period_count"] = 0
        df["intra_period_high"] = 0
        df["intra_period_low"] = 0
        df["intra_period_bar_of_h"] = 0
        df["intra_period_bar_of_l"] = 0
        df["intra_period_rpc_direction"] = "N"
        df["intra_period_rpc"] = 0
        df["intra_period_reu"] = 0
        df["intra_period_red"] = 0
        df["bar_rpc_direction"] = "N"
        df["bar_rpc"] = 0

        # Group by higher period
        grouped = df.groupby(pd.Grouper(key="date", freq=self.higher_period))
        results = []

        for group_name, group in grouped:
            group = group.sort_values(by="date").reset_index(drop=True)
            intra_period_high = group.iloc[0]["high"]
            intra_period_low = group.iloc[0]["low"]
            intra_period_bar = 1
            intra_period_rpc = 0
            intra_period_rpc_direction = "N"
            higher_period_rpc_count = 0
            intra_period_bar_of_h = 0
            intra_period_bar_of_l = 0
            prior_intra_period_high = 0
            prior_intra_period_low = 0

            for idx, row in group.iterrows():
                if idx == 0:
                    # Initialize for the first row in the group
                    row["intra_period_high"] = row["high"]
                    row["intra_period_low"] = row["low"]
                    row["intra_period_rpc_direction"] = "N"
                    row["intra_period_rpc"] = 0
                    prior_intra_period_high = row["high"]
                    prior_intra_period_low = row["low"]
                else:
                    # Calculate intra-period high and low
                    prior_high = prior_intra_period_high
                    prior_low = prior_intra_period_low
                    intra_period_high = max(row["high"], prior_high)
                    intra_period_low = min(row["low"], prior_low)

                    # Check for range expansions
                    if row["high"] > prior_high:
                        row["intra_period_reu"] = row["high"] - prior_high
                        intra_period_bar_of_h = intra_period_bar
                    else:
                        row["intra_period_reu"] = 0

                    if row["low"] < prior_low:
                        row["intra_period_red"] = prior_low - row["low"]
                        intra_period_bar_of_l = intra_period_bar
                    else:
                        row["intra_period_red"] = 0

                    # Update bar_rpc and rpc_direction
                    if row["intra_period_reu"] > 0 and row["intra_period_red"] > 0:
                        row["bar_rpc"] = 2
                        row["bar_rpc_direction"] = "U" if (row["close"] - row["low"]) / (row["high"] - row["low"]) >= 0.5 else "D"
                    elif row["intra_period_reu"] > 0:
                        row["bar_rpc"] = 1
                        row["bar_rpc_direction"] = "U"
                    elif row["intra_period_red"] > 0:
                        row["bar_rpc"] = 1
                        row["bar_rpc_direction"] = "D"
                    else:
                        row["bar_rpc"] = 0
                        row["bar_rpc_direction"] = intra_period_rpc_direction

                    # Update higher period rpc
                    if row["bar_rpc"] > 0:
                        if row["bar_rpc_direction"] != intra_period_rpc_direction:
                            higher_period_rpc_count += 1
                        intra_period_rpc_direction = row["bar_rpc_direction"]

                # Store current intra_period_high and low for next iteration
                prior_intra_period_high = intra_period_high
                prior_intra_period_low = intra_period_low

                # Increment the intra-period bar count
                row["intra_period_count"] = intra_period_bar
                intra_period_bar += 1

                # Append to results
                results.append(row)

        return pd.DataFrame(results)

    def generate_report(self, data, output_folder):
        """
        Generates a summary report for each ticker and writes the processed data to CSV.
        """
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        summary = []
        for ticker, df in data.items():
            processed_data = self.process_ticker_data(df)
            processed_data.to_csv(os.path.join(output_folder, f"{ticker}_processed.csv"), index=False)

            # Generate summary stats
            grouped = processed_data.groupby(pd.Grouper(key="date", freq=self.higher_period))
            for group_name, group in grouped:
                summary.append({
                    "Ticker": ticker,
                    "Higher_Period": group_name,
                    "Duration": group["intra_period_count"].max(),
                    "Number_Of_Directions": group["intra_period_rpc"].max(),
                    "Max_intra_period_bar_of_h": group["intra_period_bar_of_h"].max(),
                    "Max_intra_period_bar_of_l": group["intra_period_bar_of_l"].max(),
                    "Number_of_intra_period_expansions": group["bar_rpc"].sum()
                })

        # Save summary
        summary_df = pd.DataFrame(summary)
        summary_df.to_csv(os.path.join(output_folder, "summary_report.csv"), index=False)


# Example Usage
folder_path = "input"  # Replace with your folder path
output_folder = "processed_output"
connector = ConnectTemporalTimeframes()
data = connector.read_csv_files(folder_path)
connector.generate_report(data, output_folder)

In [11]:
import pandas as pd
import os


class ConnectTemporalTimeframes:
    def __init__(self, lower_period="D", higher_period="ME"):
        self.lower_period = lower_period
        self.higher_period = higher_period

    def read_csv_files(self, folder_path):
        """
        Reads all CSV files in the specified folder, skipping the second row.
        Assumes the files are named with the ticker (e.g., AAPL.csv).
        """
        data = {}
        for file in os.listdir(folder_path):
            if file.endswith(".csv"):
                ticker = os.path.splitext(file)[0]
                df = pd.read_csv(
                    os.path.join(folder_path, file),
                    skiprows=[1],  # Skip the second row
                    parse_dates=["date"]
                )
                data[ticker] = df
        return data

    def process_ticker_data(self, df):
        """
        Processes data for a single ticker to calculate the requested metrics.
        """
        # Initialize fields for processing
        df["intra_period_count"] = 0
        df["intra_period_high"] = 0
        df["intra_period_low"] = 0
        df["intra_period_bar_of_h"] = 0
        df["intra_period_bar_of_l"] = 0
        df["intra_period_rpc_direction"] = "N"
        df["intra_period_rpc"] = 0
        df["intra_period_reu"] = 0
        df["intra_period_red"] = 0
        df["bar_rpc_direction"] = "N"
        df["bar_rpc"] = 0

        # Group data by higher_period
        grouped = df.groupby(pd.Grouper(key="date", freq=self.higher_period))
        results = []

        for group_name, group in grouped:
            group = group.sort_values(by="date").reset_index(drop=True)

            # Calculate higher_period high and low
            higher_period_high = group["high"].max()
            higher_period_low = group["low"].min()

            # Initialize variables for intra-period processing
            intra_period_high = group["high"].iloc[0]
            intra_period_low = group["low"].iloc[0]
            intra_period_bar = 1
            intra_period_rpc_direction = "N"
            higher_period_rpc_count = 0

            # Run through the rows in the group
            for idx, row in group.iterrows():
                if idx == 0:
                    # Initialize first row in group
                    group.loc[idx, "intra_period_high"] = row["high"]
                    group.loc[idx, "intra_period_low"] = row["low"]
                else:
                    # Update intra-period high and low
                    group.loc[idx, "intra_period_high"] = max(row["high"], group["intra_period_high"].shift(1).iloc[idx])
                    group.loc[idx, "intra_period_low"] = min(row["low"], group["intra_period_low"].shift(1).iloc[idx])

                    # Check for range expansions
                    if row["high"] > group["intra_period_high"].shift(1).iloc[idx]:
                        group.loc[idx, "intra_period_reu"] = row["high"] - group["intra_period_high"].shift(1).iloc[idx]
                    else:
                        group.loc[idx, "intra_period_reu"] = 0

                    if row["low"] < group["intra_period_low"].shift(1).iloc[idx]:
                        group.loc[idx, "intra_period_red"] = group["intra_period_low"].shift(1).iloc[idx] - row["low"]
                    else:
                        group.loc[idx, "intra_period_red"] = 0

                # Check for matches with higher_period high/low
                if row["high"] == higher_period_high:
                    group.loc[idx, "intra_period_bar_of_h"] = intra_period_bar
                if row["low"] == higher_period_low:
                    group.loc[idx, "intra_period_bar_of_l"] = intra_period_bar

                # Update intra-period bar count
                group.loc[idx, "intra_period_count"] = intra_period_bar
                intra_period_bar += 1

            results.append(group)

        return pd.concat(results)

    def generate_report(self, data, output_folder):
        """
        Generates a summary report for each ticker and writes the processed data to CSV.
        """
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        summary = []
        for ticker, df in data.items():
            processed_data = self.process_ticker_data(df)
            processed_data.to_csv(os.path.join(output_folder, f"{ticker}_processed.csv"), index=False)

            # Generate summary stats
            grouped = processed_data.groupby(pd.Grouper(key="date", freq=self.higher_period))
            for group_name, group in grouped:
                summary.append({
                    "Ticker": ticker,
                    "Higher_Period": group_name,
                    "Duration": group["intra_period_count"].max(),
                    "Number_Of_Directions": group["intra_period_rpc"].max(),
                    "Max_intra_period_bar_of_h": group["intra_period_bar_of_h"].max(),
                    "Max_intra_period_bar_of_l": group["intra_period_bar_of_l"].max(),
                    "Number_of_intra_period_expansions": group["bar_rpc"].sum()
                })

        # Save summary
        summary_df = pd.DataFrame(summary)
        summary_df.to_csv(os.path.join(output_folder, "summary_report.csv"), index=False)


# Example Usage
folder_path = "input"  # Replace with your folder path
output_folder = "processed_output"
connector = ConnectTemporalTimeframes()
data = connector.read_csv_files(folder_path)
connector.generate_report(data, output_folder)


  group.loc[idx, "intra_period_high"] = row["high"]
  group.loc[idx, "intra_period_low"] = row["low"]
  group.loc[idx, "intra_period_red"] = group["intra_period_low"].shift(1).iloc[idx] - row["low"]
  group.loc[idx, "intra_period_reu"] = row["high"] - group["intra_period_high"].shift(1).iloc[idx]
  group.loc[idx, "intra_period_high"] = row["high"]
  group.loc[idx, "intra_period_low"] = row["low"]
  group.loc[idx, "intra_period_red"] = group["intra_period_low"].shift(1).iloc[idx] - row["low"]
  group.loc[idx, "intra_period_reu"] = row["high"] - group["intra_period_high"].shift(1).iloc[idx]
  group.loc[idx, "intra_period_high"] = row["high"]
  group.loc[idx, "intra_period_low"] = row["low"]
  group.loc[idx, "intra_period_reu"] = row["high"] - group["intra_period_high"].shift(1).iloc[idx]
  group.loc[idx, "intra_period_high"] = row["high"]
  group.loc[idx, "intra_period_low"] = row["low"]
  group.loc[idx, "intra_period_red"] = group["intra_period_low"].shift(1).iloc[idx] - row["low"]
  gr

In [12]:
import pandas as pd
import os

class ConnectTemporalPeriods:
    def __init__(self, child_period="D", parent_period="ME"):
        self.child_period = child_period
        self.parent_period = parent_period

    def read_csv_files(self, input_folder):
        """
        Reads all CSV files in the specified folder. Assumes files are named TICKER.csv
        and contain the columns: date, open, high, low, close.
        """
        data = {}
        for file in os.listdir(input_folder):
            if file.endswith(".csv"):
                ticker = os.path.splitext(file)[0]
                df = pd.read_csv(
                    os.path.join(input_folder, file),
                    parse_dates=["date"]
                )
                data[ticker] = df
        return data

    def group_by_parent_period(self, df):
        """
        Groups the data by the parent_period and calculates parent stats.
        Returns a DataFrame with one row per parent group and the calculated fields.
        """
        grouped = df.groupby(pd.Grouper(key="date", freq=self.parent_period))
        summary = []
        for name, group in grouped:
            summary.append({
                "date": name,
                "num_rows": len(group),
                "parent_high": group["high"].max(),
                "parent_low": group["low"].min()
            })
        return pd.DataFrame(summary)

    def process_child_period(self, df, parent_stats):
        """
        Processes the child-period data to calculate intra-period fields and parent-to-date stats.
        """
        df = df.sort_values(by="date").reset_index(drop=True)
        parent_stats = parent_stats.set_index("date")

        # Initialize new fields
        df["intra_period_count"] = 0
        df["intra_period_high"] = 0
        df["intra_period_low"] = 0
        df["intra_period_bar_of_h"] = 0
        df["intra_period_bar_of_l"] = 0
        df["rpc_direction"] = "N"
        df["bar_rpc"] = 0
        df["intra_period_reu"] = 0
        df["intra_period_red"] = 0
        df["intra_period_cumulative_rpc"] = 0

        # Iterate through the rows and update fields
        for i, row in df.iterrows():
            parent_date = row["date"].floor(self.parent_period)
            parent = parent_stats.loc[parent_date]

            # Intra-period high/low calculations
            if i == 0:
                df.loc[i, "intra_period_high"] = row["high"]
                df.loc[i, "intra_period_low"] = row["low"]
            else:
                df.loc[i, "intra_period_high"] = max(row["high"], df.loc[i - 1, "intra_period_high"])
                df.loc[i, "intra_period_low"] = min(row["low"], df.loc[i - 1, "intra_period_low"])

            # Range expansions
            prior_high = df.loc[i - 1, "intra_period_high"] if i > 0 else row["high"]
            prior_low = df.loc[i - 1, "intra_period_low"] if i > 0 else row["low"]

            if row["high"] > prior_high:
                df.loc[i, "intra_period_reu"] = row["high"] - prior_high
            if row["low"] < prior_low:
                df.loc[i, "intra_period_red"] = prior_low - row["low"]

            # Parent range expansion
            if row["high"] == parent["parent_high"]:
                df.loc[i, "intra_period_bar_of_h"] = i + 1
            if row["low"] == parent["parent_low"]:
                df.loc[i, "intra_period_bar_of_l"] = i + 1

            # RPC logic
            if i > 0:
                if row["high"] > prior_high and row["low"] < prior_low:
                    df.loc[i, "bar_rpc"] = 2
                    df.loc[i, "rpc_direction"] = "U" if (row["close"] - row["low"]) / (row["high"] - row["low"]) >= 0.5 else "D"
                elif row["high"] > prior_high:
                    df.loc[i, "bar_rpc"] = 1
                    df.loc[i, "rpc_direction"] = "U"
                elif row["low"] < prior_low:
                    df.loc[i, "bar_rpc"] = 1
                    df.loc[i, "rpc_direction"] = "D"
                else:
                    df.loc[i, "rpc_direction"] = df.loc[i - 1, "rpc_direction"]

            # Update cumulative RPC
            if i > 0:
                if df.loc[i, "rpc_direction"] != df.loc[i - 1, "rpc_direction"]:
                    df.loc[i, "intra_period_cumulative_rpc"] = df.loc[i - 1, "intra_period_cumulative_rpc"] + 1
                else:
                    df.loc[i, "intra_period_cumulative_rpc"] = df.loc[i - 1, "intra_period_cumulative_rpc"]

        return df

    def generate_reports(self, data, parent_output_folder, processed_output_folder):
        """
        Processes all tickers, generates parent and processed data, and writes to CSVs.
        """
        if not os.path.exists(parent_output_folder):
            os.makedirs(parent_output_folder)
        if not os.path.exists(processed_output_folder):
            os.makedirs(processed_output_folder)

        for ticker, df in data.items():
            print(f"Processing {ticker}...")

            # Generate parent stats
            parent_stats = self.group_by_parent_period(df)
            parent_stats.to_csv(os.path.join(parent_output_folder, f"{ticker}_parent.csv"), index=False)

            # Process child period data
            processed_data = self.process_child_period(df, parent_stats)
            processed_data.to_csv(os.path.join(processed_output_folder, f"{ticker}_processed.csv"), index=False)

            print(f"{ticker} processing complete.")

# Example Usage
input_folder = "input"
parent_output_folder = "parent_output"
processed_output_folder = "processed_output"

connector = ConnectTemporalPeriods()
data = connector.read_csv_files(input_folder)
connector.generate_reports(data, parent_output_folder, processed_output_folder)


Processing MMM...


ValueError: <MonthEnd> is a non-fixed frequency

In [13]:
def process_child_period(self, df, parent_stats):
    """
    Processes the child-period data to calculate intra-period fields and parent-to-date stats.
    """
    df = df.sort_values(by="date").reset_index(drop=True)
    parent_stats = parent_stats.set_index("date")

    # Initialize new fields
    df["intra_period_count"] = 0
    df["intra_period_high"] = 0
    df["intra_period_low"] = 0
    df["intra_period_bar_of_h"] = 0
    df["intra_period_bar_of_l"] = 0
    df["rpc_direction"] = "N"
    df["bar_rpc"] = 0
    df["intra_period_reu"] = 0
    df["intra_period_red"] = 0
    df["intra_period_cumulative_rpc"] = 0

    # Iterate through the rows and update fields
    for i, row in df.iterrows():
        # Adjust the date to match the parent period (MonthEnd)
        parent_date = row["date"] + MonthEnd(0)
        parent = parent_stats.loc[parent_date]

        # Intra-period high/low calculations
        if i == 0:
            df.loc[i, "intra_period_high"] = row["high"]
            df.loc[i, "intra_period_low"] = row["low"]
        else:
            df.loc[i, "intra_period_high"] = max(row["high"], df.loc[i - 1, "intra_period_high"])
            df.loc[i, "intra_period_low"] = min(row["low"], df.loc[i - 1, "intra_period_low"])

        # Range expansions
        prior_high = df.loc[i - 1, "intra_period_high"] if i > 0 else row["high"]
        prior_low = df.loc[i - 1, "intra_period_low"] if i > 0 else row["low"]

        if row["high"] > prior_high:
            df.loc[i, "intra_period_reu"] = row["high"] - prior_high
        if row["low"] < prior_low:
            df.loc[i, "intra_period_red"] = prior_low - row["low"]

        # Parent range expansion
        if row["high"] == parent["parent_high"]:
            df.loc[i, "intra_period_bar_of_h"] = i + 1
        if row["low"] == parent["parent_low"]:
            df.loc[i, "intra_period_bar_of_l"] = i + 1

        # RPC logic
        if i > 0:
            if row["high"] > prior_high and row["low"] < prior_low:
                df.loc[i, "bar_rpc"] = 2
                df.loc[i, "rpc_direction"] = "U" if (row["close"] - row["low"]) / (row["high"] - row["low"]) >= 0.5 else "D"
            elif row["high"] > prior_high:
                df.loc[i, "bar_rpc"] = 1
                df.loc[i, "rpc_direction"] = "U"
            elif row["low"] < prior_low:
                df.loc[i, "bar_rpc"] = 1
                df.loc[i, "rpc_direction"] = "D"
            else:
                df.loc[i, "rpc_direction"] = df.loc[i - 1, "rpc_direction"]

        # Update cumulative RPC
        if i > 0:
            if df.loc[i, "rpc_direction"] != df.loc[i - 1, "rpc_direction"]:
                df.loc[i, "intra_period_cumulative_rpc"] = df.loc[i - 1, "intra_period_cumulative_rpc"] + 1
            else:
                df.loc[i, "intra_period_cumulative_rpc"] = df.loc[i - 1, "intra_period_cumulative_rpc"]

    return df


In [14]:
import pandas as pd
import os
from pandas.tseries.offsets import MonthEnd

class ConnectTemporalPeriods:
    def __init__(self, child_period="D", parent_period="ME"):
        self.child_period = child_period
        self.parent_period = parent_period

    def read_csv_files(self, input_folder):
        """
        Reads all CSV files in the specified folder. Assumes files are named TICKER.csv
        and contain the columns: date, open, high, low, close.
        """
        data = {}
        for file in os.listdir(input_folder):
            if file.endswith(".csv"):
                ticker = os.path.splitext(file)[0]
                df = pd.read_csv(
                    os.path.join(input_folder, file),
                    parse_dates=["date"]
                )
                data[ticker] = df
        return data

    def group_by_parent_period(self, df):
        """
        Groups the data by the parent_period and calculates parent stats.
        Returns a DataFrame with one row per parent group and the calculated fields.
        """
        grouped = df.groupby(pd.Grouper(key="date", freq=self.parent_period))
        summary = []
        for name, group in grouped:
            summary.append({
                "date": name,
                "num_rows": len(group),
                "parent_high": group["high"].max(),
                "parent_low": group["low"].min()
            })
        return pd.DataFrame(summary)

    def process_child_period(self, df, parent_stats):
        """
        Processes the child-period data to calculate intra-period fields and parent-to-date stats.
        """
        df = df.sort_values(by="date").reset_index(drop=True)
        parent_stats = parent_stats.set_index("date")

        # Initialize new fields
        df["intra_period_count"] = 0
        df["intra_period_high"] = 0
        df["intra_period_low"] = 0
        df["intra_period_bar_of_h"] = 0
        df["intra_period_bar_of_l"] = 0
        df["rpc_direction"] = "N"
        df["bar_rpc"] = 0
        df["intra_period_reu"] = 0
        df["intra_period_red"] = 0
        df["intra_period_cumulative_rpc"] = 0

        # Iterate through the rows and update fields
        for i, row in df.iterrows():
            # Adjust the date to match the parent period (MonthEnd)
            parent_date = row["date"] + MonthEnd(0)
            parent = parent_stats.loc[parent_date]

            # Intra-period high/low calculations
            if i == 0:
                df.loc[i, "intra_period_high"] = row["high"]
                df.loc[i, "intra_period_low"] = row["low"]
            else:
                df.loc[i, "intra_period_high"] = max(row["high"], df.loc[i - 1, "intra_period_high"])
                df.loc[i, "intra_period_low"] = min(row["low"], df.loc[i - 1, "intra_period_low"])

            # Range expansions
            prior_high = df.loc[i - 1, "intra_period_high"] if i > 0 else row["high"]
            prior_low = df.loc[i - 1, "intra_period_low"] if i > 0 else row["low"]

            if row["high"] > prior_high:
                df.loc[i, "intra_period_reu"] = row["high"] - prior_high
            if row["low"] < prior_low:
                df.loc[i, "intra_period_red"] = prior_low - row["low"]

            # Parent range expansion
            if row["high"] == parent["parent_high"]:
                df.loc[i, "intra_period_bar_of_h"] = i + 1
            if row["low"] == parent["parent_low"]:
                df.loc[i, "intra_period_bar_of_l"] = i + 1

            # RPC logic
            if i > 0:
                if row["high"] > prior_high and row["low"] < prior_low:
                    df.loc[i, "bar_rpc"] = 2
                    df.loc[i, "rpc_direction"] = "U" if (row["close"] - row["low"]) / (row["high"] - row["low"]) >= 0.5 else "D"
                elif row["high"] > prior_high:
                    df.loc[i, "bar_rpc"] = 1
                    df.loc[i, "rpc_direction"] = "U"
                elif row["low"] < prior_low:
                    df.loc[i, "bar_rpc"] = 1
                    df.loc[i, "rpc_direction"] = "D"
                else:
                    df.loc[i, "rpc_direction"] = df.loc[i - 1, "rpc_direction"]

            # Update cumulative RPC
            if i > 0:
                if df.loc[i, "rpc_direction"] != df.loc[i - 1, "rpc_direction"]:
                    df.loc[i, "intra_period_cumulative_rpc"] = df.loc[i - 1, "intra_period_cumulative_rpc"] + 1
                else:
                    df.loc[i, "intra_period_cumulative_rpc"] = df.loc[i - 1, "intra_period_cumulative_rpc"]

        return df

    def generate_reports(self, data, parent_output_folder, processed_output_folder):
        """
        Processes all tickers, generates parent and processed data, and writes to CSVs.
        """
        if not os.path.exists(parent_output_folder):
            os.makedirs(parent_output_folder)
        if not os.path.exists(processed_output_folder):
            os.makedirs(processed_output_folder)

        for ticker, df in data.items():
            print(f"Processing {ticker}...")

            # Generate parent stats
            parent_stats = self.group_by_parent_period(df)
            parent_stats.to_csv(os.path.join(parent_output_folder, f"{ticker}_parent.csv"), index=False)

            # Process child period data
            processed_data = self.process_child_period(df, parent_stats)
            processed_data.to_csv(os.path.join(processed_output_folder, f"{ticker}_processed.csv"), index=False)

            print(f"{ticker} processing complete.")

# Example Usage
input_folder = "input"
parent_output_folder = "parent_output"
processed_output_folder = "processed_output"

connector = ConnectTemporalPeriods()
data = connector.read_csv_files(input_folder)
connector.generate_reports(data, parent_output_folder, processed_output_folder)


Processing MMM...


  df.loc[i, "intra_period_high"] = row["high"]
  df.loc[i, "intra_period_low"] = row["low"]
  df.loc[i, "intra_period_red"] = prior_low - row["low"]
  df.loc[i, "intra_period_reu"] = row["high"] - prior_high


MMM processing complete.
Processing AFL...


  df.loc[i, "intra_period_high"] = row["high"]
  df.loc[i, "intra_period_low"] = row["low"]
  df.loc[i, "intra_period_reu"] = row["high"] - prior_high
  df.loc[i, "intra_period_red"] = prior_low - row["low"]


AFL processing complete.
Processing AAPL...


  df.loc[i, "intra_period_high"] = row["high"]
  df.loc[i, "intra_period_low"] = row["low"]
  df.loc[i, "intra_period_reu"] = row["high"] - prior_high
  df.loc[i, "intra_period_red"] = prior_low - row["low"]


AAPL processing complete.
