<a href="https://colab.research.google.com/github/BaronVonBussin/NewTransit/blob/main/basic_model_20241228.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import os
import pandas as pd
import numpy as np
from math import ceil

class BasicModel:
    """Rolling-range reference model for OHLC bar data.

    For every bar, a reference window is built from the
    ``rolling_range_duration`` bars that precede it. The model records where
    the previous close sits inside that window and whether the current bar
    trades above the window's high or below its low.
    """

    def __init__(self, rolling_range_duration=4):
        """
        Store the length (in bars) of the reference window.

        Raises:
            ValueError: if ``rolling_range_duration`` is smaller than 1, since
                a window of zero or negative length has no high/low.
        """
        if rolling_range_duration < 1:
            raise ValueError(
                f"rolling_range_duration must be >= 1, got {rolling_range_duration!r}"
            )
        self.rolling_range_duration = rolling_range_duration

    def read_csv_files(self, input_folder):
        """
        Read every ``*.csv`` file found directly inside ``input_folder``.

        Files are expected to be named TICKER_{temporal_period}.csv and to
        contain the columns: date, open, high, low, close.

        Returns:
            dict mapping the file name without extension to its DataFrame,
            with the ``date`` column parsed as datetimes.
        """
        data = {}
        # os.listdir returns entries in arbitrary order; sort so files are
        # processed in the same order on every run.
        for file in sorted(os.listdir(input_folder)):
            if not file.endswith(".csv"):
                continue
            ticker = os.path.splitext(file)[0]
            data[ticker] = pd.read_csv(
                os.path.join(input_folder, file),
                parse_dates=["date"],
            )
        return data

    def calculate_metrics(self, df):
        """
        Calculate metrics based on the rolling range and reference fields.

        The input frame is not modified; a date-sorted copy carrying the
        additional columns is returned.

        The first ``rolling_range_duration`` rows have no complete reference
        window, so their ``ref_*`` values are NaN. Comparisons against NaN are
        False, which means every 0/1 flag is 0 on those rows.
        """
        df = df.sort_values(by="date").reset_index(drop=True)
        window = self.rolling_range_duration

        # Reference Fields: the window covers the `window` bars before the
        # current one, hence the shift(1) after the rolling aggregate.
        df["ref_open"] = df["open"].shift(window)
        df["ref_high"] = df["high"].rolling(window).max().shift(1)
        df["ref_low"] = df["low"].rolling(window).min().shift(1)
        df["ref_close"] = df["close"].shift(1)
        df["ref_range"] = df["ref_high"] - df["ref_low"]

        # Percentage Range and Derived Fields
        # Position of the previous close inside the reference range
        # (0 = at ref_low, 1 = at ref_high). A zero-width range gives NaN/inf.
        df["ref_percentr"] = (df["ref_close"] - df["ref_low"]) / df["ref_range"]
        percentr = df["ref_percentr"]
        # Fractional distance from the previous close to the nearer extreme.
        nearest = np.minimum(1 - percentr, percentr)

        # Nearest-extreme distance bucketed into tenths of the range.
        df["ref_epc"] = np.ceil(nearest / 0.1)
        # 1 when the previous close is at least as near to ref_low as to
        # ref_high, otherwise 0 (also 0 when ref_percentr is NaN).
        df["ref_epc_dir"] = np.where(percentr == nearest, 1, 0)
        df["ref_ce_percent"] = nearest
        # Price distance to the nearer extreme. The smaller of the two
        # distances is taken directly so the value always agrees with
        # ref_ce_percent * ref_range.
        df["ref_ce_value"] = np.minimum(
            df["ref_high"] - df["ref_close"],
            df["ref_close"] - df["ref_low"],
        )
        df["ref_hp_flag"] = np.where(df["ref_ce_percent"] <= 0.25, 1, 0)

        # Range Expansions: how far the current bar trades beyond the
        # reference high (reu) or below the reference low (red).
        df["reu_value"] = np.where(df["high"] > df["ref_high"], df["high"] - df["ref_high"], 0)
        df["red_value"] = np.where(df["low"] < df["ref_low"], df["ref_low"] - df["low"], 0)
        df["reu_flag"] = np.where(df["reu_value"] > 0, 1, 0)
        df["red_flag"] = np.where(df["red_value"] > 0, 1, 0)
        df["re_value"] = df["reu_value"] + df["red_value"]
        df["re_flag"] = np.where(df["re_value"] > 0, 1, 0)

        # Directional Changes
        # NOTE(review): despite its name this is a 0/1 flag that is set only
        # when the bar expands in both directions - confirm that is intended.
        df["re_number_dir"] = np.where(
            (df["reu_value"] > 0) & (df["red_value"] > 0),
            1,
            0,
        )
        # Expansion beyond the extreme on the side selected by ref_epc_dir
        # (ref_low when it is 1, ref_high when it is 0).
        df["ere_e1"] = np.where(
            df["ref_epc_dir"] == 1,
            df["red_flag"],
            df["reu_flag"],
        )
        # Expansion beyond the opposite extreme.
        df["ere_e2"] = np.where(
            df["ref_epc_dir"] == 0,
            df["red_flag"],
            df["reu_flag"],
        )

        return df

    def export_data(self, df, ticker, output_folder):
        """
        Export the processed data to ``{output_folder}/{ticker}_basicmodel.csv``.

        The folder is created when it does not exist yet.
        """
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(output_folder, exist_ok=True)
        output_file = os.path.join(output_folder, f"{ticker}_basicmodel.csv")
        df.to_csv(output_file, index=False)
        print(f"Data exported for {ticker} to {output_file}")

    def generate_summary(self, df, ticker, summary_folder):
        """
        Generate a summary by year with totals and percentage breakdowns.

        The result is written to
        ``{summary_folder}/{ticker}_basicmodel_summary.csv``; the folder is
        created when it does not exist yet. ``df`` must already carry the
        ``re_flag``, ``reu_flag`` and ``red_flag`` columns and is left
        unmodified.
        """
        # Group on a derived Series rather than adding a "year" column, so
        # the caller's frame is not changed as a side effect.
        years = pd.to_datetime(df["date"]).dt.year.rename("year")
        summary = df.groupby(years).agg(
            total_rows=("date", "count"),
            total_re_flag=("re_flag", "sum"),
            total_reu_flag=("reu_flag", "sum"),
            total_red_flag=("red_flag", "sum"),
            reu_percent=("reu_flag", lambda x: np.mean(x) * 100),
            red_percent=("red_flag", lambda x: np.mean(x) * 100),
        ).reset_index()

        os.makedirs(summary_folder, exist_ok=True)
        summary_file = os.path.join(summary_folder, f"{ticker}_basicmodel_summary.csv")
        summary.to_csv(summary_file, index=False)
        print(f"Summary exported for {ticker} to {summary_file}")

    def process_folder(self, input_folder, output_folder, summary_folder):
        """
        Process all CSV files in the input folder and generate output and summary files.
        """
        data = self.read_csv_files(input_folder)
        for ticker, df in data.items():
            print(f"Processing {ticker}...")
            processed_data = self.calculate_metrics(df)
            self.export_data(processed_data, ticker, output_folder)
            self.generate_summary(processed_data, ticker, summary_folder)


# Example usage: read every CSV in the input folder, then write the per-ticker
# metrics and the yearly summaries using a one-bar reference window.
input_folder, output_folder, summary_folder = (
    "/content/input_basicmodel",
    "output_basicmodel",
    "summary_basicmodel",
)

model = BasicModel(rolling_range_duration=1)
model.process_folder(
    input_folder=input_folder,
    output_folder=output_folder,
    summary_folder=summary_folder,
)


  df = pd.read_csv(


Processing MMM_D...
Data exported for MMM_D to output_basicmodel/MMM_D_basicmodel.csv
Summary exported for MMM_D to summary_basicmodel/MMM_D_basicmodel_summary.csv
Processing AAPL_D...
Data exported for AAPL_D to output_basicmodel/AAPL_D_basicmodel.csv
Summary exported for AAPL_D to summary_basicmodel/AAPL_D_basicmodel_summary.csv
Processing AFL_D...
Data exported for AFL_D to output_basicmodel/AFL_D_basicmodel.csv
Summary exported for AFL_D to summary_basicmodel/AFL_D_basicmodel_summary.csv
Processing WTI_D...
Data exported for WTI_D to output_basicmodel/WTI_D_basicmodel.csv
Summary exported for WTI_D to summary_basicmodel/WTI_D_basicmodel_summary.csv
