<a href="https://colab.research.google.com/github/NathanDietrich/Iron-Knight-Investments/blob/main/DailyPreprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

def compute_sma(df, window=14):
    """Return the simple moving average of the ``Close`` column.

    Args:
        df: DataFrame with a numeric ``Close`` column.
        window: Number of trailing rows averaged at each position.

    Returns:
        A Series aligned with ``df``; positions that do not yet have
        ``window`` observations are NaN.
    """
    closes = df['Close']
    trailing = closes.rolling(window=window)
    return trailing.mean()

def compute_ema(df, span=14):
    """Return the exponential moving average of the ``Close`` column.

    Args:
        df: DataFrame with a numeric ``Close`` column.
        span: EMA span passed to pandas ``ewm``.

    Returns:
        A Series aligned with ``df``. ``adjust=False`` selects the recursive
        form, so the first value equals the first close.
    """
    closes = df['Close']
    smoother = closes.ewm(span=span, adjust=False)
    return smoother.mean()

def compute_rsi(df, window=14):
    """Return a Relative Strength Index computed from the ``Close`` column.

    Gains and losses are averaged with a plain rolling mean over ``window``
    rows (not Wilder's exponential smoothing).

    Args:
        df: DataFrame with a numeric ``Close`` column.
        window: Number of trailing price changes averaged.

    Returns:
        A Series aligned with ``df``; NaN until ``window`` price changes
        are available.
    """
    def _trailing_mean(series):
        # Require a full window so partial averages are never emitted.
        return series.rolling(window=window, min_periods=window).mean()

    change = df['Close'].diff()
    ups = change.clip(lower=0)
    downs = -change.clip(upper=0)

    mean_up = _trailing_mean(ups)
    mean_down = _trailing_mean(downs)

    # The small epsilon keeps the ratio finite when there were no losses.
    strength = mean_up / (mean_down + 1e-10)
    return 100 - (100 / (1 + strength))

def compute_macd(df, span_short=12, span_long=26, span_signal=9):
    """Return the MACD line and its signal line for the ``Close`` column.

    Args:
        df: DataFrame with a numeric ``Close`` column.
        span_short: Span of the fast EMA.
        span_long: Span of the slow EMA.
        span_signal: Span of the EMA applied to the MACD line.

    Returns:
        Tuple ``(macd_line, signal_line)`` of Series aligned with ``df``.
    """
    def _ema(series, span):
        # Recursive (adjust=False) EMA, shared by all three smoothings.
        return series.ewm(span=span, adjust=False).mean()

    closes = df['Close']
    macd_line = _ema(closes, span_short) - _ema(closes, span_long)
    return macd_line, _ema(macd_line, span_signal)

def compute_bollinger_bands(df, window=20, num_std=2):
    """Return Bollinger Bands for the ``Close`` column.

    Args:
        df: DataFrame with a numeric ``Close`` column.
        window: Number of trailing rows used for the mean and std.
        num_std: Band half-width in rolling standard deviations.

    Returns:
        Tuple ``(middle, upper, lower)`` of Series aligned with ``df``;
        NaN until ``window`` observations are available.
    """
    roller = df['Close'].rolling(window=window)
    middle = roller.mean()
    half_width = num_std * roller.std()
    return middle, middle + half_width, middle - half_width


def daily_preprocess_for_prediction(ticker, api_key, lookback_days=30):
    """
    Build the single-row feature set used for today's prediction.

    Steps:
      1) Fetch roughly ``lookback_days`` days of stock and sentiment data,
         including today's partial bar.
      2) Compute technical indicators on the whole (date-sorted) dataset.
      3) Build one row of features:
           prev_*        yesterday's Open, High, Low, Close, Volume,
                         indicators and sentiment
           Open_current  today's open

    Args:
        ticker: Symbol passed through to the fetch helpers.
        api_key: API key passed through to the fetch helpers.
        lookback_days: History length passed through to the fetch helpers.

    Returns:
        A 1-row DataFrame, or None when no stock data is available, when
        fewer than two rows exist, or when the newest row is not dated today.
        "Target_Close" is deliberately absent: it is not known yet.
    """
    sentiment_cols = ['sentiment_polarity', 'sentiment_subjectivity']
    base_cols = [
        'Open', 'High', 'Low', 'Close', 'Volume',
        'SMA_14', 'EMA_14', 'RSI_14', 'MACD', 'MACD_Signal',
        'BB_Middle', 'BB_Upper', 'BB_Lower',
    ]

    # NOTE(review): "today" comes from the local clock of the machine running
    # this; confirm it matches the exchange's trading date.
    end_date = pd.to_datetime("today").date()

    # 1) Fetch stock data
    stock_df = fetch_recent_stock_data_polygon(ticker, lookback_days, api_key)
    if stock_df is None or stock_df.empty:
        print("No stock data found.")
        return None

    # Fetch sentiment. Missing sentiment is not fatal: it falls back to 0,
    # the same default used for unmatched dates below.
    sentiment_df = fetch_recent_sentiment_polygon(ticker, lookback_days, api_key)
    if sentiment_df is None or sentiment_df.empty:
        print("No sentiment data found; defaulting sentiment features to 0.")
        merged = stock_df.copy()
    else:
        merged = pd.merge(stock_df, sentiment_df, on="Date", how="left")
    for col in sentiment_cols:
        if col not in merged.columns:
            merged[col] = 0.0

    # Sort chronologically BEFORE any forward-fill or rolling computation so
    # values only ever propagate from earlier dates to later ones.
    merged = merged.sort_values("Date").reset_index(drop=True)

    # We need at least 2 rows: yesterday + today
    if len(merged) < 2:
        print("Not enough rows to build daily prediction features.")
        return None

    merged[sentiment_cols] = merged[sentiment_cols].ffill().fillna(0)

    # 2) Compute technical indicators
    merged['SMA_14'] = compute_sma(merged, window=14)
    merged['EMA_14'] = compute_ema(merged, span=14)
    merged['RSI_14'] = compute_rsi(merged, window=14)
    merged['MACD'], merged['MACD_Signal'] = compute_macd(merged)
    bb_middle, bb_upper, bb_lower = compute_bollinger_bands(merged)
    merged['BB_Middle'] = bb_middle
    merged['BB_Upper'] = bb_upper
    merged['BB_Lower'] = bb_lower

    # Fill the warm-up NaNs left by the rolling windows.
    # NOTE(review): the Bollinger window is 20 rows; if the fetch returns
    # fewer than 21 rows, yesterday's band values are back-filled from
    # today's row (or stay NaN) -- confirm lookback_days is large enough.
    merged.ffill(inplace=True)
    merged.bfill(inplace=True)

    # 3) Construct the single-row feature set
    yest = merged.iloc[-2]
    today = merged.iloc[-1]

    # Normalise to a plain date so the check works whether the helper returns
    # datetime.date objects, Timestamps or ISO date strings. A Timestamp never
    # compares equal to a datetime.date in recent pandas versions.
    latest_date = pd.Timestamp(today['Date']).date()
    if latest_date != end_date:
        print("Polygon doesn’t have a partial bar for today yet.")
        return None

    row_data = {'prev_' + col: yest[col] for col in base_cols + sentiment_cols}

    # The "Open_current" is today's open
    row_data['Open_current'] = today['Open']

    # Return a 1-row DataFrame
    return pd.DataFrame([row_data])


# Example usage:
if __name__ == "__main__":
    import getpass

    ticker = "AAPL"
    # getpass prompts without echoing, so the key is not shown on screen.
    polygon_key = getpass.getpass("Enter your Polygon.io API key: ")

    features_df = daily_preprocess_for_prediction(ticker, polygon_key)
    if features_df is None:
        print("No features returned.")
    else:
        print("Daily features for model prediction:")
        print(features_df)
