In [None]:
import torch
import numpy as np
import pandas as pd
import yfinance as yf
import requests
import matplotlib.pyplot as plt
from pathlib import Path
from math import sqrt
from chronos import Chronos2Pipeline
import os           

In [None]:
# --- 1. SETTINGS ---
# Market data request: ticker, bar interval and look-back period for yf.download.
SYMBOL, INTERVAL, PERIOD = "TSLA", "1wk", "5y"

# Model identifier handed to Chronos2Pipeline.from_pretrained.
MODEL_ID = "amazon/chronos-2"

# Backtest sizing: number of trailing rows to forecast one step at a time,
# and the minimum number of history rows required before a forecast is made.
TEST_SIZE, MIN_CONTEXT = 30, 64

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Alpha Vantage key is read from the environment; None when the variable is unset.
AV_API_KEY = os.environ.get('ALPHAVANTAGE_API_KEY')

In [None]:
# --- 2. DATA LOADING & SENTIMENT INTEGRATION ---
print(f"Fetching {SYMBOL} price data...")
df = yf.download(SYMBOL, period=PERIOD, interval=INTERVAL, auto_adjust=False, progress=False)

# Fail early with a readable message instead of a confusing KeyError further down.
if df.empty:
    raise RuntimeError(f"No price data returned for {SYMBOL} (period={PERIOD}, interval={INTERVAL}).")

# Recent yfinance releases return (Price, Ticker) MultiIndex columns even for a
# single symbol. Keep only the price-field level so the plain-string column
# names used below ("Close", later "close") resolve to ordinary Series.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

df = df.reset_index().rename(columns={"Date":"timestamp", "Close":"close"})
# Strip any timezone so the timestamps compare against the tz-naive sentiment index.
df["timestamp"] = pd.to_datetime(df["timestamp"]).dt.tz_localize(None)

def fetch_av_sentiment(symbol, api_key):
    """Fetch Alpha Vantage news sentiment for ``symbol`` and average it per week.

    Args:
        symbol: Ticker to query; also used to pick the matching entry out of
            each article's ``ticker_sentiment`` list.
        api_key: Alpha Vantage API key. When falsy, no request is sent.

    Returns:
        A DataFrame indexed by ``timestamp`` (``resample('W')`` bins) with one
        ``sentiment`` column holding the weekly mean score, where weeks without
        articles are filled with 0. Returns ``None`` when the key is missing,
        the response has no articles, or the request/parsing fails.
    """
    print(f"Fetching Alpha Vantage sentiment for {symbol}...")
    if not api_key:
        # Without this guard the literal text "None" would be sent as the key.
        print("Sentiment fetch skipped: no Alpha Vantage API key provided.")
        return None

    query = {
        "function": "NEWS_SENTIMENT",
        "tickers": symbol,
        "limit": 1000,
        "apikey": api_key,
    }
    try:
        # `params` URL-encodes the values; `timeout` keeps a stalled
        # connection from blocking the notebook indefinitely.
        response = requests.get("https://www.alphavantage.co/query", params=query, timeout=30)
        response.raise_for_status()
        data = response.json()
        if "feed" not in data or not data["feed"]:
            return None

        sentiments = []
        for item in data["feed"]:
            # Score of the entry for `symbol`; 0.0 when the article has no such entry.
            score = next(
                (
                    float(ts["ticker_sentiment_score"])
                    for ts in item["ticker_sentiment"]
                    if ts["ticker"] == symbol
                ),
                0.0,
            )
            dt = pd.to_datetime(item["time_published"]).tz_localize(None)
            sentiments.append({"timestamp": dt, "sentiment": score})

        sent_df = pd.DataFrame(sentiments).set_index("timestamp").resample('W').mean().fillna(0)
        return sent_df
    except Exception as e:
        # Best effort: the caller falls back to a neutral placeholder on None.
        print(f"Sentiment fetch failed: {e}")
        return None

sentiment_df = fetch_av_sentiment(SYMBOL, AV_API_KEY)

# Merge Sentiment with Price (aligned by timestamp)
if sentiment_df is not None:
    # direction="backward" attaches, to each price row, the latest sentiment
    # bin whose timestamp is at or before that row's timestamp.
    df = pd.merge_asof(
        df.sort_values("timestamp"),
        sentiment_df.reset_index().sort_values("timestamp"),
        on="timestamp",
        direction="backward",
    )
    # Price rows older than the first sentiment bin get NaN from merge_asof.
    # Treat them as neutral (same value as the placeholder branch) so the
    # price history is not thrown away by the dropna below.
    df["sentiment"] = df["sentiment"].fillna(0.0)
else:
    print("Using neutral sentiment placeholder.")
    df["sentiment"] = 0.0

# Prepare multivariate dataframe; only rows with a missing close are unusable.
prices = df.set_index("timestamp")[["close", "sentiment"]].astype(float).dropna(subset=["close"])

In [None]:
# --- 3. MODEL INITIALIZATION ---
print(f"Loading {MODEL_ID} on {DEVICE}...")

# Loader options: target device from the settings cell, float32 as the torch dtype.
_load_options = {"device_map": DEVICE, "torch_dtype": torch.float32}
pipeline = Chronos2Pipeline.from_pretrained(MODEL_ID, **_load_options)

In [None]:
# --- 4. MULTIVARIATE PREDICTION FUNCTION ---
def chronos_predict_multivariate(train_df: pd.DataFrame) -> float:
    """Forecast the next 'close' value with Chronos-2, observing 'sentiment'.

    Args:
        train_df: History indexed by timestamp, containing 'close' and
            'sentiment' columns.

    Returns:
        The 0.5-quantile (median) one-step-ahead forecast for 'close'.
    """
    # Keep the frame wide: one row per timestamp for the single series.
    # Melting both variables under one item_id would give that series two rows
    # per timestamp. With target="close", the remaining 'sentiment' column is
    # left for predict_df to use as a covariate.
    # NOTE(review): covariate handling and the output schema follow the
    # Chronos-2 predict_df documentation - confirm against the installed version.
    context_df = train_df[["close", "sentiment"]].rename_axis("timestamp").reset_index()
    context_df["item_id"] = "TSLA_COMBO"

    forecast_df = pipeline.predict_df(
        context_df,
        prediction_length=1,
        quantile_levels=[0.5],
        id_column="item_id",
        timestamp_column="timestamp",
        target="close",
    )

    # Narrow to the 'close' rows when the output labels its targets; with a
    # single target this keeps every row.
    if "target_name" in forecast_df.columns:
        forecast_df = forecast_df[forecast_df["target_name"] == "close"]
    return float(forecast_df["0.5"].iloc[0])

In [None]:
# --- 5. BACKTESTING ---
def backtest_one_step_multivariate(full_df: pd.DataFrame, test_size: int):
    """Walk-forward, one-step-ahead backtest over the last ``test_size`` rows.

    For each test row, all rows before it are passed to
    ``chronos_predict_multivariate`` and the forecast is compared with that
    row's actual 'close'.

    Args:
        full_df: Frame indexed by timestamp with at least a 'close' column.
        test_size: Number of trailing rows to forecast.

    Returns:
        DataFrame with columns ``timestamp``, ``y_true`` and ``y_pred``, one
        row per successful forecast. The columns are present even when no
        forecast succeeded. Steps with fewer than ``MIN_CONTEXT`` history rows
        are skipped, as are steps whose prediction raises.
    """
    columns = ["timestamp", "y_true", "y_pred"]
    preds = []
    # Clamp at 0: a test_size larger than the history would otherwise give
    # negative positions, which iloc/index interpret as counting from the end.
    split_idx = max(0, len(full_df) - test_size)

    for i in range(split_idx, len(full_df)):
        train = full_df.iloc[:i]
        if len(train) < MIN_CONTEXT:
            continue

        ts = full_df.index[i]
        y_true = float(full_df["close"].iloc[i])

        try:
            # Pass the dataframe with both price and sentiment
            y_pred = chronos_predict_multivariate(train)
        except Exception as e:
            # Best effort: report the failed step and carry on with the rest.
            print(f"Skipping {ts} due to error: {e}")
            continue

        preds.append({"timestamp": ts, "y_true": y_true, "y_pred": y_pred})

    # Explicit columns keep the schema stable for downstream cells when empty.
    return pd.DataFrame(preds, columns=columns)

# Run the one-step backtest over the last TEST_SIZE rows of `prices`.
pred_chronos_sent = backtest_one_step_multivariate(prices, TEST_SIZE)

In [None]:

# --- 6. EVALUATION & PLOT ---
def print_metrics(df_p):
    """Print MAE, RMSE and MAPE for a frame with 'y_true' and 'y_pred' columns.

    MAE and RMSE are printed with a dollar sign and two decimals; MAPE is
    printed as a percentage of 'y_true'.
    """
    actual = df_p["y_true"]
    residual = actual - df_p["y_pred"]

    mae = np.mean(np.abs(residual))
    rmse = sqrt(np.mean(residual ** 2))
    mape = np.mean(np.abs(residual / actual)) * 100

    report_lines = (
        "\n--- Metrics (Chronos-v2 + Alpha Vantage Sentiment) ---",
        f"MAE:  ${mae:,.2f}",
        f"RMSE: ${rmse:,.2f}",
        f"MAPE: {mape:.2f}%",
    )
    for line in report_lines:
        print(line)

if not pred_chronos_sent.empty:
    print_metrics(pred_chronos_sent)

    # Explicit Figure/Axes interface instead of the pyplot state machine.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(pred_chronos_sent["timestamp"], pred_chronos_sent["y_true"], label="Actual", marker='o')
    ax.plot(pred_chronos_sent["timestamp"], pred_chronos_sent["y_pred"], label="Chronos + Sentiment", linestyle='--')
    ax.set_title(f"{SYMBOL} Price Prediction (Multivariate Chronos-v2)")
    # Axis labels so the figure can be read on its own.
    ax.set_xlabel("Date")
    ax.set_ylabel("Close price ($)")
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.show()
else:
    # Say why there is no output rather than leaving the cell silently blank.
    print("No backtest predictions available; skipping metrics and plot.")



In [None]:
# --- 7. SAVE ARTIFACTS ---
# Output directory is resolved two levels above the current working directory.
REPO_ROOT = Path.cwd().parent.parent
OUTDIR = REPO_ROOT / "model" / "notebooks" / "artifacts"
OUTDIR.mkdir(parents=True, exist_ok=True)
path = OUTDIR / "pred_chronos_sentiment_weekly.parquet"

if pred_chronos_sent.empty:
    # An empty backtest result may have no 'timestamp' column at all, so
    # indexing it would raise KeyError; there is nothing to write in any case.
    print("No predictions to save; skipping:", path)
else:
    pred_chronos_sent["timestamp"] = pd.to_datetime(pred_chronos_sent["timestamp"])
    pred_chronos_sent.to_parquet(path, index=False)
    print("Saved successfully to:", path)