In [1]:
from google.colab import drive
import pandas as pd
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install git+https://github.com/amazon-science/chronos-forecasting.git
!pip install pandas torch transformers accelerate
!pip install -U "transformers>=4.40.0" accelerate bitsandbytes
!pip install -U transformers trl peft accelerate bitsandbytes datasets

Collecting git+https://github.com/amazon-science/chronos-forecasting.git
  Cloning https://github.com/amazon-science/chronos-forecasting.git to /tmp/pip-req-build-r0jzrtmf
  Running command git clone --filter=blob:none --quiet https://github.com/amazon-science/chronos-forecasting.git /tmp/pip-req-build-r0jzrtmf
  Resolved https://github.com/amazon-science/chronos-forecasting.git to commit 9afe64332f2456188da9375daa57e87eff7512ca
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: chronos-forecasting
  Building wheel for chronos-forecasting (pyproject.toml) ... [?25l[?25hdone
  Created wheel for chronos-forecasting: filename=chronos_forecasting-2.2.0rc3-py3-none-any.whl size=72076 sha256=2bd9e61a62f5288f54d1c1a874327bfa4b5e4e961be5dd9a73829f5decc4c93f
  Stored in directory: /tmp/pip-ephem-wheel-cache-k7ib_fgp/wheels/b9/a6/b5/75f

In [6]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from chronos import ChronosPipeline

In [2]:
timeseries_path = "/content/drive/MyDrive/covid_us_timeseries.csv"
sentiment_path = "/content/drive/MyDrive/covid_ts_sentiment_combined.csv"


timeseries_df = pd.read_csv(timeseries_path)
sentiment_df = pd.read_csv(sentiment_path)

data = pd.merge(timeseries_df, sentiment_df , on="date", how="inner")

In [3]:
data_clean = data.copy()
data_clean = data_clean.ffill().bfill()

# Ensure date is the index
data_clean.set_index('date', inplace=True)
df = data_clean.copy()

In [13]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
import matplotlib.pyplot as plt

# Suppress warnings for cleaner output
warnings.simplefilter('ignore', ConvergenceWarning)
warnings.simplefilter('ignore', UserWarning)

# ==========================================
# CONFIGURATION
# ==========================================
# We compare Daily Cases, Daily Deaths, and Cumulative Vaccinations
targets = ['new_cases', 'new_deaths', 'people_vaccinated']
senti_col = 'fear_intensity'
TAU = 14

print(f"--- Running SARIMAX Comparison for {len(targets)} Targets ---")

# ==========================================
# MAIN LOOP
# ==========================================
for target_col in targets:
    print(f"\n" + "="*60)
    print(f"🎯 TARGET: {target_col}")
    print("="*60)

    # 1. DATA PREP & SHIFTING
    df_exp = df.copy()

    # Check if column exists
    if target_col not in df_exp.columns:
        print(f"⚠️ Column '{target_col}' not found. Skipping.")
        continue

    # Create the Shifted Feature
    df_exp['fear_shifted'] = df_exp[senti_col].shift(TAU)

    # Drop NaNs (Critical for alignment)
    df_exp = df_exp.dropna(subset=[target_col, senti_col, 'fear_shifted'])

    # 2. SCALING (0-1 Range for Friend Comparison)
    scaler_y = MinMaxScaler()
    scaler_x = MinMaxScaler()

    y_raw = df_exp[target_col].values.reshape(-1, 1)
    y_scaled = scaler_y.fit_transform(y_raw).flatten()

    x_raw_vals = df_exp[senti_col].values.reshape(-1, 1)
    x_shift_vals = df_exp['fear_shifted'].values.reshape(-1, 1)

    x_raw_scaled = scaler_x.fit_transform(x_raw_vals)
    x_shift_scaled = scaler_x.fit_transform(x_shift_vals)

    # Split (80/20)
    split = int(len(y_scaled) * 0.8)

    y_train, y_test = y_scaled[:split], y_scaled[split:]
    x_raw_train, x_raw_test = x_raw_scaled[:split], x_raw_scaled[split:]
    x_shift_train, x_shift_test = x_shift_scaled[:split], x_shift_scaled[split:]

    # Also keep an unscaled version of y_test for nicer plots
    y_test_orig = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()

    # 3. RUN CONFIGURATIONS
    configs = [
        ("1. Baseline (No Sentiment)", None, None),
        ("2. With Sentiment (Raw)", x_raw_train, x_raw_test),
        (f"3. With Sentiment (Tau={TAU})", x_shift_train, x_shift_test)
    ]

    print(f"{'Configuration':<35} | {'Test MSE':<10} | {'Improvement'}")
    print("-" * 65)

    baseline_mse = 0

    for i, (name, train_exog, test_exog) in enumerate(configs):
        try:
            # Initialize SARIMAX
            model = SARIMAX(
                y_train,
                exog=train_exog,
                order=(14, 0, 0),
                seasonal_order=(0, 0, 0, 0),
                enforce_stationarity=False,
                enforce_invertibility=False
            )

            model_fit = model.fit(disp=False)
            forecast_res = model_fit.get_forecast(steps=len(y_test), exog=test_exog)
            y_pred = forecast_res.predicted_mean

            mse = mean_squared_error(y_test, y_pred)

            # Improvement calculation
            if i == 0:
                baseline_mse = mse
                imp_str = "-"
            else:
                diff = baseline_mse - mse
                pct = (diff / baseline_mse) * 100
                if pct > 0:
                    imp_str = f"✅ +{pct:.1f}%"
                else:
                    imp_str = f"❌ {pct:.1f}%"

            print(f"{name:<35} | {mse:.5f}    | {imp_str}")

            # ==========================
            # PLOT PRED vs TRUTH & SAVE
            # ==========================
            # Convert prediction back to original scale
            y_pred_orig = scaler_y.inverse_transform(y_pred.reshape(-1, 1)).flatten()

            plt.figure(figsize=(10, 4))
            plt.plot(y_test_orig, label='Truth', linewidth=2)
            plt.plot(y_pred_orig, label='Prediction', linestyle='--')
            plt.title(f"{target_col} – {name}")
            plt.xlabel("Time (test index)")
            plt.ylabel(target_col)
            plt.legend()
            plt.tight_layout()

            # Safe filename, e.g. "new_cases_config1.png"
            fname = f"{target_col}_config{i+1}.png".replace(" ", "_")
            plt.savefig(fname, dpi=150)
            plt.close()

            # Optional: print where it was saved
            print(f"   📊 Saved plot to: {fname}")

        except Exception as e:
            print(f"{name:<35} | FAILED       | {str(e)[:50]}...")

print("-" * 65)


--- Running SARIMAX Comparison for 3 Targets ---

🎯 TARGET: new_cases
Configuration                       | Test MSE   | Improvement
-----------------------------------------------------------------
1. Baseline (No Sentiment)          | 0.17947    | -
   📊 Saved plot to: new_cases_config1.png
2. With Sentiment (Raw)             | 0.16730    | ✅ +6.8%
   📊 Saved plot to: new_cases_config2.png
3. With Sentiment (Tau=14)          | 0.15832    | ✅ +11.8%
   📊 Saved plot to: new_cases_config3.png

🎯 TARGET: new_deaths
Configuration                       | Test MSE   | Improvement
-----------------------------------------------------------------
1. Baseline (No Sentiment)          | 0.05084    | -
   📊 Saved plot to: new_deaths_config1.png
2. With Sentiment (Raw)             | 0.04921    | ✅ +3.2%
   📊 Saved plot to: new_deaths_config2.png
3. With Sentiment (Tau=14)          | 0.04450    | ✅ +12.5%
   📊 Saved plot to: new_deaths_config3.png

🎯 TARGET: people_vaccinated
Configuration         

In [4]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt   # <-- added for plotting

# ==========================================
# 1. DEFINE FUSION MODEL (Dual-Encoder)
# ==========================================
class DualBranchFusionLSTM(nn.Module):
    def __init__(self, senti_dim, hidden_dim=32):
        super(DualBranchFusionLSTM, self).__init__()

        # Branch 1: Temporal Encoder (Target History)
        self.lstm_target = nn.LSTM(input_size=1, hidden_size=hidden_dim, batch_first=True)

        # Branch 2: Sentiment Encoder (Sentiment History)
        self.lstm_senti = nn.LSTM(input_size=senti_dim, hidden_size=hidden_dim, batch_first=True)

        # Fusion Layer (Combines the two branches)
        self.fusion_head = nn.Sequential(
            nn.Linear(hidden_dim * 2, 16),  # Fuse 32+32 -> 16
            nn.ReLU(),
            nn.Linear(16, 1)  # Predict scalar
        )

    def forward(self, x_target, x_senti):
        # Pass through separate LSTMs
        _, (h_target, _) = self.lstm_target(x_target)
        _, (h_senti, _) = self.lstm_senti(x_senti)

        # Concatenate the final hidden states
        # h shape is (1, batch, hidden) -> remove dim 0
        feat_target = h_target[-1]
        feat_senti = h_senti[-1]

        combined = torch.cat((feat_target, feat_senti), dim=1)

        # Predict
        return self.fusion_head(combined)

# ==========================================
# 2. CONFIGURATION
# ==========================================
targets = ['new_cases', 'new_deaths', 'people_vaccinated']
senti_cols = ['fear_intensity', 'valence_intensity', 'anger_intensity',
              'happiness_intensity', 'sadness_intensity']
TAU = 14

print(f"--- Running Multimodal Fusion LSTM on {len(targets)} Targets ---")
print(f"{'Target':<20} | {'Scaled MSE':<12} | {'Real MAE':<10}")
print("-" * 50)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==========================================
# 3. MAIN LOOP
# ==========================================
for target_col in targets:
    if target_col not in df.columns:
        continue

    # --- A. Data Prep ---
    df_exp = df.copy()
    # Apply Shift to Sentiment
    for c in senti_cols:
        df_exp[c] = df_exp[c].shift(TAU)
    df_exp = df_exp.dropna()

    # Scale Data (0-1)
    scaler_target = MinMaxScaler()
    scaler_senti = MinMaxScaler()

    y_raw = df_exp[target_col].values.reshape(-1, 1)
    x_senti_raw = df_exp[senti_cols].values

    y_scaled = scaler_target.fit_transform(y_raw)
    x_senti_scaled = scaler_senti.fit_transform(x_senti_raw)

    # Create Sequences (Dual Input)
    X_target, X_senti, y = [], [], []
    lookback = 30

    for i in range(len(y_scaled) - lookback):
        # Input 1: History of Target
        X_target.append(y_scaled[i:i+lookback])
        # Input 2: History of Sentiment
        X_senti.append(x_senti_scaled[i:i+lookback])
        # Output: Next Target
        y.append(y_scaled[i+lookback])

    X_target = torch.tensor(np.array(X_target), dtype=torch.float32).to(device)
    X_senti = torch.tensor(np.array(X_senti), dtype=torch.float32).to(device)
    y = torch.tensor(np.array(y), dtype=torch.float32).to(device)

    # Split
    split = int(len(y) * 0.8)

    # Datasets
    train_ds = TensorDataset(X_target[:split], X_senti[:split], y[:split])
    test_ds = TensorDataset(X_target[split:], X_senti[split:], y[split:])

    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

    # --- B. Train ---
    model = DualBranchFusionLSTM(senti_dim=len(senti_cols)).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

    model.train()
    for epoch in range(50):
        for b_t, b_s, b_y in train_loader:
            optimizer.zero_grad()
            preds = model(b_t, b_s).squeeze()        # (batch,)
            loss = criterion(preds, b_y.squeeze())
            loss.backward()
            optimizer.step()

    # --- C. Evaluate ---
    model.eval()
    preds_scaled = []
    actuals_scaled = []

    with torch.no_grad():
        for b_t, b_s, b_y in test_loader:
            preds = model(b_t, b_s).squeeze()        # (batch,)
            preds_scaled.extend(preds.cpu().numpy().ravel())
            actuals_scaled.extend(b_y.cpu().numpy().ravel())

    preds_scaled = np.array(preds_scaled).reshape(-1, 1)
    actuals_scaled = np.array(actuals_scaled).reshape(-1, 1)

    # Calculate Metrics
    mse_scaled = mean_squared_error(actuals_scaled, preds_scaled)

    # Inverse Transform for Real MAE
    preds_real = scaler_target.inverse_transform(preds_scaled)
    actuals_real = scaler_target.inverse_transform(actuals_scaled)
    mae_real = mean_absolute_error(actuals_real, preds_real)

    print(f"{target_col:<20} | {mse_scaled:.5f}      | {mae_real:,.0f}")

    # ==========================================
    # D. PLOT PRED vs TRUTH (NORMALIZED) & SAVE
    # ==========================================
    plt.figure(figsize=(10, 4))
    # use the scaled values in [0, 1]
    plt.plot(actuals_scaled.flatten(), label="Truth", linewidth=2)
    plt.plot(preds_scaled.flatten(), label="Prediction", linestyle="--")

    plt.title(f"Fusion LSTM – {target_col} (normalized)")
    plt.xlabel("Date")  # or "Time (test index)" if you prefer
    plt.ylabel("Cases (normalized)")
    plt.legend()
    plt.tight_layout()

    fname = f"fusion_lstm_{target_col}_normalized.png"
    plt.savefig(fname, dpi=150)
    plt.close()

    print(f"   📊 Saved plot to: {fname}")

print("-" * 50)


--- Running Multimodal Fusion LSTM on 3 Targets ---
Target               | Scaled MSE   | Real MAE  
--------------------------------------------------
new_cases            | 0.01357      | 24,059
   📊 Saved plot to: fusion_lstm_new_cases_normalized.png
new_deaths           | 0.01899      | 374
   📊 Saved plot to: fusion_lstm_new_deaths_normalized.png
people_vaccinated    | 0.00023      | 2,334,748
   📊 Saved plot to: fusion_lstm_people_vaccinated_normalized.png
--------------------------------------------------


In [None]:
import numpy as np
import re
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# ============================================================
# 1. Check model/tokenizer availability
# ============================================================
try:
    model
    tokenizer
except:
    raise RuntimeError("❌ 'model' and 'tokenizer' must already be loaded in memory!")

if 'df' not in globals():
    raise RuntimeError("❌ 'df' not found. Load your timeseries first!")

# ============================================================
# 2. Forecaster in LLMTime format
# ============================================================
def get_llama_prediction(history_list, debug=False):
    """Returns 1-step prediction using existing Llama model."""

    history_str = ", ".join([str(int(x)) for x in history_list])

    prompt = (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"You are a pattern completion engine. Given the sequence of numbers, "
        f"predict the next ONE number. Return only the number. No text.<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n"
        f"Sequence: {history_str}\n"
        f"Next number:<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    if debug:
        print("============== RAW MODEL RESPONSE ==============")
        print(response)
        print("================================================")

    # Try parse number from assistant reply
    try:
        if "<|start_header_id|>assistant<|end_header_id|>" in response:
            raw_answer = response.split("<|start_header_id|>assistant<|end_header_id|>", 1)[-1]
        else:
            raw_answer = response

        nums = re.findall(r"[-+]?\d*\.\d+|\d+", raw_answer)
        if nums:
            return float(nums[-1])

        # fallback: persistence
        return float(history_list[-1])
    except:
        return float(history_list[-1])

# ============================================================
# 3. Prepare data
# ============================================================
TARGET = "new_cases"

raw_series = df[TARGET].values.astype(float)
split = int(len(raw_series) * 0.8)

test_series = raw_series[split:]
history_window = 20

eval_len = min(30, len(test_series) - history_window)
indices = range(len(test_series) - eval_len, len(test_series))

actuals, preds = [], []

# ============================================================
# 4. Fit scaler for Scaled MSE
# ============================================================
scaler = MinMaxScaler()
scaler.fit(raw_series.reshape(-1, 1))

# ============================================================
# 5. Run evaluation (first 3 samples printed raw)
# ============================================================
print(f"Evaluating last {len(indices)} test points...\n")

for k, i in enumerate(indices, start=1):
    hist = test_series[i - history_window : i]
    actual = test_series[i]

    pred = get_llama_prediction(hist, debug=(k <= 3))

    actuals.append(actual)
    preds.append(pred)

    print(f"[{k}] True={int(actual)}, Pred={int(pred)}")

actuals = np.array(actuals, dtype=float)
preds = np.array(preds, dtype=float)

# ============================================================
# 6. Compute MAE & Scaled MSE
# ============================================================
mae = mean_absolute_error(actuals, preds)

scaled_actuals = scaler.transform(actuals.reshape(-1, 1))
scaled_preds = scaler.transform(preds.reshape(-1, 1))
scaled_mse = mean_squared_error(scaled_actuals, scaled_preds)

# ============================================================
# 7. Print results
# ============================================================
print("\n" + "="*60)
print("📊 Base Llama-3 8B (No Finetuning)")
print("="*60)
print(f"MAE:        {mae:,.2f}")
print(f"Scaled MSE: {scaled_mse:.6f}")
print("="*60)


Evaluating last 30 test points...

system

You are a pattern completion engine. Given the sequence of numbers, predict the next ONE number. Return only the number. No text.user

Sequence: 31797, 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150
Next number:assistant

106151
[1] True=106150, Pred=106151
system

You are a pattern completion engine. Given the sequence of numbers, predict the next ONE number. Return only the number. No text.user

Sequence: 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150, 106150
Next number:assistant

106150
[2] True=106150, Pred=106150
system

You are a pattern completion engine. Given the sequence of numbers, predict the next ONE number. Return only the number. No text.user

Sequence: 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78

In [None]:
import numpy as np
import re
import torch
from contextlib import nullcontext
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# ============================================================
# 0. Check model/tokenizer availability
# ============================================================
try:
    model
    tokenizer
except NameError:
    raise RuntimeError("❌ 'model' and 'tokenizer' must already be loaded in memory!")

if 'df' not in globals():
    raise RuntimeError("❌ 'df' not found. Load your timeseries first!")

# ============================================================
# 1. Autocast helper (fix Float vs Half issue)
# ============================================================
def autocast_ctx():
    if torch.cuda.is_available():
        return torch.amp.autocast(device_type="cuda", dtype=torch.float32)
    return nullcontext()

# ============================================================
# 2. Forecaster with <answer> tags + "closest number" heuristic
# ============================================================
def get_llama_prediction(history_list, debug=False):
    """
    Returns 1-step prediction using existing Llama model.
    - Asks model to put the number inside <answer>...</answer>.
    - If multiple numbers appear, picks the one closest to the last history value.
    """

    history_str = ", ".join([str(int(x)) for x in history_list])
    last_val = float(history_list[-1])

    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        "You are a pattern completion engine. Given a sequence of numbers, "
        "predict the next ONE number.\n"
        "- Output ONLY the number, wrapped in <answer> and </answer>.\n"
        "- Do not add any extra text.\n\n"
        "Example:\n"
        "Sequence: 10, 20, 30\n"
        "Next number:\n"
        "<answer>40</answer>\n"
        "<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"Sequence: {history_str}\n"
        "Next number:\n"
        "<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with autocast_ctx():
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=16,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id
            )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    if debug:
        print("============== RAW MODEL RESPONSE ==============")
        print(response)
        print("================================================")

    try:
        # 1) Prefer content inside <answer>...</answer>
        if "<answer>" in response and "</answer>" in response:
            raw_answer = response.split("<answer>", 1)[-1].split("</answer>", 1)[0]
        else:
            # 2) Otherwise, strip to the assistant chunk
            if "<|start_header_id|>assistant<|end_header_id|>" in response:
                raw_answer = response.split(
                    "<|start_header_id|>assistant<|end_header_id|>", 1
                )[-1]
            else:
                raw_answer = response

        # 3) Find all numbers, then pick the one closest to last history value
        num_strs = re.findall(r"[-+]?\d*\.\d+|\d+", raw_answer)
        if num_strs:
            nums = [float(s) for s in num_strs]
            best = min(nums, key=lambda v: abs(v - last_val))
            return best

        # 4) Fallback: persistence
        return last_val
    except Exception as e:
        print("⚠️ Parsing error:", e)
        return last_val

# ============================================================
# 3. Evaluate a single target column
# ============================================================
def evaluate_target_base(target_col, history_window=20, eval_last=30):
    if target_col not in df.columns:
        raise ValueError(f"Column '{target_col}' not found in df")

    raw_series = df[target_col].values.astype(float)
    split = int(len(raw_series) * 0.8)
    test_series = raw_series[split:]

    if len(test_series) <= history_window + 5:
        raise ValueError(f"Not enough test data for {target_col}")

    eval_len = min(eval_last, len(test_series) - history_window)
    indices = range(len(test_series) - eval_len, len(test_series))

    actuals, preds = [], []

    scaler = MinMaxScaler()
    scaler.fit(raw_series.reshape(-1, 1))

    print(f"\n🔍 Evaluating Base Model on '{target_col}' (last {len(indices)} pts)...\n")

    for k, i in enumerate(indices, start=1):
        hist = test_series[i - history_window : i]
        actual = test_series[i]

        pred = get_llama_prediction(hist, debug=(k <= 2))

        actuals.append(actual)
        preds.append(pred)

        print(f"[{k:02d}] {target_col:18s} | True={int(actual):10d} | Pred={int(pred):10d}")

    actuals = np.array(actuals, dtype=float)
    preds = np.array(preds, dtype=float)

    mae = mean_absolute_error(actuals, preds)

    scaled_actuals = scaler.transform(actuals.reshape(-1, 1))
    scaled_preds = scaler.transform(preds.reshape(-1, 1))
    scaled_mse = mean_squared_error(scaled_actuals, scaled_preds)

    print("\n" + "="*60)
    print(f"📊 Base Llama-3 — {target_col}")
    print("="*60)
    print(f"MAE:        {mae:,.2f}")
    print(f"Scaled MSE: {scaled_mse:.6f}")
    print("="*60)

    return mae, scaled_mse

# ============================================================
# 4. Loop over all three targets
# ============================================================
TARGETS = ['new_cases', 'new_deaths', 'people_vaccinated']
results = {}

for col in TARGETS:
    mae, mse = evaluate_target_base(col)
    results[col] = (mae, mse)

# ============================================================
# 5. Summary
# ============================================================
print("\n===== 📌 SUMMARY — Base Llama-3 (All Targets) =====")
for col in TARGETS:
    mae, mse = results[col]
    print(f"{col:20s} | MAE={mae:,.2f} | Scaled MSE={mse:.6f}")



🔍 Evaluating Base Model on 'new_cases' (last 30 pts)...

system

You are a pattern completion engine. Given a sequence of numbers, predict the next ONE number.
- Output ONLY the number, wrapped in <answer> and </answer>.
- Do not add any extra text.

Example:
Sequence: 10, 20, 30
Next number:
<answer>40</answer>
user

Sequence: 31797, 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150
Next number:
assistant

<ICYICYICYICYICYICYICYICYICYICYICYICYICYICYICY
[01] new_cases          | True=    106150 | Pred=    106150
system

You are a pattern completion engine. Given a sequence of numbers, predict the next ONE number.
- Output ONLY the number, wrapped in <answer> and </answer>.
- Do not add any extra text.

Example:
Sequence: 10, 20, 30
Next number:
<answer>40</answer>
user

Sequence: 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155,

In [None]:
import numpy as np
import torch
import re
from datasets import Dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from contextlib import nullcontext

# ---------- sanity ----------
assert 'df' in globals(), "df must be loaded"
assert 'model' in globals() and 'tokenizer' in globals(), "model + tokenizer must be loaded"

TARGET = "new_cases"
assert TARGET in df.columns, f"{TARGET} must be in df"

# autocast helper (for generation later)
def autocast_ctx():
    if torch.cuda.is_available():
        return torch.amp.autocast(device_type="cuda", dtype=torch.float32)
    return nullcontext()

# ---------- build QLoRA train dataset with strict 6-digit format ----------
print("🔧 Building QLoRA dataset with 6-digit targets...")

raw_series = df[TARGET].values.astype(float)
split_idx = int(len(raw_series) * 0.8)
train_series = raw_series[:split_idx]

history_window = 20
train_samples = []

FEW_SHOT = """You are a pattern completion engine. Given a sequence of 6-digit integers, predict the next ONE 6-digit integer.
Always output:
- Exactly 6 digits
- No commas, no spaces, no explanation
- Wrapped in <answer> and </answer>

Examples:
Sequence: 031797, 031797, 031797
Next: <answer>051289</answer>

Sequence: 051289, 051289, 051289
Next: <answer>078155</answer>
"""

for i in range(history_window, len(train_series)):
    hist = train_series[i - history_window:i]
    target_val = int(train_series[i])

    # zero-pad to 6 digits
    hist_str = ", ".join(f"{int(x):06d}" for x in hist)
    target_str = f"{target_val:06d}"

    text = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        + FEW_SHOT
        + "<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n"
        + f"Sequence: {hist_str}\n"
          "Next:<|eot_id|>"
          "<|start_header_id|>assistant<|end_header_id|>\n\n"
        + f"<answer>{target_str}</answer>"
    )

    train_samples.append({"text": text})

train_ds = Dataset.from_list(train_samples)
print(f"✅ QLoRA train samples: {len(train_ds)}")

# ---------- QLoRA config ----------
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)

sft_config = SFTConfig(
    output_dir="./llama_new_cases_6digit_qlora",
    dataset_text_field="text",
    max_steps=60,                     # short run, but more than 40
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=torch.cuda.is_available(),
    logging_steps=10,
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_ds,
    peft_config=peft_config,
    args=sft_config,
)

print("🚀 Starting QLoRA finetune (6-digit)...")
trainer.train()
print("✅ QLoRA finetune (6-digit) done.")


🔧 Building QLoRA dataset with 6-digit targets...
✅ QLoRA train samples: 190


Adding EOS to train dataset:   0%|          | 0/190 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/190 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/190 [00:00<?, ? examples/s]

🚀 Starting QLoRA finetune (6-digit)...


Step,Training Loss
10,1.8454
20,0.923
30,0.4222
40,0.3427
50,0.2988
60,0.2648


✅ QLoRA finetune (6-digit) done.


In [None]:
import numpy as np
import re
import torch
from contextlib import nullcontext
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# ============================================================
# 0. Checks
# ============================================================
try:
    model
    tokenizer
except NameError:
    raise RuntimeError("'model' and 'tokenizer' must already be loaded")

if 'df' not in globals():
    raise RuntimeError("'df' not found. Load your dataframe first")

# ============================================================
# 1. Autocast helper (fix Float vs Half)
# ============================================================
def autocast_ctx():
    if torch.cuda.is_available():
        return torch.amp.autocast(device_type="cuda", dtype=torch.float32)
    return nullcontext()

# ============================================================
# 2. Dumb-simple predictor: prompt -> first number
# ============================================================
def get_llama_prediction(history_list, debug=False):
    """
    Prompt Llama with a sequence of numbers and return the FIRST number
    it generates after 'Next number:'.
    """
    history_str = ", ".join(str(int(x)) for x in history_list)

    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        "You are a model that predicts the next value in a numerical time series.\n"
        "Given a sequence of numbers, output ONLY the next number.\n"
        "No words, no explanation, just the number.\n"
        "<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"Sequence: {history_str}\n"
        "Next number:\n"
        "<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with autocast_ctx():
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=8,          # small, we only need one number
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    if debug:
        print("============== RAW MODEL RESPONSE ==============")
        print(response)
        print("================================================")

    # Just grab the FIRST number we see
    m = re.search(r"[-+]?\d*\.?\d+", response)
    if m:
        return float(m.group(0))

    # If model somehow gives no digits at all, fall back to last value
    return float(history_list[-1])

# ============================================================
# 3. Evaluate a single target
# ============================================================
def evaluate_target_simple(target_col, history_window=20, eval_last=30):
    if target_col not in df.columns:
        raise ValueError(f"Column '{target_col}' not in df")

    raw_series = df[target_col].values.astype(float)
    split = int(len(raw_series) * 0.8)
    test_series = raw_series[split:]

    if len(test_series) <= history_window + 5:
        raise ValueError(f"Not enough test data for {target_col}")

    eval_len = min(eval_last, len(test_series) - history_window)
    indices = range(len(test_series) - eval_len, len(test_series))

    actuals, preds = [], []

    scaler = MinMaxScaler()
    scaler.fit(raw_series.reshape(-1, 1))

    print(f"\n🔍 Evaluating Base Model on '{target_col}' (last {len(indices)} pts)...\n")

    for k, i in enumerate(indices, start=1):
        hist = test_series[i - history_window : i]
        actual = test_series[i]

        pred = get_llama_prediction(hist, debug=(k <= 2))

        actuals.append(actual)
        preds.append(pred)

        print(f"[{k:02d}] {target_col:18s} | True={int(actual):10d} | Pred={int(pred):10d}")

    actuals = np.array(actuals, dtype=float)
    preds = np.array(preds, dtype=float)

    mae = mean_absolute_error(actuals, preds)

    scaled_actuals = scaler.transform(actuals.reshape(-1, 1))
    scaled_preds = scaler.transform(preds.reshape(-1, 1))
    scaled_mse = mean_squared_error(scaled_actuals, scaled_preds)

    print("\n" + "="*60)
    print(f"📊 Base Llama-3 — {target_col}")
    print("="*60)
    print(f"MAE:        {mae:,.2f}")
    print(f"Scaled MSE: {scaled_mse:.6f}")
    print("="*60)

    return mae, scaled_mse

# ============================================================
# 4. Run for all three targets
# ============================================================
TARGETS = ['new_cases', 'new_deaths', 'people_vaccinated']
results = {}

for col in TARGETS:
    mae, mse = evaluate_target_simple(col)
    results[col] = (mae, mse)

print("\n===== SUMMARY — Base Llama-3 (Simple numeric output) =====")
for col in TARGETS:
    mae, mse = results[col]
    print(f"{col:20s} | MAE={mae:,.2f} | Scaled MSE={mse:.6f}")



🔍 Evaluating Base Model on 'new_cases' (last 30 pts)...

system

You are a model that predicts the next value in a numerical time series.
Given a sequence of numbers, output ONLY the next number.
No words, no explanation, just the number.
user

Sequence: 31797, 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150
Next number:
assistant

106ICYICYICYICYICYICYICY
[01] new_cases          | True=    106150 | Pred=     31797
system

You are a model that predicts the next value in a numerical time series.
Given a sequence of numbers, output ONLY the next number.
No words, no explanation, just the number.
user

Sequence: 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150, 106150
Next number:
assistant

106ICYICYICYICYICYICYICY
[02] new_cases          | True=    106150 | Pred=     31797
[03] new_cases          | True=    106150 | Pre

In [None]:
import numpy as np
import re
import torch
import gc
from contextlib import nullcontext
from datasets import Dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# ============================================================
# 0. Sanity checks
# ============================================================
try:
    model
    tokenizer
except NameError:
    raise RuntimeError("'model' and 'tokenizer' must already be loaded (Llama-3).")

if 'df' not in globals():
    raise RuntimeError("'df' not found. Load your timeseries first!")

TARGETS = ['new_cases', 'new_deaths', 'people_vaccinated']
for col in TARGETS:
    if col not in df.columns:
        raise ValueError(f"Column '{col}' not in df.columns")

# ============================================================
# 1. Autocast helper
# ============================================================
def autocast_ctx():
    if torch.cuda.is_available():
        return torch.amp.autocast(device_type="cuda", dtype=torch.float32)
    return nullcontext()

# ============================================================
# 2. SIMPLE PREDICTOR (same as your base version)
# ============================================================
def get_llama_prediction(history_list, debug=False):
    """
    Prompt Llama with a sequence of numbers and return the FIRST number
    it generates after 'Next number:'.
    """
    history_str = ", ".join(str(int(x)) for x in history_list)

    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        "You are a model that predicts the next value in a numerical time series.\n"
        "Given a sequence of numbers, output ONLY the next number.\n"
        "No words, no explanation, just the number.\n"
        "<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"Sequence: {history_str}\n"
        "Next number:\n"
        "<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with autocast_ctx():
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=8,          # small, we only need one number
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    if debug:
        print("============== RAW MODEL RESPONSE ==============")
        print(response)
        print("================================================")

    # FIRST number only
    m = re.search(r"[-+]?\d*\.?\d+", response)
    if m:
        return float(m.group(0))

    # if no number at all, fallback to last value
    return float(history_list[-1])

# ============================================================
# 3. Evaluation function (simple numeric)
# ============================================================
def evaluate_target_simple(target_col, history_window=20, eval_last=30):
    raw_series = df[target_col].values.astype(float)
    split = int(len(raw_series) * 0.8)
    test_series = raw_series[split:]

    if len(test_series) <= history_window + 5:
        raise ValueError(f"Not enough test data for {target_col}")

    eval_len = min(eval_last, len(test_series) - history_window)
    indices = range(len(test_series) - eval_len, len(test_series))

    actuals, preds = [], []

    scaler = MinMaxScaler()
    scaler.fit(raw_series.reshape(-1, 1))

    print(f"\n🔍 Evaluating on '{target_col}' (last {len(indices)} pts)...\n")

    for k, i in enumerate(indices, start=1):
        hist = test_series[i - history_window : i]
        actual = test_series[i]

        pred = get_llama_prediction(hist, debug=(k <= 2))

        actuals.append(actual)
        preds.append(pred)

        print(f"[{k:02d}] {target_col:18s} | True={int(actual):10d} | Pred={int(pred):10d}")

    actuals = np.array(actuals, dtype=float)
    preds = np.array(preds, dtype=float)

    mae = mean_absolute_error(actuals, preds)

    scaled_actuals = scaler.transform(actuals.reshape(-1, 1))
    scaled_preds = scaler.transform(preds.reshape(-1, 1))
    scaled_mse = mean_squared_error(scaled_actuals, scaled_preds)

    print("\n" + "="*60)
    print(f"📊 {target_col}")
    print("="*60)
    print(f"MAE:        {mae:,.2f}")
    print(f"Scaled MSE: {scaled_mse:.6f}")
    print("="*60)

    return mae, scaled_mse

# ============================================================
# 4. BASELINE: evaluate BEFORE finetuning
# ============================================================
print("===== 🔹 BASE MODEL (before QLoRA) =====")
base_results = {}
for col in TARGETS:
    mae, mse = evaluate_target_simple(col)
    base_results[col] = (mae, mse)

print("\n----- BASE SUMMARY -----")
for col in TARGETS:
    mae, mse = base_results[col]
    print(f"{col:20s} | MAE={mae:,.2f} | Scaled MSE={mse:.6f}")

# ============================================================
# 5. BUILD QLoRA TRAIN DATASET (all 3 targets, same prompt style)
# ============================================================
print("\n===== 🧪 Building QLoRA training dataset (all 3 targets) =====")

history_window = 20
train_samples = []

for target_col in TARGETS:
    series = df[target_col].values.astype(float)
    # use training portion only (same 80% split)
    split = int(len(series) * 0.8)
    train_series = series[:split]

    for i in range(history_window, len(train_series)):
        hist = train_series[i - history_window:i]
        target = train_series[i]

        hist_str = ", ".join(str(int(x)) for x in hist)

        text = (
            "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
            "You are a model that predicts the next value in a numerical time series.\n"
            "Given a sequence of numbers, output ONLY the next number.\n"
            "No words, no explanation, just the number.\n"
            "<|eot_id|>"
            "<|start_header_id|>user<|end_header_id|>\n\n"
            f"Sequence: {hist_str}\n"
            "Next number:\n"
            "<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n\n"
            f"{int(target)}"
        )

        train_samples.append({"text": text})

train_ds = Dataset.from_list(train_samples)
print(f"✅ QLoRA train samples: {len(train_ds)}")

# ============================================================
# 6. QLoRA CONFIG + SHORT FINETUNE
# ============================================================
print("\n===== 🔧 Starting QLoRA finetune (short run) =====")

gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)

sft_config = SFTConfig(
    output_dir="./llama_covid_timeseries_qlora_simple",
    dataset_text_field="text",
    max_steps=60,                      # short run
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=torch.cuda.is_available(),
    logging_steps=10,
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_ds,
    peft_config=peft_config,
    args=sft_config,
)

trainer.train()
print("✅ QLoRA finetune complete.")

# ============================================================
# 7. EVALUATE AFTER FINETUNE (same pipeline)
# ============================================================
print("\n===== 🔹 MODEL AFTER QLoRA FINETUNE =====")
ft_results = {}
for col in TARGETS:
    mae, mse = evaluate_target_simple(col)
    ft_results[col] = (mae, mse)

print("\n===== 📊 BASE vs QLoRA SUMMARY =====")
for col in TARGETS:
    b_mae, b_mse = base_results[col]
    f_mae, f_mse = ft_results[col]
    print(f"{col:20s}")
    print(f"  Base  MAE={b_mae:,.2f} | Scaled MSE={b_mse:.6f}")
    print(f"  QLoRA MAE={f_mae:,.2f} | Scaled MSE={f_mse:.6f}")
    print("-" * 50)


===== 🔹 BASE MODEL (before QLoRA) =====

🔍 Evaluating on 'new_cases' (last 30 pts)...

system

You are a model that predicts the next value in a numerical time series.
Given a sequence of numbers, output ONLY the next number.
No words, no explanation, just the number.
user

Sequence: 31797, 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150
Next number:
assistant

106ICYICYICYICYICYICYICY
[01] new_cases          | True=    106150 | Pred=     31797
system

You are a model that predicts the next value in a numerical time series.
Given a sequence of numbers, output ONLY the next number.
No words, no explanation, just the number.
user

Sequence: 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150, 106150
Next number:
assistant

106ICYICYICYICYICYICYICY
[02] new_cases          | True=    106150 | Pred=     31797
[03] new_cases    

Adding EOS to train dataset:   0%|          | 0/570 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/570 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/570 [00:00<?, ? examples/s]

Step,Training Loss
10,2.4927
20,1.3482
30,1.0447
40,1.0389
50,1.1577
60,1.1869


✅ QLoRA finetune complete.

===== 🔹 MODEL AFTER QLoRA FINETUNE =====

🔍 Evaluating on 'new_cases' (last 30 pts)...

system

You are a model that predicts the next value in a numerical time series.
Given a sequence of numbers, output ONLY the next number.
No words, no explanation, just the number.
user

Sequence: 31797, 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150
Next number:
assistant

106Insensitiveisay​

def.GraphicsUnit.GraphicsUnit.GraphicsUnit
[01] new_cases          | True=    106150 | Pred=     31797
system

You are a model that predicts the next value in a numerical time series.
Given a sequence of numbers, output ONLY the next number.
No words, no explanation, just the number.
user

Sequence: 31797, 31797, 31797, 31797, 51289, 51289, 51289, 51289, 51289, 51289, 51289, 78155, 78155, 78155, 78155, 78155, 78155, 78155, 106150, 106150
Next number:
assistant

106Insensitiveisay​

def.GraphicsUn