In [None]:
# ─────────────────────────────────────────────────────────────────────────────
# 0️⃣ Install / upgrade required packages
# (Run this cell first in Colab Pro)
!pip install -q --upgrade transformers accelerate bitsandbytes torch

# ─────────────────────────────────────────────────────────────────────────────


In [None]:
# ────────────────────────────────────────────────────────────────────────────
# 1️⃣ Imports & device
import os, re, time
import pandas as pd
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face access token for private or gated models. It is read from the
# environment so it never appears in the notebook. Both variable names used in
# this notebook are accepted; `token` is None when neither is set.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


In [None]:
# ────────────────────────────────────────────────────────────────────────────
# 2️⃣ Load the data, then slice the look‑back window and the hold‑out month
FEATURE = "Temperature (C)"
LOOKBACK = 8640   # 360 days × 24 h of hourly samples shown to the LLM
HORIZON  = 720    # 30 days × 24 h to forecast

df = pd.read_csv("weatherHistory.csv")
# Normalise every timestamp to UTC *before* sorting: rows may carry differing
# UTC offsets, and ordering the un-normalised values is not guaranteed to be
# chronological.
df["Formatted Date"] = pd.to_datetime(df["Formatted Date"], utc=True)
df = df.sort_values("Formatted Date").reset_index(drop=True)

feature_values = df[FEATURE].to_numpy()
assert feature_values.shape[0] >= LOOKBACK + HORIZON, (
    "dataset is too short for the requested look-back window plus hold-out"
)

# The final HORIZON rows are held out as ground truth. The look-back window
# ends right before them, so the values the LLM is asked to predict are never
# part of its prompt.
temps = feature_values[-(LOOKBACK + HORIZON):-HORIZON]
next_month_actual = feature_values[-HORIZON:]
assert temps.shape[0] == LOOKBACK
assert next_month_actual.shape[0] == HORIZON
# ────────────────────────────────────────────────────────────────────────────
# 3️⃣ Build the direct multi‑step prompt
# WARNING: the prompt is very long. A value such as "12.3," is likely to cost
# several tokens rather than one, so measure the real length with
# len(tokenizer(prompt).input_ids) and make sure the context window can hold it
# plus the space needed for the 720 outputs.
temps_list = list(map(lambda value: round(float(value), 1), temps))
# Comma-only separator (no spaces) to save tokens; downsampling the series is
# another option if the prompt gets truncated.
input_str = ",".join(map(str, temps_list))

prompt = "".join([
    f"Given the past {LOOKBACK} hourly temperatures: [{input_str}], ",
    f"predict the next {HORIZON} hourly temperatures. ",
    "Reply with a comma‑separated list of numbers.",
])
# ────────────────────────────────────────────────────────────────────────────
# 4️⃣ Load Mistral‑7B FP16 via Accelerate
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
# Accept either environment variable name. A missing token becomes None (the Hub
# is then accessed anonymously) instead of raising KeyError before any download
# is attempted.
token = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=token,
    torch_dtype=torch.float16,
    # Put the whole model on the device chosen in the imports cell, so loading
    # does not fail outright on a machine without CUDA.
    # NOTE(review): fp16 inference on CPU may be slow or unsupported for some
    # ops — confirm before relying on the CPU path.
    device_map=device,
)

# Inference only: switch off dropout and other training-time behaviour.
model.eval()
# ────────────────────────────────────────────────────────────────────────────
# 5️⃣ Tokenize, generate, and time it
MAX_PROMPT_TOKENS = 8192  # adjust if you have a longer context window

# When the prompt exceeds the budget, drop tokens from the START rather than the
# end. The instruction sits at the very end of the prompt, right after the most
# recent readings, so the default right-side truncation would remove exactly
# the part the model needs most.
tokenizer.truncation_side = "left"
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    max_length=MAX_PROMPT_TOKENS,
).to(model.device)  # inputs must live on the same device as the model

prompt_tokens = inputs["input_ids"].shape[1]
if prompt_tokens >= MAX_PROMPT_TOKENS:
    print(
        f"⚠️ Prompt filled the {MAX_PROMPT_TOKENS}-token budget; "
        "the oldest readings were probably truncated."
    )

# NOTE(review): the prompt is passed as plain text; an instruct-tuned model may
# respond better when wrapped in its chat template — confirm and adjust.
start = time.time()
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=4096,   # allow space for up to ~4k tokens of answers
        do_sample=False,       # greedy decoding → deterministic output
        pad_token_id=tokenizer.eos_token_id,
    )
elapsed = time.time() - start
print(f"Single-shot generation took {elapsed:.1f} seconds")
# ────────────────────────────────────────────────────────────────────────────
# 6️⃣ Decode & parse out 720 floats
# `generate` returns the prompt tokens followed by the continuation. Decode only
# the continuation; otherwise the numbers echoed from the prompt would be
# parsed as "predictions".
prompt_token_count = inputs["input_ids"].shape[1]
resp = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

# Optional sign followed by an integer or a decimal. The sign is grouped with
# both alternatives so that negative whole numbers such as "-5" keep their sign.
nums = re.findall(r"[-+]?(?:\d+(?:\.\d+)?|\.\d+)", resp)
preds = [float(x) for x in nums[:HORIZON]]

if len(preds) < HORIZON:
    print(f"⚠️ Expected {HORIZON} values but the model returned only {len(preds)}.")

print(f"Parsed {len(preds)} hourly predictions:")
print(preds)


In [None]:
# Preview the first day of parsed predictions; the full list is printed by the
# parsing cell, so it is not dumped a second time here.
preds[:24]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Hold-out month (the last HORIZON rows of the sorted frame) and the parsed LLM
# forecast, both as float arrays. They are derived here from `df` and `preds`,
# so the cell does not rely on variables that only exist after a later cell
# (or a manual assignment) has run.
actual = np.asarray(df[FEATURE].values[-HORIZON:], dtype=float)
next_month_actual = actual
llm_preds = np.asarray(preds, dtype=float)

# Remove extreme outliers from preds (e.g., values > 100 or < -50)
preds_cleaned = np.clip(llm_preds, -50, 100)

# Time index
hours = np.arange(len(actual))

# The model may return fewer values than requested; plot only the overlap so a
# short answer yields a shorter curve instead of a shape-mismatch error.
n_common = min(len(actual), len(preds_cleaned))
if n_common < len(actual):
    print(f"⚠️ Only {n_common} of {len(actual)} forecast hours are available to plot.")

# Plot
plt.figure(figsize=(15, 6))
plt.plot(hours[:n_common], actual[:n_common], label="Actual Temperatures", color='blue')
plt.plot(
    hours[:n_common],
    preds_cleaned[:n_common],
    label="LLM Forecast (Mistral-7B)",
    color='orange',
    linestyle='--',
)
plt.xlabel("Hour (from forecast start)")
plt.ylabel("Temperature (°C)")
plt.title("30-Day Hourly Temperature Forecast: LLM vs Actual")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# Coerce the LLM forecast to an array and bound it to [-50, 100] so a stray
# value cannot blow up the y-axis.
llm_preds = np.array(llm_preds)
llm_preds_clipped = llm_preds.clip(-50, 100)

# Draw the actual curve first, then the clipped forecast, on a shared hour axis.
plt.figure(figsize=(15, 6))
for curve_values, curve_style in (
    (next_month_actual, dict(label="Actual Temperatures", color='blue')),
    (llm_preds_clipped, dict(label="LLM Forecast (Mistral-7B)", color='orange', linestyle='--')),
):
    plt.plot(hours, curve_values, **curve_style)

plt.title("30-Day Hourly Temperature Forecast: LLM (Mistral-7B) vs Actual")
plt.xlabel("Hour (from forecast start)")
plt.ylabel("Temperature (°C)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
!pip install prophet -q


In [None]:
from prophet import Prophet
import pandas as pd

# Number of trailing hourly rows held out as the test set (30 days × 24 h).
TEST_HOURS = 720

# Load the dataset
df = pd.read_csv("weatherHistory.csv")

# Step 1: Parse every timestamp as UTC, then drop the timezone to get naive
# datetimes for the `ds` column.
df["Formatted Date"] = (
    pd.to_datetime(df["Formatted Date"], utc=True).dt.tz_localize(None)
)

# Step 2: Sort only after normalising, so rows that carried different UTC
# offsets end up in true chronological order.
df = df.sort_values("Formatted Date").reset_index(drop=True)

# Step 3: Prepare dataframe for Prophet (it expects columns named `ds` and `y`)
df_prophet = df[["Formatted Date", "Temperature (C)"]].rename(
    columns={"Formatted Date": "ds", "Temperature (C)": "y"}
)

# Split data into train and test (last TEST_HOURS rows as test set)
train_df = df_prophet.iloc[:-TEST_HOURS]
test_df = df_prophet.iloc[-TEST_HOURS:]

# Step 4: Fit Prophet model
# NOTE: this rebinds `model`, which the earlier cells use for the language model.
model = Prophet()
model.fit(train_df)

# Step 5: Forecast the next TEST_HOURS hours after the end of the training data
# NOTE(review): the future timestamps are generated at a fixed hourly step; this
# assumes the hold-out rows are contiguous hourly samples — TODO confirm.
future = model.make_future_dataframe(periods=TEST_HOURS, freq='H')
forecast = model.predict(future)

# Step 6: Extract predictions for the hold-out window and the matching actuals
prophet_preds = forecast["yhat"].to_numpy()[-TEST_HOURS:]
next_month_actual = test_df["y"].values

print("✅ Forecast complete. Prophet predicted", len(prophet_preds), "values.")


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import numpy as np

# Hour axis for the forecast window. It is deliberately NOT called `time`: that
# name belongs to the stdlib module the generation cell uses for timing, and
# rebinding it would break that cell on a re-run.
hour_axis = np.arange(len(next_month_actual))

# Clean LLM predictions (replace with your actual LLM output variable if needed)
llm_preds_array = np.array(llm_preds)
llm_preds_cleaned = np.clip(llm_preds_array, -50, 100)  # clip outliers for safe plot


def _plot_forecasts(title, *curves):
    """Draw one comparison figure over the shared hour axis.

    title  -- figure title.
    curves -- any number of (values, label, color, linestyle) tuples, drawn in
              the order given.
    """
    plt.figure(figsize=(15, 5))
    for values, label, color, linestyle in curves:
        plt.plot(hour_axis, values, label=label, color=color, linestyle=linestyle)
    plt.title(title)
    plt.xlabel("Hour")
    plt.ylabel("Temperature (°C)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# Curve specifications shared by the three figures.
_actual_curve = (next_month_actual, "Actual", "black", "-")
_prophet_curve = (prophet_preds, "Prophet", "green", "--")

# ========== 📊 PLOT 1: Prophet vs Actual ==========
_plot_forecasts("📈 Prophet vs Actual Temperature Forecast", _actual_curve, _prophet_curve)

# ========== 📊 PLOT 2: LLM vs Actual ==========
_plot_forecasts(
    "📈 LLM vs Actual Temperature Forecast",
    _actual_curve,
    (llm_preds_cleaned, "Mistral LLM", "orange", "--"),
)

# ========== 📊 PLOT 3: Prophet vs LLM vs Actual ==========
_plot_forecasts(
    "📈 Comparison: Prophet vs LLM vs Actual",
    _actual_curve,
    _prophet_curve,
    (llm_preds_cleaned, "Mistral LLM", "orange", ":"),
)

# ========== 📐 METRICS ==========
def print_metrics(name, y_true, y_pred):
    """Print RMSE, MAE and R² of `y_pred` against `y_true` under the heading `name`."""
    # RMSE is taken as the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    report_lines = (
        f"\n🔎 {name} Metrics:",
        f"  RMSE : {rmse:.4f}",
        f"  MAE  : {mae:.4f}",
        f"  R²   : {r2:.4f}",
    )
    for report_line in report_lines:
        print(report_line)

# Score each forecaster against the same hold-out values.
for model_label, forecast_values in (
    ("Prophet", prophet_preds),
    ("Mistral LLM", llm_preds_cleaned),
):
    print_metrics(model_label, next_month_actual, forecast_values)



In [None]:
import pandas as pd

# Hour offsets 0..719 identifying each row of the forecast window
hours = list(range(720))

# One table with the actual values and both forecasts side by side
results_df = pd.DataFrame(
    dict(
        Hour=hours,
        Actual_Temp=next_month_actual,
        Prophet_Pred=prophet_preds,
        LLM_Pred=llm_preds_cleaned,
    )
)

# Write the table to disk without the positional index
output_path = "temperature_forecast_comparison.csv"
results_df.to_csv(output_path, index=False)
print("✅ Saved to temperature_forecast_comparison.csv")
