In [35]:
# ======================================================
# Bangkok — Seasonal-Naive (Lag-12) Forecast
# ======================================================
import numpy as np
import pandas as pd
from math import sqrt
from sklearn.metrics import r2_score, mean_absolute_error

# -----------------------------
# 1) Load Data
# -----------------------------
# The CSV must provide a "date" column and a "visitor_arrivals" column.
CSV_PATH = "/Users/aman/Downloads/bangkok_2015_2024_final.csv"
df = (
    pd.read_csv(CSV_PATH, parse_dates=["date"])
    .sort_values("date")
    .set_index("date")
    .asfreq("MS")  # regular month-start index; months absent from the CSV become NaN rows
)

# Numeric copy of the raw column; values that cannot be parsed become NaN
df["arrivals"] = pd.to_numeric(df["visitor_arrivals"], errors="coerce")

# -----------------------------
# 2) Build Seasonal-Naive Forecast
# -----------------------------
# Forecast for each month = the value 12 rows (i.e. 12 months) earlier
df["snaive"] = df["arrivals"].shift(12)

# Recovery-based split: training rows are 2017-01..2019-12 plus 2023-01..2024-08
# (2020-2022 is left out); the test window is 2024-09..2024-12.
# NOTE(review): train_mask is not used by the seasonal-naive forecast in this
# cell — presumably kept for other models; confirm before removing.
train_mask = (
    ((df.index >= "2017-01-01") & (df.index <= "2019-12-01"))
    | ((df.index >= "2023-01-01") & (df.index <= "2024-08-01"))
)
test_mask = (df.index >= "2024-09-01") & (df.index <= "2024-12-01")

# Actuals and lag-12 predictions restricted to the test window
y_true = df["arrivals"].loc[test_mask]
y_pred = df["snaive"].loc[test_mask]

# -----------------------------
# 3) Metrics
# -----------------------------
def evaluate(
    y_true,
    y_pred,
    name="Seasonal-Naive",
    *,
    context="Bangkok, Test=Sep–Dec 2024",
):
    """Print and return R², MAE, RMSE and MAPE for one set of forecasts.

    Both inputs are converted to float arrays before any metric is computed,
    so all four metrics pair observations by position. Without this, pandas
    Series arithmetic would align RMSE/MAPE by index label while the
    scikit-learn metrics pair by position.

    Args:
        y_true: Observed values (array-like or Series).
        y_pred: Forecast values, in the same order and of the same length
            as ``y_true``.
        name: Model label shown at the start of the printed header.
        context: Text shown in parentheses after ``name`` in the header.

    Returns:
        dict with keys ``"R2"``, ``"MAE"``, ``"RMSE"`` and ``"MAPE"``
        (MAPE expressed in percent).

    Note:
        MAPE divides by ``y_true``; zeros in ``y_true`` make it inf or nan.
    """
    actual = np.asarray(y_true, dtype=float)
    forecast = np.asarray(y_pred, dtype=float)

    r2 = r2_score(actual, forecast)
    mae = mean_absolute_error(actual, forecast)

    residual = actual - forecast
    rmse = sqrt(np.mean(residual ** 2))
    mape = float(np.mean(np.abs(residual / actual)) * 100)

    print(f"\n{name} ({context})")
    print(f"R²   : {r2:.3f}")
    print(f"MAE  : {mae:,.0f}")
    print(f"RMSE : {rmse:,.0f}")
    print(f"MAPE : {mape:.2f}%")
    return {"R2": r2, "MAE": mae, "RMSE": rmse, "MAPE": mape}

# Headline metrics for the test window (also printed by evaluate)
metrics = evaluate(y_true, y_pred)

# -----------------------------
# 4) Month-by-Month Table
# -----------------------------
# One row per test month. Both value columns are cast to int, which requires
# the test window to contain no NaN.
out = pd.DataFrame(
    {
        "date": y_true.index,
        "actual": y_true.astype(int),
        "snaive_pred": y_pred.round().astype(int),
    }
)
out = out.set_index("date")

# Per-month absolute error and absolute percentage error, computed from the
# integer-cast columns above
out["abs_err"] = out["actual"].sub(out["snaive_pred"]).abs()
out["ape_%"] = out["abs_err"].div(out["actual"]).mul(100).round(2)

print("\n=== 2024 Sep–Dec — Bangkok (Seasonal-Naive) ===")
print(out)



Seasonal-Naive (Bangkok, Test=Sep–Dec 2024)
R²   : -0.045
MAE  : 437,978
RMSE : 442,244
MAPE : 14.97%

=== 2024 Sep–Dec — Bangkok (Seasonal-Naive) ===
             actual  snaive_pred  abs_err  ape_%
date                                            
2024-09-01  2521010      2130600   390410  15.49
2024-10-01  2679180      2197020   482160  18.00
2024-11-01  3150240      2637080   513160  16.29
2024-12-01  3627440      3261260   366180  10.09
