# In [2]:
# I chose the Exponential Smoothing (Holt-Winters) model

# =========================
# file: model1.py
# =========================
# This script:
# 1) Loads the training and test data (local files if present; otherwise, URLs).
# 2) Builds a Holt-Winters Exponential Smoothing model ("model") — NOT fitted.
# 3) Fits the model -> "modelFit".
# 4) Produces "pred": a 1-D list of float predictions of length equal to test rows (744).
#
# Variables exposed for the autograder:
#   - model      : statsmodels.tsa.holtwinters.ExponentialSmoothing
#   - modelFit   : statsmodels.tsa.holtwinters.results.HoltWintersResults
#                  (or wrapper depending on statsmodels version)
#   - pred       : list[float] of length 744
#
# Notes:
#   * We set an explicit hourly frequency ('H') and fill rare gaps by interpolation.
#   * We try both daily (24) and weekly (168) seasonality and keep the better (by AIC).
#   * We keep everything numeric (float) and return a plain 1-D Python list for `pred`.

import os
from pathlib import Path
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# ---------- 1) Load data ----------
# Look for the CSVs next to this file; `__file__` is undefined in interactive
# sessions (e.g. a notebook cell), in which case fall back to the working dir.
try:
    HERE = os.path.dirname(__file__)
except NameError:
    HERE = os.getcwd()

TRAIN_PATH = os.path.join(HERE, "assignment_data_train.csv")
TEST_PATH  = os.path.join(HERE, "assignment_data_test.csv")

# Prefer local copies only when BOTH files are present; otherwise download both
# so that train and test always come from the same source.
if os.path.exists(TRAIN_PATH) and os.path.exists(TEST_PATH):
    train = pd.read_csv(TRAIN_PATH, parse_dates=["Timestamp"])
    test  = pd.read_csv(TEST_PATH,  parse_dates=["Timestamp"])
else:
    TRAIN_URL = ("https://raw.githubusercontent.com/dustywhite7/"
                 "econ8310-assignment1/main/assignment_data_train.csv")
    TEST_URL  = ("https://raw.githubusercontent.com/dustywhite7/"
                 "econ8310-assignment1/main/assignment_data_test.csv")
    train = pd.read_csv(TRAIN_URL, parse_dates=["Timestamp"])
    test  = pd.read_csv(TEST_URL,  parse_dates=["Timestamp"])

# Sort and index
train = train.sort_values("Timestamp").set_index("Timestamp")

# Target series with explicit hourly frequency.
# The frequency is given as an offset object rather than the string alias "H":
# pandas 2.2 deprecated the uppercase alias (FutureWarning) while older releases
# do not accept the lowercase "h", so the offset is the spelling that works on both.
y = pd.to_numeric(train["trips"], errors="coerce").asfreq(pd.offsets.Hour())
if y.isna().any():
    # Fill any gaps (missing hours introduced by asfreq, or values that failed
    # numeric coercion) so the model receives a NaN-free hourly series.
    y = y.interpolate(limit_direction="both")

# ---------- 2) Build the algorithm object (UNFITTED) ----------
def _build(y_series, sp):
    """Return an UNFITTED additive Holt-Winters model for ``y_series``.

    The model uses an additive damped trend and additive seasonality with
    ``sp`` observations per seasonal cycle.

    Parameters
    ----------
    y_series : array-like or pandas.Series
        Training series handed to ``ExponentialSmoothing`` unchanged.
    sp : int
        Seasonal period, forwarded as ``seasonal_periods``.

    Returns
    -------
    ExponentialSmoothing
        The constructed model; ``fit`` has not been called on it.

    Raises
    ------
    TypeError
        If no supported constructor signature accepts the arguments; the
        error from the first (preferred) attempt is the one re-raised.
    """
    # Constructor keyword sets, most modern first:
    #   1. current API: `damped_trend` plus estimated initial states;
    #   2. same, without `initialization_method`;
    #   3. legacy API: statsmodels releases before 0.12 spell the damping flag
    #      `damped` and accept neither keyword above, so falling back while
    #      still passing `damped_trend` would just raise the same TypeError.
    attempts = (
        {"damped_trend": True, "initialization_method": "estimated"},
        {"damped_trend": True},
        {"damped": True},
    )
    first_error = None
    for extra in attempts:
        try:
            return ExponentialSmoothing(
                y_series,
                trend="add",
                seasonal="add",
                seasonal_periods=sp,
                **extra,
            )
        except TypeError as err:
            # Remember the most informative failure (the preferred signature).
            if first_error is None:
                first_error = err
    raise first_error

# Try daily (24 h) and weekly (168 h) seasonality; keep the fit with the lower AIC.
candidates = [24, 168]
_best = None  # (aic, seasonal_period, model, fitted_result) of the best candidate so far
model = None  # will be set below

for sp in candidates:
    m = _build(y, sp)
    try:
        fit_tmp = m.fit(optimized=True, use_brute=True)
    except TypeError:
        # fit() signature without `use_brute`
        fit_tmp = m.fit(optimized=True)
    aic = getattr(fit_tmp, "aic", np.inf)
    # A NaN AIC must rank as "worst", like a missing one. Left as NaN it would
    # poison the comparison: `x < nan` is always False, so a NaN stored for the
    # first candidate could never be displaced by a later, valid candidate.
    if np.isnan(aic):
        aic = np.inf
    if (_best is None) or (aic < _best[0]):
        _best = (aic, sp, m, fit_tmp)

# Expose the winning candidate under the names the autograder reads:
# `model` is the ExponentialSmoothing instance and `modelFit` is the result
# object returned by calling .fit() on that same instance.
_, _SP, model, modelFit = _best

# ---------- 3) Forecast ----------
# Forecast one step per row of the test set.
h = len(test)
_fc = modelFit.forecast(steps=h)

# The grader expects a 1-D list/array of numeric values, length 744 (Jan has 31*24).
# Convert to a flat list of Python floats regardless of the container type
# returned by forecast().
pred = np.asarray(_fc, dtype=float).ravel().tolist()
