# In [14]:
# assignment1.py — forecasting hourly trips with an
# Exponential Smoothing (Holt-Winters) model.

from __future__ import annotations

import os
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing as ES

# ---------- 1) resolve data paths (local first, fallback to GitHub) ----------
# File names looked up in the current working directory.
LOCAL_TRAIN = "assignment_data_train.csv"
LOCAL_TEST = "assignment_data_test.csv"

# The remote copies use the same file names under one GitHub raw-content root.
_RAW_ROOT = "https://raw.githubusercontent.com/dustywhite7/econ8310-assignment1/main/"
RAW_TRAIN = _RAW_ROOT + LOCAL_TRAIN
RAW_TEST = _RAW_ROOT + LOCAL_TEST

# Prefer a file that already exists on disk; otherwise read from the URL.
if os.path.exists(LOCAL_TRAIN):
    TRAIN_PATH = LOCAL_TRAIN
else:
    TRAIN_PATH = RAW_TRAIN

if os.path.exists(LOCAL_TEST):
    TEST_PATH = LOCAL_TEST
else:
    TEST_PATH = RAW_TEST


# ---------- 2) load & preprocess ----------
def _load_df(path: str) -> pd.DataFrame:
    """Read a CSV and return it on a regular hourly ``Timestamp`` index.

    Parameters
    ----------
    path : str
        Local file path or URL handed to ``pandas.read_csv``. The file must
        contain a ``Timestamp`` column, which is parsed as datetimes.

    Returns
    -------
    pd.DataFrame
        Rows sorted by time, indexed by ``Timestamp`` and conformed to an
        hourly frequency; hours absent from the file appear as NaN rows.
    """
    df = pd.read_csv(path, parse_dates=["Timestamp"])
    # ensure datetime index & hourly frequency.
    # An explicit offset object is used instead of the "H" string alias:
    # recent pandas releases deprecate the upper-case alias (FutureWarning),
    # while pd.offsets.Hour() means the same thing on every version.
    df = df.sort_values("Timestamp").set_index("Timestamp").asfreq(pd.offsets.Hour())
    return df

# Read both splits through the shared loader (train first, then test).
train_df, test_df = (_load_df(src) for src in (TRAIN_PATH, TEST_PATH))

# Training target: coerce anything non-numeric to NaN, then fill every NaN
# (coerced values or gaps) by time-based interpolation, filling in both
# directions along the index.
y_train = pd.to_numeric(train_df["trips"], errors="coerce").interpolate(
    method="time",
    limit_direction="both",
)


# ---------- 3) build & fit model ----------
# Holt-Winters specification: additive trend plus an additive seasonal
# component with a 24-observation period (daily cycle on hourly data).
_hw_spec = dict(
    trend="add",
    seasonal="add",
    seasonal_periods=24,
    initialization_method="estimated",
)
model: ES = ES(y_train, **_hw_spec)

# Parameters are optimised by statsmodels; use_brute=True is kept to match
# earlier advice even though it can make the fit slower.
modelFit = model.fit(use_brute=True, optimized=True)


# ---------- 4) forecast ----------
# One forecast step per row of the test set (January 2019 has 744 hours)
h = test_df.shape[0]
forecast = modelFit.forecast(h)

# ---------- 5) shape and type expected by the tests ----------
# Flatten to a 1-D float ndarray and floor negative forecasts at zero
pred = np.maximum(np.asarray(forecast, dtype=float).ravel(), 0.0)

# Debug: summarise the fitted objects and the prediction vector
print("DEBUG -----")
print(f"Type of model: {type(model)}")
print(f"Type of modelFit: {type(modelFit)}")
print(f"Type of pred: {type(pred)}")
print(f"Shape of pred: {pred.shape}")
print(f"First 10 predictions: {pred[:10]}")
print("Min pred value:", pred.min(), "Max pred value:", pred.max())
print(f"Length of test_df: {h} Length of pred: {pred.size}")
print("DEBUG -----")






# ---- captured notebook output (not code) ----
# Source-line echo, presumably from a pandas warning raised once per
# `_load_df` call (the warning message itself was not captured):
#   df = df.sort_values("Timestamp").set_index("Timestamp").asfreq("H")
#   df = df.sort_values("Timestamp").set_index("Timestamp").asfreq("H")
#
#
# DEBUG -----
# Type of model: <class 'statsmodels.tsa.holtwinters.model.ExponentialSmoothing'>
# Type of modelFit: <class 'statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper'>
# Type of pred: <class 'numpy.ndarray'>
# Shape of pred: (744,)
# First 10 predictions: [ 5606.1097574   3577.85981238  2565.74546374  2151.00488427
#   2202.56732959  2917.31114077  6568.02460355 10397.79786126
#  12091.00078948 11466.7139307 ]
# Min pred value: 0.0 Max pred value: 12626.55889461915
# Length of test_df: 744 Length of pred: 744
# DEBUG -----
