In [16]:
# I chose the Exponential Smoothing (Holt-Winters) model


# assignment1.py  — Exponential Smoothing solution (Econ 8310 A1)

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# ---------------------------------------------------------------------
# Data locations (use raw.githubusercontent.com for reliability)
TRAIN_URL = "https://raw.githubusercontent.com/dustywhite7/econ8310-assignment1/main/assignment_data_train.csv"
TEST_URL  = "https://raw.githubusercontent.com/dustywhite7/econ8310-assignment1/main/assignment_data_test.csv"


def _load_hourly_df(url: str) -> pd.DataFrame:
    """Load a CSV of hourly observations into a time-indexed DataFrame.

    Args:
        url: Location of the CSV, passed straight to ``pandas.read_csv``.
            The file must contain a ``Timestamp`` column.

    Returns:
        The CSV contents sorted by ``Timestamp``, indexed by it, and
        conformed to an hourly frequency. Hours missing from the file
        show up as rows of NaN.
    """
    raw = pd.read_csv(url, parse_dates=["Timestamp"])
    chronological = raw.sort_values("Timestamp")
    indexed = chronological.set_index("Timestamp")
    # asfreq reindexes onto a regular hourly grid, inserting NaN rows for
    # any hour the file skips.
    return indexed.asfreq("h")


# ------------------ fetch both datasets, prepare the target ------------------
# Train is loaded first, then test; each becomes an hourly time-indexed frame.
_train_df, _test_df = (_load_hourly_df(src) for src in (TRAIN_URL, TEST_URL))

# Target series: non-numeric "trips" entries are coerced to NaN, after which
# the NaNs are filled by time-based interpolation running in both directions.
y_train = pd.to_numeric(_train_df["trips"], errors="coerce").interpolate(
    method="time", limit_direction="both"
)

# --------------------- Holt-Winters model specification ----------------------
# `model` is the unfitted specification: additive trend plus additive
# seasonality, with a 24-step season (one day of hourly observations).
model = ExponentialSmoothing(
    y_train,
    seasonal_periods=24,
    seasonal="add",
    trend="add",
    initialization_method="estimated",
)

# --------------------------------- fitting -----------------------------------
# `modelFit` holds the fitted results; the autograder looks it up by this name.
modelFit = model.fit(use_brute=True, optimized=True)

# ------------------------------- predictions ---------------------------------
# Forecast one step per row of the test frame (should be 744 hours for Jan 2019).
h = _test_df.shape[0]
_forecast = modelFit.forecast(h)

# Flatten to a 1-D float ndarray, then floor at zero so no prediction is negative.
pred = np.clip(np.asarray(_forecast, dtype=float).ravel(), 0.0, None)

# (Optionally declare explicit exports; some graders import these names)
__all__ = ["model", "modelFit", "pred"]






Type of model: <class 'statsmodels.tsa.holtwinters.model.ExponentialSmoothing'>
Type of modelFit: <class 'statsmodels.tsa.holtwinters.results.HoltWintersResultsWrapper'>
Type of pred: <class 'numpy.ndarray'>
Shape of pred: (744,)
First 5 predictions: [5606.1097574  3577.85981238 2565.74546374 2151.00488427 2202.56732959]
Min pred: 0.0 Max pred: 12626.55889461915
Length of test_df: 744
Length of pred: 744
