# In [19]:
# I chose the Exponential Smoothing (Holt-Winters) model


import os
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Local file names (tried first) and the remote copies used as a fallback.
train_file = "assignment_data_train.csv"
test_file = "assignment_data_test.csv"

train_url = "https://raw.githubusercontent.com/dustywhite7/econ8310-assignment1/main/assignment_data_train.csv"
test_url = "https://raw.githubusercontent.com/dustywhite7/econ8310-assignment1/main/assignment_data_test.csv"


def _read_dataset(local_name, remote_url):
    """Read one CSV, using the local copy if it exists, else the remote URL.

    The "Timestamp" column is parsed into datetimes while loading.
    """
    location = local_name if os.path.exists(local_name) else remote_url
    return pd.read_csv(location, parse_dates=["Timestamp"])


# Load train
train_df = _read_dataset(train_file, train_url)

# Load test
test_df = _read_dataset(test_file, test_url)

# Preprocess
# Order the rows by time, index them by "Timestamp" and put them on a regular
# hourly grid; asfreq inserts NaN rows for any hour missing from the data.
train_df = train_df.sort_values("Timestamp").set_index("Timestamp").asfreq("h")
# Coerce non-numeric "trips" entries to NaN, then fill the gaps with
# time-weighted interpolation. limit_direction="both" also fills NaNs at the
# very start of the series, which the default forward-only fill would leave in
# place, so no missing values reach the model (provided the column holds at
# least one valid number).
y_train = pd.to_numeric(train_df["trips"], errors="coerce").interpolate(
    method="time", limit_direction="both"
)

# Model
# Holt-Winters specification: additive trend plus an additive seasonal
# component with a period of 24 observations, initial states estimated.
_holt_winters_spec = {
    "trend": "add",
    "seasonal": "add",
    "seasonal_periods": 24,
    "initialization_method": "estimated",
}
model = ExponentialSmoothing(y_train, **_holt_winters_spec)

# Fit
# Optimise the smoothing parameters, with brute-force search enabled.
modelFit = model.fit(use_brute=True, optimized=True)

# Forecast
# Produce one forecast step per row of the test set.
h = len(test_df)
forecast = modelFit.forecast(h)

# Flatten to a 1-D float array and raise any negative forecast to zero.
pred = np.clip(np.asarray(forecast, dtype=float).ravel(), 0, None)

# Debug / check (only in ipynb)
# Print each label next to its value, one line per item.
for _label, _value in (
    ("Train shape:", train_df.shape),
    ("Test shape:", test_df.shape),
    ("Prediction length:", len(pred)),
    ("First 5 predictions:", pred[:5]),
):
    print(_label, _value)


# Output:
# Train shape: (8760, 5)
# Test shape: (744, 5)
# Prediction length: 744
# First 5 predictions: [5606.1097574  3577.85981238 2565.74546374 2151.00488427 2202.56732959]
