# Notebook cell In [17]
# I chose the Exponential Smoothing (Holt-Winters) model

# assignment1.py

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Data locations for the assignment's training and test CSV files.
TRAIN_URL = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_train.csv"
TEST_URL = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_test.csv"

# Load the training data, order it chronologically and place it on a regular
# hourly grid. asfreq() adds a NaN row for every hour absent from the file;
# those gaps are filled by the interpolation below.
# The offset object replaces the "H" string alias, which pandas 2.2 deprecated
# (it emits a FutureWarning); pd.offsets.Hour() yields the same hourly
# frequency on both older and newer pandas releases.
train = pd.read_csv(TRAIN_URL, parse_dates=["Timestamp"])
train = (
    train.sort_values("Timestamp")
    .set_index("Timestamp")
    .asfreq(pd.offsets.Hour())
)

# Target variable: coerce anything non-numeric to NaN, then interpolate along
# the time index in both directions so the model receives a gap-free series.
y_train = pd.to_numeric(train["trips"], errors="coerce")
y_train = y_train.interpolate(method="time", limit_direction="both")

# Additive-trend, additive-seasonal exponential smoothing (Holt-Winters).
model = ExponentialSmoothing(
    y_train,
    trend="add",
    seasonal="add",
    seasonal_periods=24 * 7,  # weekly seasonality (24 hours * 7 days)
    initialization_method="estimated",
)

# Fit once; `modelFit` is the fitted results object used for forecasting.
modelFit = model.fit(optimized=True, use_brute=True)

# Load the test data and put it on the same regular hourly grid as the
# training data. pd.offsets.Hour() is used instead of the "H" string alias,
# which pandas 2.2 deprecated; the resulting frequency is identical.
test = pd.read_csv(TEST_URL, parse_dates=["Timestamp"])
test = (
    test.sort_values("Timestamp")
    .set_index("Timestamp")
    .asfreq(pd.offsets.Hour())
)

# Forecast one step per test row (expected to be 744 hours, i.e. January 2019,
# per the assignment -- the length is taken from the data, not hard-coded).
h = len(test)
forecast = modelFit.forecast(steps=h)

# Expose the forecast as a flat 1-D float numpy array.
pred = np.asarray(forecast, dtype=float).ravel()

# Taxi trip counts cannot be negative, so clamp any negative forecast to zero.
pred = np.maximum(pred, 0)

# Final output: a 1-D float numpy array with one non-negative value per test
# row. The model is fitted and the forecast is produced exactly once earlier
# in this script, so nothing is refitted here: a second fit of the same model
# only doubles the run time, and rebuilding `pred` without the clamp would
# reintroduce negative trip counts. The train/test frames are likewise already
# sorted and indexed by Timestamp, so that step must not be repeated
# (sort_values("Timestamp") fails once the column has become the index).
pred = np.maximum(np.asarray(forecast, dtype=float).ravel(), 0)
