In [19]:
# I chose the Exponential Smoothing (Holt-Winters) model

# assignment1.py - Google Colab版本

# 首先安装必要的库
!pip install statsmodels

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Remote CSV locations of the training and test data
TRAIN_URL = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_train.csv"
TEST_URL = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_test.csv"

# Load the training data, order it chronologically and place it on a regular
# hourly grid (hours absent from the file become NaN rows).
# An explicit Hour offset is used instead of the "H" string alias, which
# recent pandas releases deprecate (FutureWarning) in favour of "h".
train = pd.read_csv(TRAIN_URL, parse_dates=["Timestamp"])
train = train.sort_values("Timestamp").set_index("Timestamp").asfreq(pd.offsets.Hour())

# Target variable: coerce to numeric (unparseable entries become NaN), then
# fill the gaps with time-based interpolation, filling in both directions
y_train = pd.to_numeric(train["trips"], errors="coerce")
y_train = y_train.interpolate(method="time", limit_direction="both")

# Holt-Winters specification: additive trend plus additive seasonality
hw_options = {
    "trend": "add",
    "seasonal": "add",
    "seasonal_periods": 24,  # a 24-hour seasonal cycle
    "initialization_method": "estimated",
}

# Build the exponential smoothing model
# (`model` must remain the *unfitted* model object)
model = ExponentialSmoothing(y_train, **hw_options)

# Estimate the parameters (`modelFit` must be the *fitted* model)
modelFit = model.fit(use_brute=True, optimized=True)

# Load the test data; only its row count is used here, to set the horizon
test = pd.read_csv(TEST_URL, parse_dates=["Timestamp"])

# Forecast one step per test row
# (the original notes expect 744 hourly steps, i.e. January 2019)
h = test.shape[0]
forecast = modelFit.forecast(h)

# Convert the forecast to a flat, one-dimensional float numpy array
pred = np.ravel(np.asarray(forecast, dtype=float))

# Trip counts cannot be negative, so floor the predictions at zero
pred = np.maximum(pred, 0)

# Sanity-check output: first ten predictions, total length and container type
for label, value in (
    ("预测结果的前10个值:", pred[:10]),
    ("预测结果的长度:", len(pred)),
    ("预测结果的类型:", type(pred)),
):
    print(label, value)



  train = train.sort_values("Timestamp").set_index("Timestamp").asfreq("H")


预测结果的前10个值: [ 5606.1097574   3577.85981238  2565.74546374  2151.00488427
  2202.56732959  2917.31114077  6568.02460355 10397.79786126
 12091.00078948 11466.7139307 ]
预测结果的长度: 744
预测结果的类型: <class 'numpy.ndarray'>
