# I chose the Exponential Smoothing (Holt-Winters) model


import os
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# 1. Load data
# Both CSV files are looked up in the current working directory only.
base_dir = os.getcwd()
train_file = os.path.join(base_dir, "assignment_data_train.csv")
test_file = os.path.join(base_dir, "assignment_data_test.csv")

if os.path.exists(train_file) and os.path.exists(test_file):
    train_df = pd.read_csv(train_file)
    test_df = pd.read_csv(test_file)
else:
    # The message names the directory that was actually searched (the
    # parent directory is never checked).
    raise FileNotFoundError(
        f"Cannot find training/testing CSV files in {base_dir!r}."
    )

# 2. Prepare data
# Order both frames chronologically and index them by timestamp.
train_df = train_df.sort_values("Timestamp").set_index("Timestamp")
test_df = test_df.sort_values("Timestamp").set_index("Timestamp")

# The target column in the training file is "trips" (its columns are
# Timestamp, year, month, day, hour, trips). Reading a column that does not
# exist raises KeyError before any model is fitted.
train_y = train_df["trips"].astype(float).values
# The test file has no target column, so there are no actuals to load.
# test_y stays defined for code that refers to it, but is None unless the
# test file happens to carry the target.
test_y = (
    test_df["trips"].astype(float).values
    if "trips" in test_df.columns
    else None
)

# 3. Train model
# Additive trend and additive seasonality with a 24-observation (daily, for
# hourly data) seasonal cycle.
model = ExponentialSmoothing(train_y, trend="add", seasonal="add", seasonal_periods=24)
modelFit = model.fit()

# 4. Forecast
# One prediction per test row; the horizon comes from the test frame itself
# because the test set has no target values to measure.
pred = np.asarray(modelFit.forecast(len(test_df)))

# Debug aid: print the entries of the current working directory.
import os
print(os.listdir(os.curdir))



# In [26]:
# Debug aid: print the entries of the current working directory.
import os
print(os.listdir("."))




# Output: ['.config', 'assignment_data_test.csv', 'assignment_data_train.csv', 'sample_data']


# In [28]:
import pandas as pd

# Read the two CSV files (both are already visible in the current directory).
train_df = pd.read_csv("assignment_data_train.csv")
test_df = pd.read_csv("assignment_data_test.csv")

# Show the column names of each frame, a blank separator line, and then the
# first three rows of the training data.
print("Train columns:", train_df.columns.tolist())
print("Test  columns:", test_df.columns.tolist())
print()
print(train_df.head(3))



# Train columns: ['Timestamp', 'year', 'month', 'day', 'hour', 'trips']
# Test  columns: ['Timestamp', 'year', 'month', 'day', 'hour']
#
#              Timestamp  year  month  day  hour  trips
# 0  2018-01-01 00:00:00  2018      1    1     0  16714
# 1  2018-01-01 01:00:00  2018      1    1     1  19041
# 2  2018-01-01 02:00:00  2018      1    1     2  16590


# In [31]:
import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# 1) Load: read both CSVs, parsing the "Timestamp" column into datetimes.
train_df = pd.read_csv("assignment_data_train.csv", parse_dates=["Timestamp"])
test_df = pd.read_csv("assignment_data_test.csv", parse_dates=["Timestamp"])

# 2) Prep: sort chronologically, index by timestamp, and conform each frame
#    to an hourly frequency.
train_df = train_df.sort_values("Timestamp")
train_df = train_df.set_index("Timestamp")
train_df = train_df.asfreq("h")

test_df = test_df.sort_values("Timestamp")
test_df = test_df.set_index("Timestamp")
test_df = test_df.asfreq("h")

# Target series: values that cannot be parsed as numbers become NaN, and the
# gaps are then interpolated along the time index in both directions.
y_train = pd.to_numeric(train_df["trips"], errors="coerce")
y_train = y_train.interpolate(method="time", limit_direction="both")
y_train = y_train.astype(float)

# 3) Model: additive damped trend plus additive weekly seasonality
#    (168 = 7 days x 24 hourly observations).
model = ExponentialSmoothing(
    y_train,
    seasonal="add",
    seasonal_periods=168,
    trend="add",
    damped_trend=True,
    initialization_method="estimated",
)

# 4) Fit: optimise the smoothing parameters, seeding the optimiser with a
#    brute-force search for starting values.
modelFit = model.fit(optimized=True, use_brute=True)

# 5) Forecast: one step per row of the (hourly-conformed) test frame.
h = len(test_df)
forecast = modelFit.forecast(steps=h)

# 6) Contract for autograder: a flat float array with negatives raised to 0.
pred = np.maximum(np.asarray(forecast, dtype=float).ravel(), 0.0)


# Train shape: (8760, 5) | Test shape: (744, 4)
# Prediction length: 744
# First 5 predictions: [4895.00823356 2455.68666431 1342.61929383  945.63071185 1297.2479594 ]
