In [None]:
# 📦 Install the required dependencies (only needs to be run once, on first use).
# NOTE(review): later cells also import matplotlib and sklearn, which are not in this
# list — presumably they are preinstalled in the target environment; confirm.
%pip install nixtlats pandas dask bottleneck pyarrow utilsforecast --quiet

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the raw readings: a gzip-compressed CSV without a header row.
file_path = "solar_AL.txt.gz"
df = pd.read_csv(file_path, header=None, compression="gzip")

# Row-wise mean over every column, used as the overall power-output indicator
# (the original note says the file holds 137 plants — not verifiable from the code).
df["mean_power"] = df.mean(axis=1)

# Timestamp axis: one stamp per row, 10 minutes apart, starting on 2006-01-01
# (a full year at this resolution would be 365*24*6 = 52560 rows).
start_date = "2006-01-01"
periods = len(df)
time_index = pd.date_range(start=start_date, periods=periods, freq="10min")

# Long-format series: "ds" (timestamp) and "y" (mean power).
data = pd.DataFrame({"ds": time_index}).assign(y=df["mean_power"].values)

In [None]:
# 🤖 Step 4: forecast with Nixtla TimeGPT
import os

from nixtlats import NixtlaClient

# 🔑 Prefer the NIXTLA_API_KEY environment variable so a real key never has to be saved
# in the notebook; if it is unset, replace the placeholder below with your own key.
# Get your free key at https://www.nixtla.io/
client = NixtlaClient(
    api_key=os.environ.get("NIXTLA_API_KEY", "YOUR_NIXTLA_API_KEY_HERE")
)

# Downsample to hourly resolution: mean of the readings that fall inside each hour.
data_hourly = data.set_index("ds").resample("H").mean().reset_index()

# Training set: 2006-01-01 (inclusive) up to 2006-11-01 (exclusive), i.e. January–October.
train_data = data_hourly[
    (data_hourly["ds"] >= "2006-01-01") & (data_hourly["ds"] < "2006-11-01")
].reset_index(drop=True)

# Test set: November 2006.
test_data = data_hourly[
    (data_hourly["ds"] >= "2006-11-01") & (data_hourly["ds"] < "2006-12-01")
].reset_index(drop=True)

h = len(test_data)  # forecast horizon = number of rows in the test set

forecast_df = client.forecast(
    df=train_data,
    h=h,
    freq="H",            # hourly forecast
    time_col="ds",
    target_col="y",
    finetune_steps=100,  # fine-tune for 100 steps (other values such as 50 can be tried)
)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd


# Give the prediction column the same name as the target column ("y").
forecast_df = forecast_df.rename(columns={"TimeGPT": "y"})

# Observed and predicted values side by side (the Series are aligned on their index).
combined_df = pd.DataFrame(
    {
        "ds": test_data["ds"],
        "Actual": test_data["y"],
        "Forecast": forecast_df["y"],
    }
)

# Draw both curves on a single axes.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(combined_df["ds"], combined_df["Actual"], label="Actual", alpha=0.7)
ax.plot(combined_df["ds"], combined_df["Forecast"], label="Forecast", alpha=0.7)
ax.set_title("☀️ 2006 - Solar Power Forecast vs Actual (Hourly)")
ax.set_xlabel("Date")
ax.set_ylabel("Solar Power Output (Hourly Average)")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Score the forecast against the held-out observations.
y_true = combined_df["Actual"]
y_pred = combined_df["Forecast"]

mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the mean squared error

print(f"📉 MAE: {mae:.4f}")
print(f"📉 RMSE: {rmse:.4f}")

In [None]:
# Signed error at every timestamp (positive means the forecast was too high).
combined_df["Error"] = combined_df["Forecast"] - combined_df["Actual"]

plt.figure(figsize=(15, 4))
plt.plot(combined_df["ds"], combined_df["Error"], label="Forecast Error", color="orange")
# The test window selected earlier in this notebook is 2006-11-01 .. 2006-12-01, so the
# figure is labelled November (it previously said July). Keep this in sync with that window.
plt.title("🧪 Forecast Error Over November 2006")
plt.xlabel("Date")
plt.ylabel("Error (Forecast - Actual)")
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nixtlats import NixtlaClient

# (Optional) enable the next line if you want to compute the error metrics at the end.
# from sklearn.metrics import mean_absolute_error, mean_squared_error


def _load_hourly_series(csv_path):
    """Read one monthly CSV and return a frame with columns ``ds`` and ``y``.

    The 2nd column of the file is taken as the timestamp and the 3rd as the target.
    Columns are picked by position, so each file is handled independently of the
    header labels used in any other file.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file; it is read with ``encoding="latin1"``.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``ds`` (parsed with ``pd.to_datetime``) and ``y``.
    """
    raw = pd.read_csv(csv_path, encoding="latin1")
    series = raw.iloc[:, [1, 2]].copy()
    series.columns = ["ds", "y"]
    series["ds"] = pd.to_datetime(series["ds"])
    return series


# 1️⃣ Load the May and June data.
# Per the original note the header looks like [index, time, power 1, power 2];
# the "time" column (2nd) becomes ds and the "power 1" column (3rd) becomes y.
may = _load_hourly_series("May_h.csv")
june = _load_hourly_series("June_h.csv")

# 2️⃣ Call the model: May is the input history, June is the forecast target.
# `client` is the NixtlaClient created in the earlier TimeGPT cell; run that cell first.
h = len(june)  # horizon = number of rows in the June file

forecast = client.forecast(
    df=may,
    h=h,
    freq="H",
    time_col="ds",
    target_col="y",
    finetune_steps=0,
)

# 3️⃣ Collect actual and predicted values.
# The forecast is attached by position (.values), i.e. row i of the forecast is paired
# with row i of June — assumes June has no missing hours and starts right after May;
# TODO confirm against the data.
june_pred = pd.DataFrame({
    "ds":       june["ds"],
    "Actual":   june["y"],
    "Forecast": forecast["TimeGPT"].values,
})

# 4️⃣ Visualise June: actual vs forecast.
plt.figure(figsize=(12, 4))
plt.plot(june_pred["ds"], june_pred["Actual"], label="Actual (June)", alpha=0.7)
plt.plot(june_pred["ds"], june_pred["Forecast"], label="Forecast (June)", alpha=0.7)
plt.title("TimeGPT: Train on May → Forecast June (Hourly)")
plt.xlabel("Date")
plt.ylabel("Power Output")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# 5️⃣ (Optional) compute and print the error metrics.
# mae  = mean_absolute_error(june_pred["Actual"], june_pred["Forecast"])
# rmse = np.sqrt(mean_squared_error(june_pred["Actual"], june_pred["Forecast"]))
# print(f"MAE:  {mae:.4f}")
# print(f"RMSE: {rmse:.4f}")