In [7]:
import os
os.environ["LOKY_MAX_CPU_COUNT"] = "20"  # ← 改成你的物理核数
os.environ["OMP_NUM_THREADS"] = "1"

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from utils2 import *

np.random.seed(47)

# Parameter

train_ratio = 0.72
val_ratio   = 0.18

# 加载原始数据 
load_data   = np.load('user_load.npz')
PV_data     = np.load('PV_data.npz')
Radiation   = np.load('Radiation.npz')

load_data   = load_data['data']
PV_data     = PV_data['PV']
Radiation   = Radiation['Radiation'][:365*48]
num_user    = 400
# 保证Prosumer数据不会出现负数
load_data  += 1

# 放缩光伏功率数据
x_min = np.min(PV_data, axis=1, keepdims=True)  # 每行最小值
x_max = np.max(PV_data, axis=1, keepdims=True)  # 每行最大值
PV_data = 1.05 * (PV_data - x_min) / (x_max - x_min)

# 处理Prosumer和Consumer数据
user_list = np.arange(num_user)
site_list = np.arange(int(len(user_list)*0.1))
prosumer_list = np.random.choice(user_list, size=int(len(user_list)*0.1), replace=False)
consumer_list = np.setdiff1d(user_list, prosumer_list)

load_data   = load_data[user_list]
PV_data     = PV_data[site_list][:,:365*48]
load_data[prosumer_list] -= PV_data
load_data = np.clip(load_data, 0, None)



# 生成时间序列（从 2010-01-01 开始）
date_rng = pd.date_range('2010-01-01', periods=365*24, freq='60min')

# 提取 weekday（0=Mon, 6=Sun）
weekday = date_rng.weekday.values  # shape: (17520,)

# One-hot 编码 (7 类)
weekday_onehot = np.eye(7)[weekday]  # shape: (17520, 7)

# 去掉一列以消除共线性（例如去掉 Sunday 列）
weekday_onehot = weekday_onehot[:, 1:]  # shape: (17520, 6)

# 计算每个时间步对应的小时数
# 0, 0.5, 1.0, ... , 23.5, 然后循环
hours = np.arange(365*24) % 24

# 生成正弦/余弦特征（24小时周期）
hour_sin = np.sin(2 * np.pi * hours / 24).reshape(-1,1)
hour_cos = np.cos(2 * np.pi * hours / 24).reshape(-1,1)

Calendar = np.concatenate((weekday_onehot, hour_sin), axis=1)
Calendar = np.concatenate((Calendar, hour_cos), axis=1)

load_data = load_data.reshape(num_user, 2,-1).sum(axis=1)
Radiation = Radiation.reshape(2,-1).sum(axis=0)

In [8]:
# K-Means
MAE_list = []
MAPE_list = []
# 5
for K in range(9,10):
    save_path   = "Process_3\\K-means\\"
    os.makedirs(save_path,exist_ok=True)

    kmeans      = KMeans(n_clusters=K,random_state=49,n_init='auto')
    k_data      = load_data[:,:int(load_data.shape[1]*train_ratio)]

    kmeans.fit(k_data)
    cluster_list = np.array(kmeans.labels_)
    aggr_data = aggregate_by_cluster_list(load=load_data,labels=cluster_list,num_cluster=K, agg='sum')
    model_dict = train_model(cluster_num=K, cluster_list=cluster_list, aggr_data=aggr_data, 
                                    lag=24,train_ratio=train_ratio, Radiation=Radiation,Calendar=Calendar)

    predict_result, test_result = Predict(cluster_num=K,cluster_list=cluster_list,
                                        aggr_data=aggr_data,lag=24,train_ratio=train_ratio,
                                        val_ratio=val_ratio,Radiation=Radiation,
                                        Calendar=Calendar,model_dict=model_dict)

    predict_result = predict_result.sum(axis=0)
    test_result = test_result.sum(axis=0)

    np.save(save_path+"predict_result.npy",predict_result)
    np.save(save_path+"test_result.npy",test_result)
    
    MAE  = np.mean(np.abs(predict_result-test_result))
    MAPE = np.mean(np.abs(predict_result-test_result)/test_result)*100
    
    MAE_list.append(MAE)
    MAPE_list.append(MAPE)
MAPE_list = np.asarray(MAPE_list)
MAE_list = np.asarray(MAE_list)

print("最小MAE聚类数",np.argmin(MAE_list)+1,"最小MAE",np.min(MAE_list))
print("最小MAPE聚类数",np.argmin(MAPE_list)+1,"最小MAPE",np.min(MAPE_list))

# np.save(save_path+"predict_result.npy",predict_result)
# np.save(save_path+"test_result.npy",test_result)

# 最小MAE聚类数 5 最小MAE 126.30187904876584
# 最小MAPE聚类数 5 最小MAPE 0.7250700345199731

最小MAE聚类数 1 最小MAE 61.120310577180895
最小MAPE聚类数 1 最小MAPE 4.233868116247346


In [9]:
# GMM
# K = 10
save_path   = "Process_3\\GMM\\"
os.makedirs(save_path,exist_ok=True)

MAE_list = []
MAPE_list = []
# 3
for K in range(14,15):
    gmm = GaussianMixture(n_components=K, covariance_type="diag", n_init=3, random_state=49)
    gmm.fit(load_data[:,:int(load_data.shape[1]*train_ratio)])
    cluster_list = np.array(gmm.predict(load_data[:,:int(load_data.shape[1]*train_ratio)]))

    aggr_data = aggregate_by_cluster_list(load=load_data,labels=cluster_list,num_cluster=K, agg='sum')
    model_dict = train_model(cluster_num=K, cluster_list=cluster_list, aggr_data=aggr_data, 
                                    lag=24,train_ratio=train_ratio, Radiation=Radiation,Calendar=Calendar)

    predict_result, test_result = Predict(cluster_num=K,cluster_list=cluster_list,
                                        aggr_data=aggr_data,lag=24,train_ratio=train_ratio,
                                        val_ratio=val_ratio,Radiation=Radiation,
                                        Calendar=Calendar,model_dict=model_dict)

    predict_result = predict_result.sum(axis=0)
    test_result = test_result.sum(axis=0)

    np.save(save_path+"predict_result.npy",predict_result)
    np.save(save_path+"test_result.npy",test_result)

    MAE  = np.mean(np.abs(predict_result-test_result))
    MAPE = np.mean(np.abs(predict_result-test_result)/test_result)*100

    MAE_list.append(MAE)
    MAPE_list.append(MAPE)
    print("Epoch:",K,"FINISHED")

MAPE_list = np.asarray(MAPE_list)
MAE_list = np.asarray(MAE_list)

print("最小MAE聚类数",np.argmin(MAE_list)+1,"最小MAE",np.min(MAE_list))
print("最小MAPE聚类数",np.argmin(MAPE_list)+1,"最小MAPE",np.min(MAPE_list))

# np.save(save_path+"predict_result.npy",predict_result)
# np.save(save_path+"test_result.npy",test_result)

Epoch: 14 FINISHED
最小MAE聚类数 1 最小MAE 61.20740653965068
最小MAPE聚类数 1 最小MAPE 4.238647920917969


In [10]:
# Up_to_down
K = 1
save_path   = "Process_3\\Up_to_down\\"
os.makedirs(save_path,exist_ok=True)

kmeans      = KMeans(n_clusters=K,random_state=49,n_init='auto')
k_data      = load_data[:,:int(load_data.shape[1]*train_ratio)]

kmeans.fit(k_data)
cluster_list = np.array(kmeans.labels_)
aggr_data = aggregate_by_cluster_list(load=load_data,labels=cluster_list,num_cluster=K, agg='sum')
model_dict = train_model(cluster_num=K, cluster_list=cluster_list, aggr_data=aggr_data, 
                                lag=24,train_ratio=train_ratio, Radiation=Radiation,Calendar=Calendar)

predict_result, test_result = Predict(cluster_num=K,cluster_list=cluster_list,
                                    aggr_data=aggr_data,lag=24,train_ratio=train_ratio,
                                    val_ratio=val_ratio,Radiation=Radiation,
                                    Calendar=Calendar,model_dict=model_dict)

# predict_result = predict_result.sum(axis=0)
# test_result = test_result.sum(axis=0)

np.save(save_path+"predict_result.npy",predict_result)
np.save(save_path+"test_result.npy",test_result)

In [11]:
# Bottom_to_Up
K = num_user
save_path   = "Process_3\\Bottom_to_Up\\"
os.makedirs(save_path,exist_ok=True)

kmeans      = KMeans(n_clusters=K,random_state=49,n_init='auto')
k_data      = load_data[:,:int(load_data.shape[1]*train_ratio)]

kmeans.fit(k_data)
cluster_list = np.array(kmeans.labels_)
aggr_data = aggregate_by_cluster_list(load=load_data,labels=cluster_list,num_cluster=K, agg='sum')
model_dict = train_model(cluster_num=K, cluster_list=cluster_list, aggr_data=aggr_data, 
                                lag=24,train_ratio=train_ratio, Radiation=Radiation,Calendar=Calendar)

predict_result, test_result = Predict(cluster_num=K,cluster_list=cluster_list,
                                    aggr_data=aggr_data,lag=24,train_ratio=train_ratio,
                                    val_ratio=val_ratio,Radiation=Radiation,
                                    Calendar=Calendar,model_dict=model_dict)

predict_result = predict_result.sum(axis=0)
test_result = test_result.sum(axis=0)

np.save(save_path+"predict_result.npy",predict_result)
np.save(save_path+"test_result.npy",test_result)

In [12]:
# Ensemble
save_path   = "Process_3\\Ensemble\\"
os.makedirs(save_path,exist_ok=True)

predict_val_total = []
test_val_total = []
predict_result_total = []
test_result_total = []
for K in [i for i in range(1,20)]:
    
    kmeans = KMeans(n_clusters=K,random_state=49,n_init='auto')
    kmeans.fit(load_data[:,:int(load_data.shape[1]*train_ratio)])
    cluster_list = np.array(kmeans.labels_)
    aggr_data = aggregate_by_cluster_list(load=load_data,labels=cluster_list,num_cluster=K, agg='sum')
    model_dict = train_model(cluster_num=K, cluster_list=cluster_list, aggr_data=aggr_data, 
                            lag=24,train_ratio=train_ratio, Radiation=Radiation,Calendar=Calendar)
    predict_val, test_val = Predict_val(cluster_num=K,cluster_list=cluster_list,
                                        aggr_data=aggr_data,lag=24,train_ratio=train_ratio,
                                        val_ratio=val_ratio,Radiation=Radiation,
                                        Calendar=Calendar,model_dict=model_dict)
    
    predict_val_total.append(predict_val.sum(axis=0))
    test_val_total.append(test_val.sum(axis=0))
    
    predict_result, test_result = Predict(cluster_num=K,cluster_list=cluster_list,
                                        aggr_data=aggr_data,lag=24,train_ratio=train_ratio,
                                        val_ratio=val_ratio,Radiation=Radiation,
                                        Calendar=Calendar,model_dict=model_dict)
    predict_result_total.append(predict_result.sum(axis=0))
    test_result_total.append(test_result.sum(axis=0))

predict_val_total = np.asarray(predict_val_total).T
test_val_total = np.asarray(test_val_total).mean(axis=0).T

Combinator = LinearRegression().fit(X=predict_val_total,y=test_val_total)
predict_result_total = Combinator.predict(np.asarray(predict_result_total).T)

np.save(save_path+"predict_result.npy",np.asarray(predict_result_total))
np.save(save_path+"test_result.npy",np.asarray(test_result_total).mean(axis=0))