In [18]:
import os
os.environ["LOKY_MAX_CPU_COUNT"] = "20"  # ← 改成你的物理核数
os.environ["OMP_NUM_THREADS"] = "1"

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from utils2 import *

np.random.seed(47)

# 加载原始数据 
load_data   = np.load('user_load.npz')
PV_data     = np.load('PV_data.npz')
Radiation   = np.load('Radiation.npz')

load_data   = load_data['data']
PV_data     = PV_data['PV']
Radiation   = Radiation['Radiation'][:365*48]

num_user    = 400

# 保证Prosumer数据不会出现负数
load_data  += 8

# 放缩光伏功率数据
x_min = np.min(PV_data, axis=1, keepdims=True)  # 每行最小值
x_max = np.max(PV_data, axis=1, keepdims=True)  # 每行最大值
PV_data = 7 * (PV_data - x_min) / (x_max - x_min)

# 处理Prosumer和Consumer数据
user_list = np.arange(num_user)
site_list = np.arange(int(len(user_list)*0.1))
prosumer_list = np.random.choice(user_list, size=int(len(user_list)*0.1), replace=False)
consumer_list = np.setdiff1d(user_list, prosumer_list)

load_data   = load_data[user_list]
PV_data     = PV_data[site_list][:,:365*48]
load_data[prosumer_list] -= PV_data


# 生成时间序列（从 2010-01-01 开始）
date_rng = pd.date_range('2010-01-01', periods=365*24, freq='60min')

# 提取 weekday（0=Mon, 6=Sun）
weekday = date_rng.weekday.values  # shape: (17520,)

# One-hot 编码 (7 类)
weekday_onehot = np.eye(7)[weekday]  # shape: (17520, 7)

# 去掉一列以消除共线性（例如去掉 Sunday 列）
weekday_onehot = weekday_onehot[:, 1:]  # shape: (17520, 6)

# 计算每个时间步对应的小时数
# 0, 0.5, 1.0, ... , 23.5, 然后循环
hours = (np.arange(365*24) % 24)

# 生成正弦/余弦特征（24小时周期）
hour_sin = np.sin(2 * np.pi * hours / 24).reshape(-1,1)
hour_cos = np.cos(2 * np.pi * hours / 24).reshape(-1,1)

Calendar = np.concatenate((weekday_onehot, hour_sin), axis=1)
Calendar = np.concatenate((Calendar, hour_cos), axis=1)

print(Calendar.shape)
load_data = load_data.reshape(num_user, 2,-1).sum(axis=1)
Radiation = Radiation.reshape(2,-1).sum(axis=0)
print(load_data.shape)

(8760, 8)
(400, 8760)


In [21]:
K           = 10          # 簇数 Num of cluster
num_users   = 1000        # 用户数 Num of users

train_ratio = 0.72
val_ratio   = 0.18
max_epoch   = 100
save_path   = "Process_3\\CLC\\"
os.makedirs(save_path,exist_ok=True)
MAE_list = []
MAPE_list = []
# 4/5
for K in range(10,11):
    # 1.初始随机分配 Initialization
    kmeans = KMeans(n_clusters=K,random_state=47,n_init='auto')
    kmeans.fit(load_data[:, :int(load_data.shape[1]*train_ratio)])
    cluster_list = np.array(kmeans.labels_)
    for epoch in range(max_epoch):
        original_list = cluster_list
        aggr_data = aggregate_by_cluster_list(load=load_data,labels=cluster_list,num_cluster=K, agg='mean')
        # 2.MLR预测模型构建 Forecasting Models Training
        model_dict = train_model(cluster_num=K, cluster_list=cluster_list, aggr_data=aggr_data, 
                                lag=24,train_ratio=train_ratio, Radiation=Radiation,Calendar=Calendar)
        # 3.构建误差反馈矩阵 Feedback Mechanism
        matrix = error_feedback(load_data=load_data,cluster_num=K, model_dict=model_dict,lag=24,
                                train_ratio=train_ratio, val_ratio=val_ratio, Radiation=Radiation,Calendar=Calendar)
        # 4.更新簇标签 Updating Clustering Membership
        cluster_list = np.argmin(matrix,axis=1)
        change_num = change_rate_simple(old_labels=original_list,new_labels=cluster_list)
        if change_num <= 1:
            print("finish")
            break
        else:
            # pass
            print('Change Num =',change_num)
    predict_result, test_result = Predict(cluster_num=K,cluster_list=cluster_list,
                                        aggr_data=aggr_data,lag=24,train_ratio=train_ratio,
                                        val_ratio=val_ratio,Radiation=Radiation,
                                        Calendar=Calendar,model_dict=model_dict)
    unique_elements = np.unique(cluster_list)

    user_belong = count_clusters(num_cluster=K, cluster_list=cluster_list)
    for index, cluster_id in enumerate(unique_elements):
            predict_result[index] = predict_result[index]*len(user_belong[str(cluster_id)])
            test_result[index] = test_result[index]*len(user_belong[str(cluster_id)])
    predict_result = predict_result.sum(axis=0)
    test_result = test_result.sum(axis=0)

    np.save(save_path+"predict_result.npy",predict_result)
    np.save(save_path+"test_result.npy",test_result)

    MAE  = np.mean(np.abs(predict_result-test_result))
    MAPE = np.mean(np.abs(predict_result-test_result)/test_result)*100
    
    MAE_list.append(MAE)
    MAPE_list.append(MAPE)
    print("Epoch:",K,"FINISHED")
    
    
    num = 0
    for index in range(K):
        if model_dict[str(index)] == None:
            pass
        else:
            num +=1
    print("Final Cluster Num = ",num,"\n")
    
MAPE_list = np.asarray(MAPE_list)
MAE_list = np.asarray(MAE_list)

print("最小MAE聚类数",np.argmin(MAE_list)+1,"最小MAE",np.min(MAE_list))
print("最小MAPE聚类数",np.argmin(MAPE_list)+1,"最小MAPE",np.min(MAPE_list))

Change Num = 109
Change Num = 68
Change Num = 31
Change Num = 26
Change Num = 21
Change Num = 17
Change Num = 23
Change Num = 13
Change Num = 18
Change Num = 11
Change Num = 4
Change Num = 2
finish
Epoch: 10 FINISHED
Final Cluster Num =  7 

最小MAE聚类数 1 最小MAE 77.38032943054063
最小MAPE聚类数 1 最小MAPE 1.1067957025282014
