In [52]:
import numpy as np
import pandas as pd
import os
import pickle
from tqdm import tqdm
from pathlib import Path
from collections import Counter
from sklearn.preprocessing import StandardScaler

In [2]:
# Preview the demo CSV with each missing value back-filled from the next valid
# row. DataFrame.bfill() is the supported replacement for the deprecated
# fillna(method="backfill") spelling and returns a new frame (nothing is
# modified in place).
pd.read_csv("demo.csv").bfill()

Unnamed: 0,TurbID,Day,Tmstamp,Wspd,Wdir,Etmp,Itmp,Ndir,Pab1,Pab2,Pab3,Prtv,Patv
0,1,1,00:00,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66
1,2,1,00:10,6.17,-3.99,30.73,41.8,25.92,1.0,1.0,1.0,-0.25,494.66
2,2,1,00:20,6.27,-2.18,30.6,41.63,20.91,1.0,1.0,1.0,-0.24,509.76
3,1,1,00:30,6.42,-0.73,30.52,41.52,20.91,1.0,1.0,1.0,-0.26,542.53
4,1,1,00:40,6.25,0.89,30.49,41.38,20.91,1.0,1.0,1.0,-0.23,509.36
5,1,1,00:50,6.1,-1.03,30.47,41.22,20.91,1.0,1.0,1.0,-0.27,482.21
6,3,1,01:00,6.77,1.07,30.31,41.19,20.91,1.0,1.0,1.0,-0.23,584.75
7,3,1,01:10,6.7,-2.8,30.24,41.0,20.91,1.0,1.0,1.0,-0.23,557.98
8,3,1,01:20,6.44,-3.46,30.13,40.91,20.91,1.0,1.0,1.0,-0.21,503.94


In [3]:
# Raw SDWPF measurements; adjust this path to the local checkout.
SDWPF_CSV = "/mnt/File/my_file/2024-2/fed-multimodal/fed_multimodal/data/SDWPF/SDWPF.csv"

# Inputs that are standardised per turbine. "Patv" (the prediction target) is
# intentionally not in this list, so it keeps its original units.
SCALED_COLS = ['Wspd', 'Wdir', 'Etmp', 'Itmp', 'Ndir', 'Pab1', 'Pab2', 'Pab3', 'Prtv']
# Column layout of the two modalities; the target is always the last column.
MODEL1_COLS = ['Wspd', 'Etmp', 'Itmp', 'Prtv', 'Patv']
MODEL2_COLS = ['Wdir', 'Ndir', 'Pab1', 'Pab2', 'Pab3', 'Patv']
ORDERED_COLS = ['Wspd', 'Etmp', 'Itmp', 'Prtv', 'Wdir', 'Ndir', 'Pab1', 'Pab2', 'Pab3', 'Patv']

df = pd.read_csv(SDWPF_CSV)

# Impute gaps inside each turbine's own series: back-fill first, then
# forward-fill whatever is still missing at the end of the series. Filling the
# whole frame at once would copy readings across turbine boundaries.
# bfill()/ffill() replace the deprecated fillna(method=...) spelling.
fill_cols = [col for col in df.columns if col != "TurbID"]
df[fill_cols] = df.groupby("TurbID")[fill_cols].bfill()
df[fill_cols] = df.groupby("TurbID")[fill_cols].ffill()

# Group by TurbID: one client per turbine.
groups = df.groupby("TurbID")

# One frame per turbine, without the identifier and the Day/Tmstamp columns.
data_by_turbid = {
    turbid: group.drop(columns=["TurbID", "Day", "Tmstamp"])
    for turbid, group in groups
}

agg_batch = 12  # rows per window (two hours, per the original note)
print("数据总个数为:", int(df.shape[0] // agg_batch))
print("客户端个数为:", int(len(data_by_turbid)))

# Per-turbine arrays for the two modalities.
model1_dict = {}
model2_dict = {}
# Normalise the inputs and build the prediction target for every turbine.
for turbid, data in tqdm(data_by_turbid.items()):
    # Replace every Patv reading by the mean of its agg_batch-row window.
    window_id = np.arange(len(data)) // agg_batch
    data["Patv"] = data["Patv"].groupby(window_id).transform("mean")

    # The scaler is fitted on this turbine's full series.
    scaler = StandardScaler()
    data[SCALED_COLS] = scaler.fit_transform(data[SCALED_COLS])
    data = data[ORDERED_COLS].reset_index(drop=True)

    # Predict the power of the *following* window: every window receives the
    # mean of the window that comes right after it. Working on a NumPy copy
    # and stepping directly from window start to window start avoids visiting
    # every row and the slow label-based .loc writes. patv[start] is read
    # before window `start` itself is overwritten (that happens one step
    # later), so each window gets the untouched mean of its successor.
    # The final window has no successor and keeps its own mean.
    patv = data["Patv"].to_numpy(copy=True)
    for start in range(agg_batch, len(patv) - agg_batch + 2, agg_batch):
        patv[start - agg_batch:start] = patv[start]
    data["Patv"] = patv
    data_by_turbid[turbid] = data

    model1_dict[turbid] = data[MODEL1_COLS].to_numpy()
    model2_dict[turbid] = data[MODEL2_COLS].to_numpy()
   


数据总个数为: 393960
客户端个数为: 134


100%|██████████| 134/134 [00:35<00:00,  3.79it/s]


In [4]:
def split_train_dev_test(
    data_index: list,
    seed: int=8,
    train_ratio: float=0.7,
    val_ratio: float=0.15,
) -> tuple[list, list, list]:
    """Shuffle ``data_index`` and split it into train / validation / test parts.

    Args:
        data_index: Indexable collection of items to distribute.
        seed: Seed of the shuffle; the same seed always gives the same split.
        train_ratio: Fraction of the items assigned to the training part.
        val_ratio: Fraction of the items assigned to the validation part.
            Everything that is left over goes to the test part.

    Returns:
        ``(train_index, val_index, test_index)`` as three lists.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1"
        )

    order = np.arange(len(data_index))
    # A private RandomState yields the same permutation as
    # np.random.seed(seed) + np.random.shuffle(...), but leaves NumPy's global
    # random state untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)
    rng.shuffle(order)

    train_len = int(len(data_index) * train_ratio)
    val_len = int(len(data_index) * val_ratio)
    val_end = train_len + val_len

    train_index = [data_index[idx] for idx in order[:train_len]]
    val_index = [data_index[idx] for idx in order[train_len:val_end]]
    test_index = [data_index[idx] for idx in order[val_end:]]

    return train_index, val_index, test_index

In [36]:
# Empty per-split containers for the two modalities (model1 / model2);
# a later cell appends one list of samples per client to each of them.
model1_dict_train, model2_dict_train = [], []
model1_dict_val, model2_dict_val = [], []
model1_dict_test, model2_dict_test = [], []

In [39]:
def _window_samples(turbid, modal_array, window_ids, window_len):
    """Build ``[turbid, target, features]`` samples for the requested windows.

    The last column of ``modal_array`` holds the target; the target of a
    sample is read from the first row of its window and the features are all
    remaining columns of the ``window_len`` rows of that window.
    """
    target_col = modal_array.shape[1] - 1
    samples = []
    for window in window_ids:
        start = window * window_len
        samples.append([
            turbid,
            modal_array[start, target_col],
            modal_array[start:start + window_len, :target_col],
        ])
    return samples


# Start from empty containers so that re-running this cell rebuilds the splits
# instead of appending a second copy of every sample.
model1_dict_train, model2_dict_train = [], []
model1_dict_val, model2_dict_val = [], []
model1_dict_test, model2_dict_test = [], []

# Number of complete windows, taken from the first turbine.
# NOTE(review): assumes every turbine has at least as many rows - confirm.
len_total = len(next(iter(model1_dict.values()))) // agg_batch

# The split uses a fixed default seed, so it is identical for every turbine;
# compute it once instead of once per loop iteration.
train_index, val_index, test_index = split_train_dev_test(np.arange(len_total))

# Appending in iteration order gives one entry per client without assuming
# that TurbIDs are exactly 1..N.
for i, model1_array in tqdm(model1_dict.items()):
    model2_array = model2_dict[i]

    model1_dict_train.append(_window_samples(i, model1_array, train_index, agg_batch))
    model2_dict_train.append(_window_samples(i, model2_array, train_index, agg_batch))

    model1_dict_test.append(_window_samples(i, model1_array, test_index, agg_batch))
    model2_dict_test.append(_window_samples(i, model2_array, test_index, agg_batch))

    model1_dict_val.append(_window_samples(i, model1_array, val_index, agg_batch))
    model2_dict_val.append(_window_samples(i, model2_array, val_index, agg_batch))

  0%|          | 0/134 [00:00<?, ?it/s]

100%|██████████| 134/134 [00:01<00:00, 95.07it/s] 


In [48]:
# Spot-check one sample (index 198) from the first client's training list.
# Each sample is a [TurbID, target Patv, feature window] triple.
model1_dict_train[0][198]

[1,
 1248.7308333333333,
 array([[ 1.4031354 , -0.21303465,  0.00215075,  0.04200387],
        [ 1.29331267, -0.19902366,  0.01082414,  0.04253002],
        [ 1.31253165, -0.18781487,  0.01516084,  0.04253002],
        [ 1.36469744, -0.17884783,  0.02879045,  0.04226695],
        [ 1.31527721, -0.16539728,  0.03932243,  0.04410848],
        [ 1.50197585, -0.15699069,  0.05047393,  0.0417408 ],
        [ 1.43059108, -0.14914454,  0.05109346,  0.0427931 ],
        [ 1.45530119, -0.1401775 ,  0.05790826,  0.0448977 ],
        [ 1.57061506, -0.13681487,  0.06720118,  0.04200387],
        [ 1.49923028, -0.13345223,  0.0696793 ,  0.04200387],
        [ 1.35371517, -0.13008959,  0.07401599,  0.04463463],
        [ 1.59807074, -0.12560607,  0.08268938,  0.04253002]])]

In [54]:
# Load an exported model-1 feature file from disk into `data_dict`.
# NOTE: pickle.load can execute arbitrary code while deserialising, so only
# open files that come from a trusted source.
with open(
    "/mnt/File/my_file/2024-2/fed-multimodal/fed_multimodal/output/feature/model1/SDWPF/alpha12/2.pkl",
    "rb",
) as f:
    data_dict = pickle.load(f)

In [55]:
# Display the samples loaded from the pickle file.
# NOTE(review): this renders the whole object; consider a slice such as
# data_dict[:2] to keep the notebook output short.
data_dict

[[2,
  -0.3,
  array([[-0.92530572,  1.45115337,  0.47804163,  0.21064086],
         [-1.10217431,  1.45115337,  0.47589471,  0.21064086],
         [-1.16475859,  1.44634314,  0.47374779,  0.21064086],
         [-1.2246218 ,  1.43672267,  0.46945396,  0.21064086],
         [-1.17564281,  1.42149026,  0.46669364,  0.21064086],
         [-1.21373758,  1.40144761,  0.4623998 ,  0.21064086],
         [-1.22190075,  1.39583567,  0.45687916,  0.21064086],
         [-1.14026909,  1.39102544,  0.45135851,  0.21064086],
         [-1.33346401,  1.35254355,  0.4489049 ,  0.21064086],
         [-1.27904291,  1.28520026,  0.44277085,  0.21064086],
         [-1.06135848,  1.23950303,  0.4369435 ,  0.21064086],
         [-1.0967322 ,  1.19781433,  0.42651562,  0.21064086]])],
 [2,
  -0.3,
  array([[-0.61238436,  0.75527271,  0.15079012,  0.21064086],
         [-0.23959979,  0.75767782,  0.1486432 ,  0.21064086],
         [-0.09810492,  0.77291024,  0.14680299,  0.21064086],
         [-0.72122658,  0.

In [51]:
# Scratch check: integer division of a running index yields one group id per
# block of 8 consecutive positions (0,0,...,0,1,1,...). The same trick builds
# the window ids used for the Patv window means.
np.arange(800) // 8

array([ 0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  2,
        2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,
        4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,
        6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,
        8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10,
       10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12,
       12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14,
       14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
       17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19,
       19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21,
       21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23,
       23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25,
       25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27,
       27, 27, 27, 28, 28