# 使用Transformer+Multitask+MLP，对给定的小时级u,v，给出小时级power_total以及capacity_total

## 定义数据集及其处理方式。定义对于capacity的数据集

In [1]:
import pandas as pd
import numpy as np

# Root folder that holds the Results_1 ... Results_5 directories.
path = r'E:\PKU_ThirdGrade_Autumn\机器学习基础'
u_wind, v_wind, capacity, power = [], [], [], []
# Consecutive entries delimit the sample ids read from each Results_<j> folder.
sample_number = [0,1000,2000,3000,4000,5000]
for j, (first_id, stop_id) in enumerate(zip(sample_number[:-1], sample_number[1:]), start=1):
    for i in range(first_id, stop_id):
        # Sample 4014 is excluded from the dataset.
        if i != 4014:
            u_file = path + rf'\Results_{j}\data_{i}\u.csv'
            v_file = path + rf'\Results_{j}\data_{i}\v.csv'
            capacity_file = path + rf'\Results_{j}\data_{i}\capacity.csv'
            power_file = path + rf'\Results_{j}\data_{i}\power.csv'
            # Wind files are read without a header row; only their first column is used.
            df = pd.read_csv(u_file, header=None)
            u_wind.append(df.iloc[:, 0].values)
            df = pd.read_csv(v_file, header=None)
            v_wind.append(df.iloc[:, 0].values)
            # Power: column index 10, skipping the first two data rows.
            df = pd.read_csv(power_file)
            power.append(df.iloc[2:, 10].values)
            # Capacity: column index 10, skipping the first data row.
            df = pd.read_csv(capacity_file)
            capacity.append(df.iloc[1:, 10].values)

# Stack the per-sample vectors into 2-D arrays, one row per sample.
u_wind = np.array(u_wind)
v_wind = np.array(v_wind)
capacity = np.array(capacity)
power = np.array(power)
print(u_wind.shape, v_wind.shape, capacity.shape, power.shape, sep='\n')

(4999, 8760)
(4999, 8760)
(4999, 12)
(4999, 8760)


In [2]:
# Swap the two axes of every array so that samples run along axis 1.
u_wind = u_wind.T
v_wind = v_wind.T
capacity = capacity.T
power = power.T
print(u_wind.shape, v_wind.shape, capacity.shape, power.shape, sep='\n')
# Lengths of the leading axis after the transpose.
input_size, output_capacity_size, output_power_size = len(u_wind), len(capacity), len(power)

(8760, 4999)
(8760, 4999)
(12, 4999)
(8760, 4999)


In [3]:
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch
import matplotlib.pyplot as plt

# Sample count of the dataset loaded above; kept because other cells may still read this global.
sample_number = 4999


def data_initialization(u_wind,v_wind,capacity,power):
    """Re-arrange the raw arrays into per-sample model inputs and targets.

    Every argument is a 2-D array whose second axis indexes the samples
    (column ``i`` belongs to sample ``i``).

    Args:
        u_wind: array of shape (steps, samples).
        v_wind: array of shape (steps, samples); must match ``u_wind``.
        capacity: array of shape (capacity_len, samples).
        power: array of shape (power_len, samples).

    Returns:
        A tuple ``(inputs, target1, target2)`` where ``inputs`` has shape
        (samples, steps, 2) with u in channel 0 and v in channel 1,
        ``target1`` is ``capacity`` as (samples, capacity_len) and
        ``target2`` is ``power`` as (samples, power_len).

    Raises:
        ValueError: if an argument is not 2-D, if the sample counts differ,
            or if ``u_wind`` and ``v_wind`` have different shapes.
    """
    u_wind = np.asarray(u_wind)
    v_wind = np.asarray(v_wind)
    capacity = np.asarray(capacity)
    power = np.asarray(power)

    named = (("u_wind", u_wind), ("v_wind", v_wind), ("capacity", capacity), ("power", power))
    for name, arr in named:
        if arr.ndim != 2:
            raise ValueError(f"{name} must be 2-D, got {arr.ndim} dimension(s)")

    # The sample count comes from the data itself instead of a module-level constant.
    n_samples = u_wind.shape[1]
    for name, arr in named:
        if arr.shape[1] != n_samples:
            raise ValueError(
                f"{name} has {arr.shape[1]} samples, expected {n_samples}"
            )
    if u_wind.shape != v_wind.shape:
        raise ValueError("u_wind and v_wind must have the same shape")

    # (samples, steps) + (samples, steps) -> (samples, steps, 2), without a Python loop.
    input_merged = np.stack((u_wind.T, v_wind.T), axis=-1)
    # Copy into C order so the results do not alias the caller's arrays.
    target1 = np.array(capacity.T, order='C')
    target2 = np.array(power.T, order='C')
    return input_merged, target1, target2

class dataset_to_Dataset(Dataset):
    """Torch dataset pairing each input sample with its standardized target.

    Targets are standardized per column as ``(target - mean) / std``. By
    default the statistics are computed from ``data_target`` itself; pass
    ``target_mean`` / ``target_std`` to reuse statistics from another split
    (for example the training split when building a test set).

    Args:
        data_input_merged: NumPy array of inputs; axis 0 indexes the samples
            and ``__getitem__`` indexes it as a 3-D array.
        data_target: NumPy array of targets with shape (samples, target_len).
        target_mean: optional per-column mean to use instead of the one
            computed from ``data_target``.
        target_std: optional per-column standard deviation to use instead of
            the one computed from ``data_target``.
    """

    def __init__(self,data_input_merged,data_target,target_mean=None,target_std=None):
        self.len = data_input_merged.shape[0]
        print(data_input_merged.shape)
        print(data_target.shape)
        self.input_merged = torch.from_numpy(data_input_merged)
        self.target1 = torch.from_numpy(data_target)

        if target_mean is None:
            target_mean = self.target1.mean(dim=0)
        if target_std is None:
            target_std = self.target1.std(dim=0)
        target_mean = torch.as_tensor(target_mean, dtype=self.target1.dtype)
        target_std = torch.as_tensor(target_std, dtype=self.target1.dtype)
        # A constant column (std == 0) or an undefined std (NaN) would turn the
        # whole column into NaN; fall back to a unit scale for those columns.
        target_std = torch.where(target_std > 0, target_std, torch.ones_like(target_std))

        # Kept so predictions can be mapped back to the original scale.
        self.target_mean = target_mean
        self.target_std = target_std
        self.target1_normalized = (self.target1 - target_mean) / target_std

    def __getitem__(self,index):
        """Return ``(input, standardized_target)`` for the sample at ``index``."""
        return self.input_merged[index,:,:],self.target1_normalized[index,:]

    def __len__(self):
        """Return the number of samples."""
        return self.len
    
def dataset_split_6sets(data_input_merged, data_target1,data_target2, ratio=0.9):
    """Split inputs and both targets into leading (train) and trailing (test) parts.

    The split is positional: the first ``int(ratio * n)`` samples along
    axis 0 form the training part and the remainder the test part, where
    ``n`` is the number of samples in ``data_input_merged``.

    Args:
        data_input_merged: array of inputs, samples along axis 0.
        data_target1: first target array, samples along axis 0.
        data_target2: second target array, samples along axis 0.
        ratio: fraction of samples assigned to the training part.

    Returns:
        ``(train_input, train_target1, train_target2,
        test_input, test_target1, test_target2)``.
    """
    # Derive the boundary from the data that was passed in, not from a global.
    split_index = int(ratio * len(data_input_merged))
    head = slice(None, split_index)
    tail = slice(split_index, None)
    return (
        data_input_merged[head],
        data_target1[head],
        data_target2[head],
        data_input_merged[tail],
        data_target1[tail],
        data_target2[tail],
    )

## 搭建Transformer+Multitask+MLP

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

class TransformerModel(nn.Module):
    """Transformer encoder that maps a series to one output vector per sample.

    Pipeline: linear embedding -> learned positional encoding -> stacked
    ``nn.TransformerEncoderLayer`` blocks -> mean over the sequence axis ->
    linear output head.

    Inputs are batch-first, ``(batch, seq, input_dim)``. Self-attention spans
    all sequence positions of a sample, so attention memory grows with the
    square of the sequence length.

    Args:
        input_dim: number of features per sequence position.
        output_dim: size of the output vector.
        seq_len: maximum sequence length (size of the positional encoding).
        num_heads: attention heads per encoder layer.
        num_layers: number of stacked encoder layers.
        ff_dim: used both as the model width (``d_model``) and as the
            feed-forward width of each encoder layer.
        dropout: dropout probability inside the encoder layers.
    """

    def __init__(self, input_dim, output_dim, seq_len, num_heads=8, num_layers=6, ff_dim=64, dropout=0.1):
        super().__init__()
        self.embedding = nn.Linear(input_dim, ff_dim)
        # Learned positional encoding, broadcast over the batch axis.
        self.positional_encoding = nn.Parameter(torch.zeros(1, seq_len, ff_dim))
        # batch_first=True matches the (batch, seq, feature) layout used by the
        # positional encoding and by the mean over dim=1 in forward(); with the
        # default layout the encoder would attend across the batch axis.
        # The attribute is kept so existing state_dict keys stay loadable.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=ff_dim,
            nhead=num_heads,
            dim_feedforward=ff_dim,
            dropout=dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.fc_out = nn.Linear(ff_dim, output_dim)

    def forward(self, x):
        """Return a ``(batch, output_dim)`` tensor for ``x`` of shape (batch, seq, input_dim)."""
        x = self.embedding(x)
        # Slice so sequences shorter than seq_len are accepted as well.
        x = x + self.positional_encoding[:, :x.size(1)]
        x = self.encoder(x)
        # Average over the sequence axis to get one vector per sample.
        output = self.fc_out(x.mean(dim=1))
        return output
    
# Model dimensions: features per step, output vector size, sequence length.
input_dim, output_dim, seq_len = 2, 12, 8760

## 开始训练

In [7]:
import time
import torch.nn as nn
import torch.optim as optim
import torch
# Hyper-parameters for the data loader and the optimizer.
BATCH_SIZE = 64
LEARN_RATE = 1e-3

# Build per-sample inputs and the two target arrays from the raw data.
dataset_input_merged, dataset_target1, dataset_target2 = data_initialization(
    u_wind, v_wind, capacity, power
)
print(dataset_input_merged.shape, dataset_target1.shape, dataset_target2.shape, sep='\n')

# Cast everything to float32 before handing it to torch.
dataset_input_merged, dataset_target1, dataset_target2 = (
    arr.astype(np.float32)
    for arr in (dataset_input_merged, dataset_target1, dataset_target2)
)

(
    train_input_merged,
    train_target1,
    train_target2,
    test_input_merged,
    test_target1,
    test_target2,
) = dataset_split_6sets(dataset_input_merged, dataset_target1, dataset_target2)
train_set = dataset_to_Dataset(train_input_merged, train_target1)

# Wrap the training set in a DataLoader: samples are reshuffled every epoch
# and a trailing batch smaller than BATCH_SIZE is dropped.
train_set_iter = DataLoader(
    dataset=train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)


(4999, 8760, 2) (4999, 12) (4999, 8760)
(4999, 8760, 2)
(4999, 12)
(4999, 8760)
(4499, 8760, 2)
(4499, 8760)


In [8]:
EPOCH = 1

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

model = TransformerModel(input_dim, output_dim, seq_len).to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARN_RATE)

BATCH_SIZE = 64
loss_plot = []  # mean training loss of each epoch
input_size = 8760
capacity_size = 12

start = time.perf_counter()

# Make sure dropout is active even if the model was switched to eval mode earlier.
model.train()
for epoch in range(EPOCH):
    loss_print = []  # per-batch losses of the current epoch
    for batch_idx, (x, y1) in enumerate(train_set_iter):
        # Take the batch dimension from the tensors themselves so the loop does
        # not depend on BATCH_SIZE matching the DataLoader's batch size.
        x = x.reshape([x.shape[0], input_size, 2]).to(device)
        y1 = y1.reshape((y1.shape[0], capacity_size)).to(device)
        y1_pred = model(x)
        # Conventional (input, target) argument order.
        loss = criterion(y1_pred, y1)
        loss_print.append(loss.item())
        # Reset gradients, back-propagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    epoch_loss = sum(loss_print) / len(loss_print)
    # The label names the model that is actually trained here.
    print(f'{epoch} of loss: Transformer+Multitask+MLP:loss:', epoch_loss)
    loss_plot.append(epoch_loss)

end = time.perf_counter()
print('训练时间为：{:.2f}s'.format(end-start))

cuda




OutOfMemoryError: CUDA out of memory. Tried to allocate 12.83 GiB. GPU 

In [None]:
# Persist only the model's parameters (its state_dict) to a file in the working directory.
torch.save(model.state_dict(), 'transformer_normalization_full_2.pt')

In [None]:
# Plot the values collected in loss_plot.
plt.plot(loss_plot)
plt.title("loss curve")

## 检测预测效果

In [None]:
# Rebuild the network and restore the saved weights onto the active device.
model = TransformerModel(input_dim, output_dim, seq_len).to(device)
model.load_state_dict(
    torch.load('transformer_normalization_full_2.pt', map_location=device)
)

model.eval()

BATCH_SIZE = 1
# The model outputs capacity_size values per sample, so evaluate against the
# capacity targets (target1); the power targets (target2) have a different
# length and cannot be reshaped to (BATCH_SIZE, capacity_size).
# NOTE: dataset_to_Dataset standardizes the targets it receives, so the
# reference values below are scaled with statistics of the test split.
test_set = dataset_to_Dataset(test_input_merged, test_target1)
test_set_iter = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, drop_last=True)

loss_data = []      # per-batch MSE
y1_pred_data = []   # per-batch predictions as NumPy arrays
y1_ref_data = []    # per-batch reference targets as NumPy arrays
# Inference only: skip autograd bookkeeping to save memory.
with torch.no_grad():
    for batch_idx, (x, y1) in enumerate(test_set_iter):
        x = x.reshape([x.shape[0], input_size, 2]).to(device)
        y1 = y1.reshape((y1.shape[0], capacity_size)).to(device)
        y1_pred = model(x)
        loss = criterion(y1_pred, y1)
        print(loss.item())
        loss_data.append(loss.item())
        y1_pred_data.append(y1_pred.detach().cpu().numpy())
        y1_ref_data.append(y1.detach().cpu().numpy())
        print(f"{batch_idx} has been finished")

In [None]:
# Print and plot the per-batch losses collected in loss_data.
print(loss_data)
plt.plot(loss_data)

In [None]:
# Show the shape of the first stored prediction, then join all entries along axis 0.
print(y1_pred_data[0].shape)
# NOTE: this rebinds y1_pred_data from a list to a single array; running the
# cell again would operate on the already-merged array.
y1_pred_data = np.concatenate(y1_pred_data,axis=0)
print(y1_pred_data.shape)

In [None]:
# Show the shape of the first stored reference, then join all entries along axis 0.
print(y1_ref_data[0].shape)
# NOTE: this rebinds y1_ref_data from a list to a single array; running the
# cell again would operate on the already-merged array.
y1_ref_data = np.concatenate(y1_ref_data,axis=0)
print(y1_ref_data.shape)

In [None]:
# Print the first reference row next to the first predicted row for a quick visual comparison.
print(y1_ref_data[0])
print(y1_pred_data[0])

In [None]:
# Element-wise error between reference and prediction, for however many
# samples and outputs the arrays actually contain.
y1_abs_error = np.abs(y1_ref_data - y1_pred_data)
y1_ref_magnitude = np.abs(y1_ref_data)
# References whose magnitude is at most 0.01 are scored with the absolute error
# to avoid dividing by (almost) zero. The magnitude is used because the
# reference values can be negative; a signed comparison would send every
# negative reference down the absolute-error branch.
y1_near_zero = y1_ref_magnitude <= 0.01
y1_relative_error = np.zeros_like(y1_ref_data)
np.divide(y1_abs_error, y1_ref_magnitude, out=y1_relative_error, where=~y1_near_zero)
y1_relative_error[y1_near_zero] = y1_abs_error[y1_near_zero]

print(y1_relative_error.shape)

# Mean over each sample's outputs, then mean over the samples.
y1_relative_error_mean = float(y1_relative_error.mean(axis=1).mean())
y1_relative_error_mean

In [None]:
# Relative error of each output for the first test sample.
plt.plot(y1_relative_error[0])
# The curve shows the relative error, not a loss, so the title says so.
plt.title("relative error")
print(np.mean(y1_relative_error[0]))

In [None]:
# Overlay the reference and predicted rows of the first test sample; the
# labels make the two curves distinguishable.
plt.plot(y1_ref_data[0], label="reference")
plt.plot(y1_pred_data[0], label="prediction")
plt.legend()