In [1]:
# 挂在云盘
from google.colab import drive
drive.mount('/content/drive')
# 扫描到相应的路径
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
from model import TransformerRegressor
import torch
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import torch.optim as optim
from matplotlib import pyplot
import numpy as np
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pandas as pd
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [4]:
# lagged GHI values
LAG = 10

# prediction horizon
K = 24

# use exogenous inputs
EXOGENOUS = True

# features
if(EXOGENOUS):
    features = ['K','uvIndex','cloudCover','sunshineDuration','windBearing','humidity','temperature','hour','dewPoint']
else:
    features = ['K']


In [5]:

# metrics
def mad(y_pred,y_test):
    return 100 / y_test.mean() * np.absolute(y_pred - y_test).sum() / y_pred.size

def mdb(y_pred,y_test):
    return 100 / y_test.mean() * (y_pred - y_test).sum() / y_pred.size

def r2(y_pred,y_test):
    return r2_score(y_test, y_pred)

def rmsd(y_pred,y_test):
    return 100 / y_test.mean() * np.sqrt(np.sum(np.power(y_pred - y_test, 2)) / y_pred.size)

def mae(y_pred,y_test):
    return mean_absolute_error(y_test, y_pred)

def mse(y_pred,y_test):
    return mean_squared_error(y_test, y_pred)

In [6]:
# load dataset
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/datasets/clean_dataset.csv",header=0, index_col=0, parse_dates=True).sort_index()
# split exogenous input and clear sky index (K)
df_GHI = df[['K']].copy()

# create exogenous regressors
for feature in features:
    df_GHI[feature] = df[feature]
    for i in range(LAG-1):
        df_GHI[feature+'-'+str(i+1)] = df[feature].shift(i+1)

# create target values
for i in range(1,K+1):
    #  df_GHI['K+'+str(i)] = df['K'].shift(-i)
    df_k= df['K'].shift(-i).rename('K+'+str(i))
    df_GHI = pd.concat([df_GHI,df_k],axis=1)
    
# create clear sky target values
for i in range(1,K+1):
    # df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(-i)
    df_cs = df['GHI_cs'].shift(-i).rename('GHI_cs+'+str(i))
    df_GHI = pd.concat([df_GHI,df_cs],axis=1)

# drop nan due to shifting
df_GHI = df_GHI.dropna()

# create training set
X_train = df_GHI['2010-1-1':'2014-6-30'].values[:,:-K*2]
y_train = df_GHI['2010-1-1':'2014-6-30'].values[:,-K*2:-K]

# create validation set
X_val = df_GHI['2014-7-1':'2014-12-31'].values[:,:-K*2]
y_val = df_GHI['2014-7-1':'2014-12-31'].values[:,-K*2:-K]

# create test set
X_test = df_GHI['2015-1-1':'2015-12-31'].values[:,:-K*2]
y_test = df_GHI['2015-1-1':'2015-12-31'].values[:,-K*2:-K]

# get clear sky target values
y_cs = df_GHI['2015-1-1':'2015-12-31'].values[:,-K:]

# scale features
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)


  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['K+'+str(i)] = df['K'].shift(-i)
  df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(-i)
  df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(-i)
  df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(-i)
  df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(-i)
  df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(-i)
  df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(-i)
  df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(-i)
  df_GHI['GHI_cs+'+str(i)] = df['GHI_cs'].shift(

In [7]:

epochs = 100
batch = 32
best_model = None
# best_mse = 99999999

In [8]:
data = torch.from_numpy(X_train).type(torch.float32)
label = torch.from_numpy(y_train).type(torch.float32)
dataset_train = TensorDataset(data,label)
data_loader = DataLoader(dataset_train,batch_size=batch,shuffle=False)
data_val = torch.from_numpy(X_val).type(torch.float32)
label_val = torch.from_numpy(y_val).type(torch.float32)
dataset_val = TensorDataset(data_val,label_val)
dataval_loader = DataLoader(dataset_val,batch_size=batch,shuffle=False)

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device =torch.device("cuda")
writer = SummaryWriter('/content/drive/MyDrive/Colab Notebooks/lagslogs/')
input_d = 9*LAG
model = TransformerRegressor(input_dim=input_d,output_dim=1,num_heads=8,d_model=512).to(device)
criterion = nn.MSELoss().to(device)     # 忽略 占位符 索引为0.
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.99)
val_loss = []
train_loss = []



In [10]:
data_test =torch.from_numpy(X_test).type(torch.float32)
label_test = torch.from_numpy(y_test).type(torch.float32)
dataset_test = TensorDataset(data_test,label_test)
datatest_loader = DataLoader(dataset_test,batch_size=1,shuffle=False)

In [11]:

y_preds = np.zeros(y_cs.shape)
best_loss = np.zeros(y_cs.shape[1])
best_loss.shape


(24,)

In [None]:
for i in range(7,K):
  best_loss[i]=100000
  best_test_loss = best_loss[i]
  j=0#j作为判断是否是bestloss，需不需要跳出循环
  for epoch in range(epochs):
    train_epoch_loss = []
    val_epoch_loss = []
    if j >=15:
      break
    for X_train,y_train in data_loader:
        y = y_train[:,i]  #y:torch.size(32)
        y = y.unsqueeze(1) #y:32*1
        # print(y.shape)
        # print("************************************")
        X_tr = np.expand_dims(X_train,0)
        y_tr = np.expand_dims(y,0)
        # print(y_tr.shape)
        # print("______________________________")
        # print(X_tr)
        # print("______________________________")
        input_xtr = X_tr.transpose(1, 0, 2) #数组转维度，(1,32,9) -->(32,1,9)
        input_ytr = y_tr.transpose(1, 0, 2) #(32,1,1)
        input_xtr = torch.tensor(input_xtr).to(device)
        input_ytr = torch.tensor(input_ytr).to(device)
        optimizer.zero_grad()
        output = model(input_xtr)#训练
        trainloss = criterion(output, input_ytr)
        # print(output.shape)
        # print(trainloss.shape)
        trainloss.backward()
        optimizer.step()
    train_epoch_loss.append(trainloss.item())
    train_loss.append(np.mean(train_epoch_loss))
    writer.add_scalar("train_loss", np.mean(train_epoch_loss))
    for X_val, y_val in dataval_loader:
      y_val = y_val[:,i]
      y_val = y_val.unsqueeze(1)
      x_v = np.expand_dims(X_val, 0)
      y_v = np.expand_dims(y_val, 0)
      input_xval = x_v.transpose(1, 0, 2)
      input_yval = y_v.transpose(1, 0, 2)
      X_val = torch.tensor(input_xval).to(device)
      Y_val = torch.tensor(input_yval).to(device)
      output = model(X_val)
      valloss = criterion(output, Y_val)
      val_epoch_loss.append(valloss.item())
    val_loss.append(np.mean(val_epoch_loss))
    writer.add_scalar("val_loss", np.mean(val_epoch_loss), epoch)
    print("第{0}min的epoch:".format(15*(i+1)), epoch, "train_epoch_loss:", np.mean(train_epoch_loss), "val_epoch_loss:", np.mean(val_epoch_loss))
    j +=1
    print(j)
    # 保存下来最好的模型：
    if np.mean(val_epoch_loss) < best_test_loss:
        best_test_loss = np.mean(val_epoch_loss)
        j = 0
        best_model = model
        print("best_test_loss:", best_test_loss)
        torch.save(best_model.state_dict(), f'/content/drive/MyDrive/Colab Notebooks/lagslogs/best_Transformer_trainModel_{i}.pth')



第120min的epoch: 0 train_epoch_loss: 0.01986568048596382 val_epoch_loss: 0.08183309960050805
1
best_test_loss: 0.08183309960050805
第120min的epoch: 1 train_epoch_loss: 0.0278899185359478 val_epoch_loss: 0.08134212778489766
1
best_test_loss: 0.08134212778489766
第120min的epoch: 2 train_epoch_loss: 0.019898880273103714 val_epoch_loss: 0.08145070125760538
1
第120min的epoch: 3 train_epoch_loss: 0.02127779647707939 val_epoch_loss: 0.08060426497325787
2
best_test_loss: 0.08060426497325787
第120min的epoch: 4 train_epoch_loss: 0.025701886042952538 val_epoch_loss: 0.0765514788032253
1
best_test_loss: 0.0765514788032253
第120min的epoch: 5 train_epoch_loss: 0.009328166022896767 val_epoch_loss: 0.05762451023079136
1
best_test_loss: 0.05762451023079136
第120min的epoch: 6 train_epoch_loss: 0.010848002508282661 val_epoch_loss: 0.05851716566271073
1
第120min的epoch: 7 train_epoch_loss: 0.00040337926475331187 val_epoch_loss: 0.04753810998789814
2
best_test_loss: 0.04753810998789814
第120min的epoch: 8 train_epoch_loss: 0

In [None]:
for i in range(K):
  # model.load_state_dict(torch.load(f'/content/drive/MyDrive/Colab Notebooks/lagslogs/best_Transformer_trainModel_{i}.pth')) #GPU
  model.load_state_dict(torch.load(f'/content/drive/MyDrive/Colab Notebooks/lagslogs/best_Transformer_trainModel_{i}.pth',map_location=torch.device('cpu')))
  model.to(device)
  model.eval()
  num_clo = y_cs.shape[0]
  y_pred = []
  for X_test,y_test in datatest_loader:
    y_test = y_test[:,i]
    y_test = y_test.unsqueeze(1)
    x_tt = np.expand_dims(X_test, 0)
    y_tt = np.expand_dims(y_test, 0)
    input_xtt = x_tt.transpose(1, 0, 2)
    input_ytt = y_tt.transpose(1, 0, 2)
    # print(input_ytt.shape)
    X_tt = torch.tensor(input_xtt).to(device)
    Y_tt = torch.tensor(input_ytt).to(device)
    # print(Y_tt.shape)
    output = model(X_tt)
    # output = output.cpu().detach().numpy()[0]
    y_pred.append(output)
    Y_tt = Y_tt.cpu().detach().numpy()[0]
  # print("y_pred:",y_pred)
  pred = [tensor.detach().cpu().numpy() for tensor in y_pred]
  pred = np.array(pred)
  pred =pred.reshape(num_clo)
  y_preds[:,i] = pred



In [None]:
y_preds

In [None]:
# transform to GHI

y_pred2 = np.multiply(y_preds, y_cs)

y_test2 = np.multiply(label_test,y_cs)

# save results
results = {'K':[],'Time[min]':[],'MAD[%]':[],'R2':[],'RMSD[%]':[]}


In [None]:
y_test2 =y_test2.numpy()

In [None]:

for i in range(K):
    results['K'].append(i+1)
    results['Time[min]'].append((i+1)*15)
    results['MAD[%]'].append(np.round(mad(y_pred2[:,i],y_test2[:,i]),2))
    results['R2'].append(np.round(r2(y_pred2[:,i],y_test2[:,i]),2))
    results['RMSD[%]'].append(np.round(rmsd(y_pred2[:,i],y_test2[:,i]),2))

# create results dataframe
results = pd.DataFrame(results)
results = results.set_index('K')

# save results
#if(EXOGENOUS):
#    results.to_csv('./results/'+model.name+'_exogenous.csv')
#else:
#    results.to_csv('./results/'+model.name+'.csv')

# print results
results.head(K)