In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Dataset selector and data directory (relative to the notebook).
RE = "Solar_PBE"
address = "data/"

# One CSV per file suffix; the first column of every file becomes the index.
# The _16 and _17 files are stacked into the training frame, _18 is used for
# validation and _19 for testing.
data_train_csv1 = pd.read_csv(f"{address}{RE}_16.csv", index_col=0)
data_train_csv2 = pd.read_csv(f"{address}{RE}_17.csv", index_col=0)
data_train_csv = pd.concat([data_train_csv1, data_train_csv2])
data_val_csv = pd.read_csv(f"{address}{RE}_18.csv", index_col=0)
data_test_csv = pd.read_csv(f"{address}{RE}_19.csv", index_col=0)

# Price table: consecutive row ranges of its 'Positive imbalance price' column
# are handed to the train / validation / test frames, in that order.
data_price = pd.read_csv(f"{address}Price_Elia_Imbalance_16_19.csv", index_col=0)
_imbalance_price = data_price['Positive imbalance price']
_train_end = len(data_train_csv)
_val_end = _train_end + len(data_val_csv)

# NOTE(review): assigning a Series to a DataFrame column aligns on index labels,
# so this presumably relies on the power and price files sharing the same
# index values -- confirm against the CSV files.
data_train_csv['Price(€)'] = _imbalance_price[:_train_end]
data_val_csv['Price(€)'] = _imbalance_price[_train_end:_val_end]
data_test_csv['Price(€)'] = _imbalance_price[_val_end:]

In [14]:


Battery_Size = 0.15  # battery size in per-unit (p.u.)
unit = 1  # number of consecutive rows averaged into one sample by normalize_data

# Peak power of each split, used as that split's normalisation capacity.
# Series.max() is vectorised and skips NaN, whereas the builtin max() walks the
# rows in Python and gives order-dependent results when a NaN is present.
RE_Capacity1 = data_train_csv['Power(MW)'].max()
RE_Capacity2 = data_val_csv['Power(MW)'].max()
RE_Capacity3 = data_test_csv['Power(MW)'].max()

# NOTE(review): the 'Price(€)' columns are filled from 'Positive imbalance price'
# but the scaling constant is the maximum of 'Marginal incremental price' --
# confirm that mixing the two columns is intended.
max_price = data_price['Marginal incremental price'].max()

# Number of whole `unit`-row windows available in each split.
size_train0 = len(data_train_csv) // unit
size_val0 = len(data_val_csv) // unit
size_test0 = len(data_test_csv) // unit


def normalize_data(power_data, price_data, capacity, max_price, size, window=None):
    """Average power and price over fixed-length windows and scale them.

    For each of the first ``size`` windows, the mean power is divided by
    ``capacity`` and the mean price by ``max_price``; both are rounded to three
    decimals. A window is kept only when its normalised power is strictly
    positive, so windows with zero (or NaN) power are dropped from both lists.

    Args:
        power_data: pandas Series of power values.
        price_data: pandas Series of prices, positionally matching ``power_data``.
        capacity: divisor applied to the averaged power.
        max_price: divisor applied to the averaged price.
        size: number of windows to process.
        window: rows per window. Defaults to the notebook-level ``unit`` so
            existing five-argument calls behave as before.

    Returns:
        Tuple ``(normalized_power, normalized_price)`` of equal-length lists.
    """
    if window is None:
        # Fall back to the notebook-wide setting instead of requiring it.
        window = unit

    normalized_power = []
    normalized_price = []
    for i in range(size):
        start, stop = i * window, (i + 1) * window
        # .iloc makes the positional slicing explicit, whatever the index type.
        power_avg = round(power_data.iloc[start:stop].mean() / capacity, 3)
        price_avg = round(price_data.iloc[start:stop].mean() / max_price, 3)
        if power_avg > 0:
            normalized_power.append(power_avg)
            normalized_price.append(price_avg)
    return normalized_power, normalized_price

# Normalise every split with its own peak power and the shared price scale.
data_train, price_train = normalize_data(
    data_train_csv['Power(MW)'],
    data_train_csv['Price(€)'],
    RE_Capacity1,
    max_price,
    size_train0,
)
data_val, price_val = normalize_data(
    data_val_csv['Power(MW)'],
    data_val_csv['Price(€)'],
    RE_Capacity2,
    max_price,
    size_val0,
)
data_test, price_test = normalize_data(
    data_test_csv['Power(MW)'],
    data_test_csv['Price(€)'],
    RE_Capacity3,
    max_price,
    size_test0,
)


In [None]:
# Show the sample count and a short preview instead of dumping the whole list
# into the cell output.
print(len(data_train), data_train[:10])

In [None]:
# --- Network architecture (read by the LSTM class) ---
in_size = 3         # input width of the first linear layer
hidden_size = 64    # width of the embedding and of the LSTM hidden state
n_layers = 2        # number of stacked LSTM layers
out_size = 2        # output width of the policy head

# --- PPO settings (read by train_net / compute_advantage) ---
K_epoch = 3         # optimisation passes over each batch
gamma = 0.99        # discount applied to the next-state value
lmbda = 0.95        # multiplied with gamma in the advantage recursion
eps_clip = 0.01     # probability ratio is clamped to [1 - eps_clip, 1 + eps_clip]
C_value = 1         # weight of the critic loss in the total loss
var = 0.1 ** 2      # diagonal of the action distribution's covariance matrix

# Not referenced by the code in this part of the notebook.
# NOTE(review): presumably the optimiser step size and the rollout length -- confirm.
learning_rate = 0.001
T_horizon = 128

class LSTM(nn.Module):
    """Actor-critic network whose policy and value heads share an LSTM trunk.

    Inputs pass through a linear layer with ReLU, are reshaped into a single
    sequence (a leading dimension of 1 with ``batch_first=True``) and fed to
    the LSTM. Two linear heads read the LSTM output: ``fc_pi`` produces
    ``out_size`` values per step and ``fc_v`` produces one value per step.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that a seeded
        # initialisation yields the same parameters.
        self.fc_s = nn.Linear(in_size, hidden_size)
        self.rnn = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            batch_first=True,
        )
        self.fc_pi = nn.Linear(hidden_size, out_size)
        self.fc_v = nn.Linear(hidden_size, 1)

    def forward_rnn(self, x, hidden):
        """Embed ``x``, run it through the LSTM and return (output, new hidden)."""
        embedded = F.relu(self.fc_s(x))
        # All rows of x are treated as the steps of one sequence.
        sequence = embedded.view(1, -1, hidden_size)
        return self.rnn(sequence, hidden)

    def pi(self, x, hidden):
        """Policy head: return (tensor of shape (-1, out_size), new hidden)."""
        features, next_hidden = self.forward_rnn(x, hidden)
        return self.fc_pi(features).view(-1, out_size), next_hidden

    def v(self, x, hidden):
        """Value head: return (tensor of shape (-1, 1), new hidden)."""
        features, next_hidden = self.forward_rnn(x, hidden)
        return self.fc_v(features).view(-1, 1), next_hidden

def train_net(model, batch, optimizer):
    """Run ``K_epoch`` PPO updates of ``model`` on one batch of transitions.

    Args:
        model: object exposing ``pi(obs, hidden)`` and ``v(obs, hidden)``, each
            returning ``(tensor, hidden)``.
        batch: ``(transitions, (H, H_prime))``. ``transitions`` is a sequence of
            7-tuples ``(o, H, a, r, o_prime, H_prime, done)``; the per-transition
            hidden states are ignored and the pair in ``batch[1]`` is used
            instead. ``H`` and ``H_prime`` are each a pair of tensors.
        optimizer: optimizer providing ``zero_grad()`` and ``step()``.

    Returns:
        None. The model parameters are updated in place through ``optimizer``.
    """
    if len(batch[0]) == 0:
        # Nothing to learn from; avoids an unpacking error on zip(*[]).
        return

    # transition data (hidden states come from batch[1], not from the tuples)
    o, _, a, r, o_prime, _, done = zip(*batch[0])
    H, H_prime = batch[1][0], batch[1][1]

    # to tensors
    o = torch.tensor(o, dtype=torch.float)
    a = torch.tensor(a, dtype=torch.float)
    r = torch.tensor(r, dtype=torch.float).unsqueeze(1)
    o_prime = torch.tensor(o_prime, dtype=torch.float)
    # 1 for non-terminal transitions, 0 for terminal ones
    not_done = torch.tensor([0 if d else 1 for d in done], dtype=torch.float).unsqueeze(1)

    H = (H[0].detach(), H[1].detach())
    H_prime = (H_prime[0].detach(), H_prime[1].detach())

    covariance = var * torch.eye(out_size)

    # Quantities that stay fixed during the update: old log-probabilities,
    # value targets and advantages. None of them needs a gradient.
    with torch.no_grad():
        pi_old, _ = model.pi(o, H)
        log_prob_old = torch.distributions.MultivariateNormal(
            pi_old, covariance
        ).log_prob(a).view(-1, 1)

        v_prime, _ = model.v(o_prime, H_prime)
        v_old, _ = model.v(o, H)
        v_target = r + gamma * v_prime * not_done
        td_error = (v_target - v_old).squeeze().numpy()

    advantage = compute_advantage(td_error)

    # PPO update
    for _ in range(K_epoch):
        pi, _ = model.pi(o, H)
        log_prob = torch.distributions.MultivariateNormal(
            pi, covariance
        ).log_prob(a).view(-1, 1)
        # Ratio in log space: same value as prob / prob_old, but no 0/0 when
        # both densities underflow.
        ratio = torch.exp(log_prob - log_prob_old)

        # Clipped surrogate: the first term must use the UNCLIPPED ratio,
        # otherwise both arguments of min() are identical.
        surrogate = ratio * advantage
        surrogate_clipped = ratio.clamp(1 - eps_clip, 1 + eps_clip) * advantage
        loss_actor = torch.min(surrogate, surrogate_clipped)

        # The value estimate is recomputed every epoch: reusing one forward
        # pass would call backward() twice through the same (freed) graph.
        v, _ = model.v(o, H)
        loss_critic = F.mse_loss(v, v_target)
        loss = -(loss_actor.mean() - C_value * loss_critic)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def compute_advantage(td_error):
    """Accumulate TD errors backwards into per-step advantages.

    Walking from the last step to the first, each advantage is the step's TD
    error plus ``gamma * lmbda`` times the advantage of the following step.

    Args:
        td_error: array of TD errors; it is flattened before use.

    Returns:
        Float tensor of shape (len, 1) holding the advantages in time order.
    """
    deltas = td_error.flatten()
    decay = gamma * lmbda
    steps = len(deltas)

    gae = [0.0] * steps
    running = 0.0
    # Fill from the end so each entry can reuse its successor's value.
    for t in range(steps - 1, -1, -1):
        running = deltas[t] + decay * running
        gae[t] = running
    return torch.tensor(gae, dtype=torch.float).unsqueeze(1)


In [11]:
# Environment
 
# Battery limits, derived from the per-unit battery size.
E_max = Battery_Size
P_max = E_max
# NOTE(review): presumably the step length in hours for quarter-hourly rows -- confirm.
tdelta = unit / 4
soc_min = 0.1
soc_max = 0.9

# Model coefficients, grouped by prefix.
# NOTE(review): presumably fitted parameters of a battery degradation model --
# confirm their source and meaning.
a0, a1, a2, a3, a4, a5 = -1.031, 35, 3.685, 0.2156, 0.1178, 0.3201
b0, b1, b2, b3, b4, b5 = 0.1463, 30.27, 0.1037, 0.0584, 0.1747, 0.1288
c0, c1, c2 = 0.1063, 62.49, 0.0437
d0, d1, d2 = 0.0712, 61.4, 0.0288

# Scales linearly with the battery size (reference size 0.1).
N = 130 * 215 * E_max / 0.1
# Constant 10 expressed in the same normalised units as the prices.
beta = 10 / max_price
 
