In [1]:
import os                        # Imports the standard-library 'os' module, used to create the results directory before saving.

import numpy as np               # Imports the NumPy library as 'np', which is used for numerical operations and working with arrays.
import pandas as pd              # Imports the Pandas library as 'pd', commonly used for data manipulation and analysis.
import torch                     # Imports the PyTorch library, which is used for building and training deep learning models.
import torch.nn as nn            # Imports the neural network module from PyTorch as 'nn', which contains tools for creating neural networks.
import torch.nn.functional as F  # Imports the functional module from PyTorch, often used for defining operations in neural networks such as activations (e.g., ReLU) and loss functions.


In [2]:
RE = "Solar_PBE"  # File-name prefix shared by the generation CSVs and the bid-forecast CSV.


def _read_data_csv(file_name):
    """Read one CSV from ../data, using its first column as the index."""
    return pd.read_csv("../data/" + file_name, index_col=0)


def _read_bid_forecast():
    """Read the bid-forecast CSV next to the notebook, flatten it and prepend 0.0."""
    forecast = np.array(pd.read_csv(RE + "_Model4_DeepBid.csv", index_col=0))
    return [0.0] + forecast.flatten().tolist()


# Training frame: the '_16' and '_17' files stacked in that order.
data_train_csv1 = _read_data_csv(RE + '_16.csv')
data_train_csv2 = _read_data_csv(RE + '_17.csv')
data_train_csv  = pd.concat([data_train_csv1, data_train_csv2])

# Validation ('_18') and test ('_19') frames.
data_val_csv  = _read_data_csv(RE + '_18.csv')
data_test_csv = _read_data_csv(RE + '_19.csv')

# Imbalance prices covering the same period as the four generation files.
data_price = _read_data_csv('Price_Elia_Imbalance_16_19.csv')

# Cut the positive imbalance price into three consecutive positional slices
# (train | validation | test) and attach each one as the 'Price(€)' column.
# NOTE(review): assigning a Series to a DataFrame column aligns on the index, not on
# position, so this relies on the price index matching the generation indices — confirm.
n_train = len(data_train_csv)
n_val   = len(data_val_csv)
positive_price = data_price['Positive imbalance price']
data_train_csv['Price(€)'] = positive_price[:n_train]
data_val_csv['Price(€)']   = positive_price[n_train:n_train + n_val]
data_test_csv['Price(€)']  = positive_price[n_train + n_val:]

# Bid forecasts used by the environments; a leading 0.0 is prepended to each list.
# NOTE(review): all three lists are loaded from the same file and are therefore identical —
# confirm whether separate train/validation/test forecasts were intended.
train_predict = _read_bid_forecast()
val_predict   = _read_bid_forecast()
test_predict  = _read_bid_forecast()


In [3]:
# Data Preprocessing

Battery_Size = 0.15  # Battery size; later used as the battery's maximum energy (E_max). Stated as 0.15 p.u.
unit = 1             # Number of consecutive raw rows averaged into one time step.

# Peak power of each split, used to normalise that split's generation.
# Series.max() skips NaN, unlike the Python builtin max() iterating over the column.
RE_Capacity1 = float(data_train_csv['Power(MW)'].max())  # Training split
RE_Capacity2 = float(data_val_csv['Power(MW)'].max())    # Validation split
RE_Capacity3 = float(data_test_csv['Power(MW)'].max())   # Test split

# Peak marginal incremental price, used to normalise every price series.
# NOTE(review): the normalised series is 'Positive imbalance price' while the divisor comes
# from 'Marginal incremental price' — confirm this is intended.
max_price = float(data_price['Marginal incremental price'].max())

# Number of `unit`-sized windows in each split.
size_train0 = int(len(data_train_csv)/unit)
size_val0   = int(len(data_val_csv)/unit)
size_test0  = int(len(data_test_csv)/unit)


def _normalised_windows(frame, capacity, n_windows):
    """Average power and price over consecutive windows of `unit` rows and normalise them.

    Parameters
    ----------
    frame : DataFrame with 'Power(MW)' and 'Price(€)' columns.
    capacity : divisor applied to the mean power of each window.
    n_windows : number of windows to process.

    Returns
    -------
    (power_all, power_positive, price_all, price_positive)
        `*_all` hold one value per window, rounded to 3 decimals.
        `*_positive` keep only the windows whose normalised power is > 0
        (windows with zero or NaN power are dropped together with their price).
    """
    power_col = frame['Power(MW)']
    price_col = frame['Price(€)']
    power_all, power_positive, price_all, price_positive = [], [], [], []
    for k in range(n_windows):
        window = slice(k * unit, (k + 1) * unit)
        power = round(power_col[window].mean() / capacity, 3)
        price = round(price_col[window].mean() / max_price, 3)
        power_all.append(power)
        price_all.append(price)
        if power > 0:
            power_positive.append(power)
            price_positive.append(price)
    return power_all, power_positive, price_all, price_positive


# Same processing for the three splits; each split is normalised by its own peak power.
data_train0, data_train, price_train0, price_train = _normalised_windows(data_train_csv, RE_Capacity1, size_train0)
data_val0, data_val, price_val0, price_val = _normalised_windows(data_val_csv, RE_Capacity2, size_val0)
data_test0, data_test, price_test0, price_test = _normalised_windows(data_test_csv, RE_Capacity3, size_test0)


In [4]:
# PPO Agent (Partially Observable State, Continuous Action Space)
# Assumption 1: Standard deviation of the Gaussian policy is fixed (see `var` below)
# Assumption 2: History is composed of observations only

# Hyperparameters for the PPO agent (read as module-level globals by LSTM and train_net)
n_layers         = 2         # Number of stacked LSTM layers (passed to nn.LSTM).
in_size          = 3         # Features per observation; the environment's state rows are [gen, imb, E].
hidden_size      = 64        # Width of the input embedding and of the LSTM hidden state.
out_size         = 1         # Action dimension; the environment reads only action[0].
T_horizon        = 128       # Environment steps collected per batch before train_net is called.
learning_rate    = 0.001     # Learning rate passed to the Adam optimizer.
K_epoch          = 3         # Gradient updates performed on each collected batch.
gamma            = 0.99      # Discount factor used in the TD target and in GAE.
lmbda            = 0.95      # Decay factor of Generalized Advantage Estimation (GAE).
eps_clip         = 0.01      # Probability ratio is clipped to [1 - eps_clip, 1 + eps_clip].
C_value          = 1         # Weight of the critic (MSE) term in the total loss.
var              = 0.1**2    # Fixed variance of the Gaussian policy, used as var * identity (std 0.1).

# Recurrent actor-critic network used by the PPO agent
class LSTM(nn.Module):
    """Shared encoder + LSTM trunk with a policy head and a value head.

    Layer sizes come from the module-level hyperparameters `in_size`,
    `hidden_size`, `n_layers` and `out_size`.
    """

    def __init__(self):
        super().__init__()
        self.fc_s  = nn.Linear(in_size, hidden_size)                                  # observation embedding
        self.rnn   = nn.LSTM(hidden_size, hidden_size, n_layers, batch_first=True)   # recurrent trunk
        self.fc_pi = nn.Linear(hidden_size, out_size)                                # policy head (action mean)
        self.fc_v  = nn.Linear(hidden_size, 1)                                       # value head

    def _trunk(self, x, hidden):
        """Embed `x` and run it through the LSTM as one sequence.

        All rows of `x` are treated as consecutive time steps of a single
        sequence (the view is (1, seq, hidden_size) with batch_first=True).
        Returns the per-step LSTM outputs and the final (h, c) state.
        """
        embedded = F.relu(self.fc_s(x)).view(1, -1, hidden_size)
        return self.rnn(embedded, hidden)

    def pi(self, x, hidden):
        """Return the policy output per time step, shape (seq, out_size), and the new hidden state.

        Callers use this output as the mean of a Gaussian with fixed covariance.
        """
        seq_out, next_hidden = self._trunk(x, hidden)
        return self.fc_pi(seq_out).view(-1, out_size), next_hidden

    def v(self, x, hidden):
        """Return the state-value estimate per time step, shape (seq, 1)."""
        seq_out, _ = self._trunk(x, hidden)
        return self.fc_v(seq_out).view(-1, 1)

# PPO update on one collected batch of experience
def train_net(model, batch, optimizer):
    """Run `K_epoch` clipped-PPO updates of `model` on one rollout batch.

    Parameters
    ----------
    model : object exposing `pi(x, hidden) -> (mean, hidden)` and `v(x, hidden) -> value`.
    batch : pair `[transitions, hidden_pairs]`.
        `transitions` is a list of `(obs, action, reward, next_obs, done)` in time order.
        `hidden_pairs` is a list of `(hidden_before_step, hidden_after_step)`; only the
        first pair is used: the whole batch is replayed as one sequence starting from it.
    optimizer : torch optimizer over the model parameters.

    Uses the module-level hyperparameters gamma, lmbda, eps_clip, C_value, var,
    out_size and K_epoch. Returns None; does nothing for an empty batch.
    """
    transitions, hidden_pairs = batch[0], batch[1]
    if not transitions:
        return  # nothing was collected, so there is nothing to learn from

    o       = torch.tensor([tr[0] for tr in transitions], dtype=torch.float)
    a       = torch.tensor([tr[1] for tr in transitions], dtype=torch.float)
    r       = torch.tensor([[tr[2]] for tr in transitions], dtype=torch.float)
    o_prime = torch.tensor([tr[3] for tr in transitions], dtype=torch.float)
    # Continuation mask: 0 where the transition ended the episode, 1 otherwise,
    # so terminal steps do not bootstrap from the next value.
    not_done = torch.tensor([[0.0] if tr[4] else [1.0] for tr in transitions], dtype=torch.float)

    # Hidden state before the first step (for o) and after it (for o_prime),
    # detached so the update does not backpropagate into the rollout.
    first_pair = hidden_pairs[0]
    H       = (first_pair[0][0].detach(), first_pair[0][1].detach())
    H_prime = (first_pair[1][0].detach(), first_pair[1][1].detach())

    cov = var * torch.eye(out_size)  # fixed isotropic covariance of the Gaussian policy

    # Everything computed from the pre-update network is a constant for the update.
    with torch.no_grad():
        old_dist = torch.distributions.MultivariateNormal(model.pi(o, H)[0], cov)
        log_prob_old = old_dist.log_prob(a).view(-1, 1)
        v_target = r + gamma * model.v(o_prime, H_prime) * not_done
        td = (v_target - model.v(o, H)).numpy()

    # Generalized Advantage Estimation: discounted backward sum of the TD errors.
    advantage = []
    A = 0.0
    for delta in td[::-1].flatten():
        A = delta + gamma * lmbda * A
        advantage.append([A])
    advantage.reverse()
    advantage = torch.tensor(advantage, dtype=torch.float)

    for _ in range(K_epoch):
        dist = torch.distributions.MultivariateNormal(model.pi(o, H)[0], cov)
        log_prob = dist.log_prob(a).view(-1, 1)

        # Ratio of new to old density, formed in log space. Exponentiating each density
        # first underflows to 0 for actions far from the mean (std is only sqrt(var)),
        # and log(0) - log(0) would then turn the whole loss into NaN.
        ratio = torch.exp(log_prob - log_prob_old)

        surrogate = ratio * advantage
        clipped = torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip) * advantage
        loss_actor = torch.min(surrogate, clipped)
        loss_critic = F.mse_loss(model.v(o, H), v_target)
        # Maximise the clipped surrogate, minimise the weighted value error.
        loss = -(loss_actor - C_value * loss_critic)

        optimizer.zero_grad()
        # The graph is rebuilt from scratch every epoch, so it does not need to be retained.
        loss.mean().backward()
        optimizer.step()


In [5]:
# Define constants for the battery environment
E_max   = Battery_Size  # Maximum energy the battery can hold; SOC is computed as E / E_max
P_max   = E_max         # Cap applied to both charging and discharging power; set equal to E_max
tdelta  = unit / 4      # Duration of one step; power * tdelta gives the energy moved in a step (presumably hours, i.e. 15 min per raw row — confirm)
soc_min = 0.1           # Lower SOC bound; discharging is limited so that E stays above E_max * soc_min
soc_max = 0.9           # Upper SOC bound; charging is limited so that E stays below E_max * soc_max

# Coefficients of the SOC-dependent battery model:
# a* -> open-circuit voltage Voc, b* -> resistance Rs, c* -> Rts, d* -> Rtl (R = Rs + Rts + Rtl)
a0 = -1.031; a1 = 35; a2 = 3.685; a3 = 0.2156; a4 = 0.1178; a5 = 0.3201
b0 = 0.1463; b1 = 30.27; b2 = 0.1037; b3 = 0.0584; b4 = 0.1747; b5 = 0.1288
c0 = 0.1063; c1 = 62.49; c2 = 0.0437; d0 = 0.0712; d1 = 61.4; d2 = 0.0288

N = 130 * 215 * E_max / 0.1  # Scale factor between the pack-level power P and the per-element power p = 1e6 * P / N (presumably the number of cells — confirm)
beta = 10 / max_price         # Cost per unit of battery throughput (P_c + P_d) in revenue and reward, expressed in normalised price units

class Env():
    """Battery environment replayed over fixed generation, bid and price series.

    The state is the list of all observations so far; each observation is
    `[gen, imb, E]` (generation, imbalance price, stored energy). Relies on the
    module-level battery constants (E_max, P_max, tdelta, soc_min, soc_max,
    a*/b*/c*/d* coefficients, N, beta).
    """

    def __init__(self, data):
        """Store the series. `data` is `[generation, bid, imbalance_price]`."""
        self.data_gen = data[0]  # Generation series
        self.data_bid = data[1]  # Bid series
        self.data_imb = data[2]  # Imbalance price series
        self.state = []          # Filled by reset()

    def reset(self):
        """Start a new episode at sample 0 with a half-full battery and return the state."""
        gen = self.data_gen[0]
        imb = self.data_imb[0]
        E = E_max / 2
        state = [[gen, imb, E]]
        self.state = state
        return state

    def step(self, action):
        """Apply one battery action and advance to the next sample.

        Parameters
        ----------
        action : sequence whose first element is the battery set-point `bat`;
            `bat * E_max > 0` requests discharging, `< 0` requests charging.

        Returns
        -------
        next_state : list of all observations including the new `[gen, imb, E_prime]`.
        reward : float used for training (arbitrage value minus throughput cost minus
            a penalty for the part of the request the battery could not deliver).
        done : True when no further sample is available, i.e. the next call would
            index past the end of the generation series.
        info : `[gen, bid, bat, disp, revenue]`, where `bid` has been shifted by the
            battery power and `disp` is generation after the battery action.

        Raises IndexError if called after the series has been exhausted.
        """
        t = len(self.state)  # index of the sample consumed by this step
        gen = self.data_gen[t]
        bid = self.data_bid[t]
        imb = self.data_imb[t]
        bat = action[0]

        E = self.state[-1][-1]  # energy stored before this step
        soc = E / E_max

        # SOC-dependent open-circuit voltage and internal resistances
        Voc = a0 * np.exp(-a1 * soc) + a2 + a3 * soc - a4 * soc ** 2 + a5 * soc ** 3
        Rs = b0 * np.exp(-b1 * soc) + b2 + b3 * soc - b4 * soc ** 2 + b5 * soc ** 3
        Rts = c0 * np.exp(-c1 * soc) + c2
        Rtl = d0 * np.exp(-d1 * soc) + d2
        R = Rs + Rts + Rtl

        # Largest currents that keep the energy inside [E_max*soc_min, E_max*soc_max] over one step
        I_cmax = 1000000 * (E_max * soc_max - E) / N / (Voc * tdelta)
        I_dmax = 1000000 * (E - E_max * soc_min) / N / (Voc * tdelta)
        # Corresponding terminal powers, then rescaled by N / 1e6 to pack-level units
        p_cmax = N * (Voc * I_cmax + I_cmax ** 2 * R)
        p_dmax = N * (Voc * I_dmax - I_dmax ** 2 * R)
        P_cmax = p_cmax / 1000000
        P_dmax = p_dmax / 1000000

        # Requested power, then what the battery can actually deliver
        requested_c = max(-bat * E_max, 0)
        requested_d = max(bat * E_max, 0)
        P_c = min(requested_c, P_max, P_cmax)
        P_d = min(requested_d, P_max, P_dmax)
        p_c = 1000000 * P_c / N
        p_d = 1000000 * P_d / N

        if not np.isclose(p_c, 0):
            # Charging: only the Voc * I_c share of the terminal power is stored
            I_c = -(Voc - np.sqrt(Voc ** 2 + 4 * R * p_c)) / (2 * R)
            eff_c = (Voc * I_c) / p_c
            E_prime = E + eff_c * P_c * tdelta
            disp = gen - P_c
            bid = bid - P_c
        elif not np.isclose(p_d, 0):
            # Discharging: more energy leaves storage than reaches the terminals
            I_d = (Voc - np.sqrt(Voc ** 2 - 4 * R * p_d)) / (2 * R)
            eff_d = p_d / (Voc * I_d)
            E_prime = E - (1 / eff_d) * P_d * tdelta
            disp = gen + P_d
            bid = bid + P_d
        else:
            # Idle
            E_prime = E
            disp = gen

        # Revenue: value of dispatched energy minus imbalance and throughput costs
        revenue = (imb * disp - imb * abs(bid - disp) - beta * (P_c + P_d)) * tdelta

        next_state = self.state + [[gen, imb, E_prime]]
        # Training reward: arbitrage value minus throughput cost, minus the gap between
        # the requested and the delivered battery power
        reward = (imb * (P_d - P_c) - beta * (P_c + P_d)
                  - abs(P_c - requested_c) - abs(P_d - requested_d)) * tdelta
        # The episode ends when the series is exhausted; previously this was always False,
        # so callers' `if done:` checks never fired and the next step raised IndexError.
        done = len(next_state) >= len(self.data_gen)
        info = [gen, bid, bat, disp, revenue]

        self.state = next_state
        return next_state, reward, done, info


In [8]:
# PPO Training

total_episode = 500                              # Number of passes over the training series
max_iteration = int(len(data_train)/T_horizon)   # Number of T_horizon-sized batches per episode
print_interval = 1                               # Print statistics every this many episodes

# Fix the random state before the network is created: weight initialisation uses torch's
# generator and action sampling uses NumPy's global generator, so without a seed two
# runs of the notebook are not comparable.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

model = LSTM()
env_train = Env([data_train, train_predict, price_train])
env_val   = Env([data_val, val_predict, price_val])
env_test  = Env([data_test, test_predict, price_test])

# Per-episode logs: one inner list is appended for every episode.
bid_train, bid_val, bid_test = [], [], [] # Bidding Value
bat_train, bat_val, bat_test = [], [], [] # Discharging Value
mae_train, mae_val, mae_test = [], [], [] # Mean Absolute Error, |gen - bid|
mbe_train, mbe_val, mbe_test = [], [], [] # Mean Bidding Error, |disp - bid|
rev_train, rev_val, rev_test = [], [], [] # Revenue

In [9]:
# Adam optimizer over all network parameters
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Log lists grouped per data split, in the order expected by _log_step
train_logs = (bid_train, bat_train, mae_train, mbe_train, rev_train)
val_logs   = (bid_val, bat_val, mae_val, mbe_val, rev_val)
test_logs  = (bid_test, bat_test, mae_test, mbe_test, rev_test)


def _initial_hidden():
    """Return a zeroed (h, c) LSTM state for a single sequence."""
    return (torch.zeros([n_layers, 1, hidden_size], dtype=torch.float),
            torch.zeros([n_layers, 1, hidden_size], dtype=torch.float))


def _log_step(info, logs):
    """Append one environment step's statistics to the current episode of `logs`.

    `info` is `[gen, bid, bat, disp, revenue]` as returned by Env.step; `logs` is a
    tuple `(bid, bat, mae, mbe, rev)` of per-episode lists. The newest episode
    (index -1) is used, so the cell stays correct even if it is re-run without
    re-creating the lists.
    """
    gen, bid, bat, disp, revenue = info
    bids, bats, maes, mbes, revs = logs
    bids[-1].append(bid)
    bats[-1].append(bat)
    maes[-1].append(abs(gen - bid))
    mbes[-1].append(abs(disp - bid))
    revs[-1].append(revenue)


def _run_greedy(env, logs):
    """Replay `env` from start to end using the policy mean as the action and log every step."""
    state = env.reset()
    history = _initial_hidden()
    # Pure inference: without no_grad the hidden state would keep the autograd graph of
    # every previous step alive for the whole series.
    with torch.no_grad():
        for _ in range(len(env.data_gen) - 1):
            pi_out, history = model.pi(torch.tensor(state[-1], dtype=torch.float), history)
            state, _, _, info = env.step(pi_out[0].tolist())
            _log_step(info, logs)


for n_epi in range(total_episode):
    # Open a new per-episode list in every log
    for logs in (train_logs, val_logs, test_logs):
        for series in logs:
            series.append([])

    # ---- Training rollout -------------------------------------------------------------
    state = env_train.reset()
    history = _initial_hidden()
    done = False

    for i in range(max_iteration):
        batch = [[], []]  # [transitions, (hidden_before, hidden_after) pairs]
        for t in range(T_horizon):
            if len(state) >= len(env_train.data_gen):
                # No sample left: stepping again would index past the end of the series.
                done = True
                break

            obs = state[-1]
            # The rollout only needs values; gradients are recomputed inside train_net.
            with torch.no_grad():
                pi_out, next_history = model.pi(torch.tensor(obs, dtype=torch.float), history)
            # Sample the action from the Gaussian policy with fixed covariance
            action = np.random.multivariate_normal(pi_out.numpy()[0],
                                                   var * np.identity(out_size), 1)[0].tolist()
            next_state, reward, done, info = env_train.step(action)

            batch[0].append((obs, action, reward, next_state[-1], done))
            batch[1].append((history, next_history))
            # Env.step returns a fresh list, so no defensive copy is needed here.
            state = next_state
            history = next_history

            _log_step(info, train_logs)

            if done:
                break

        # The first episode only collects statistics of the untrained policy.
        if n_epi != 0 and batch[0]:
            train_net(model, batch, optimizer)
        if done:
            break

    # ---- Validation and test with the deterministic (mean) policy ----------------------
    _run_greedy(env_val, val_logs)
    _run_greedy(env_test, test_logs)

    # ---- Report -------------------------------------------------------------------------
    if (n_epi + 1) % print_interval == 0:
        MAE_train = round(100 * np.mean(mae_train[-1]), 2)
        MAE_val = round(100 * np.mean(mae_val[-1]), 2)
        MAE_test = round(100 * np.mean(mae_test[-1]), 2)
        MBE_train = round(100 * np.mean(mbe_train[-1]), 2)
        MBE_val = round(100 * np.mean(mbe_val[-1]), 2)
        MBE_test = round(100 * np.mean(mbe_test[-1]), 2)
        # Revenue is rescaled from normalised units using the peak price and peak power
        REV_train = round(max_price * RE_Capacity1 * np.mean(rev_train[-1]), 3)
        REV_val = round(max_price * RE_Capacity2 * np.mean(rev_val[-1]), 3)
        REV_test = round(max_price * RE_Capacity3 * np.mean(rev_test[-1]), 3)

        print("episode: {}".format(n_epi + 1))
        print("MAE_train: {}%".format(MAE_train).ljust(25), end="")
        print("MAE_val: {}%".format(MAE_val).ljust(25), end="")
        print("MAE_test: {}%".format(MAE_test).ljust(25))
        print("MBE_train: {}%".format(MBE_train).ljust(25), end="")
        print("MBE_val: {}%".format(MBE_val).ljust(25), end="")
        print("MBE_test: {}%".format(MBE_test).ljust(25))
        print("REV_train: ${}".format(REV_train).ljust(25), end="")
        print("REV_val: ${}".format(REV_val).ljust(25), end="")
        print("REV_test: ${}".format(REV_test).ljust(25))
        print("------------------------------------------------------------------------------------------")


episode: 1
MAE_train: 42.53%        MAE_val: 42.16%          MAE_test: 42.38%         
MBE_train: 42.53%        MBE_val: 42.16%          MBE_test: 42.37%         
REV_train: $-143.557     REV_val: $-163.324       REV_test: $-167.552      
------------------------------------------------------------------------------------------


In [None]:
# Select the episode with the highest mean validation revenue.
# The most recent episode is left out of the comparison.
# NOTE(review): presumably because it can be incomplete when training is interrupted — confirm.
# When only one episode has been recorded the slice would be empty, so fall back to using it.
rev_val_candidates = rev_val[:-1] or rev_val
select_num = np.argmax(np.mean(rev_val_candidates, axis=1))

# Test-set records of the selected episode. The logs start at sample 1 (sample 0 is
# consumed by Env.reset), hence the [1:] on the reference series.
select_test = np.array(bid_test[select_num][:])          # Bids logged on the test set (already shifted by the battery power)
select_test_bat = np.array(bat_test[select_num][:])      # Battery set-points logged on the test set
select_test_real = np.array(data_test[1:])               # Normalised generation of the test set
select_test_price = np.array(price_test[1:])             # Normalised prices of the test set

# Replay starts from a half-full battery, as in Env.reset
E = E_max / 2

mbe = []     # |bid - disp| per step
reward = []  # revenue per step
info = []    # one descriptive row per step: [gen, bid, mode, power, disp, efficiency, E]

# Re-simulate the battery with the logged set-points (same model as Env.step)
for bid, gen, bat, imb in zip(select_test, select_test_real, select_test_bat, select_test_price):
    soc = E / E_max

    # SOC-dependent open-circuit voltage and internal resistances
    Voc = a0 * np.exp(-a1 * soc) + a2 + a3 * soc - a4 * soc**2 + a5 * soc**3
    Rs = b0 * np.exp(-b1 * soc) + b2 + b3 * soc - b4 * soc**2 + b5 * soc**3
    Rts = c0 * np.exp(-c1 * soc) + c2
    Rtl = d0 * np.exp(-d1 * soc) + d2
    R = Rs + Rts + Rtl

    # Largest currents that keep the SOC inside [soc_min, soc_max] over one step
    I_cmax = 1000000 * E_max * (soc_max - soc) / N / (Voc * tdelta)
    I_dmax = 1000000 * E_max * (soc - soc_min) / N / (Voc * tdelta)

    # Corresponding terminal powers, rescaled by N / 1e6 to pack-level units
    p_cmax = N * (Voc * I_cmax + I_cmax**2 * R)
    p_dmax = N * (Voc * I_dmax - I_dmax**2 * R)
    P_cmax = p_cmax / 1000000
    P_dmax = p_dmax / 1000000

    # Power actually delivered given the request and the limits
    P_c = min(max(-bat * E_max, 0), P_max, P_cmax)
    P_d = min(max(bat * E_max, 0), P_max, P_dmax)
    p_c = 1000000 * P_c / N
    p_d = 1000000 * P_d / N

    if not np.isclose(p_c, 0):
        # Charging
        I_c = -(Voc - np.sqrt(Voc**2 + 4 * R * p_c)) / (2 * R)
        eff_c = (Voc * I_c) / p_c
        E = E + eff_c * P_c * tdelta
        disp = gen - P_c
        info += [[gen, round(bid, 4), 'C', round(P_c, 4), round(disp, 4), round(eff_c, 4), round(E, 4)]]
    elif not np.isclose(p_d, 0):
        # Discharging
        I_d = (Voc - np.sqrt(Voc**2 - 4 * R * p_d)) / (2 * R)
        eff_d = p_d / (Voc * I_d)
        E = E - (1 / eff_d) * P_d * tdelta
        disp = gen + P_d
        info += [[gen, round(bid, 4), 'D', round(P_d, 4), round(disp, 4), round(eff_d, 4), round(E, 4)]]
    else:
        # Idle
        disp = gen
        info += [[gen, round(bid, 4), 'N', 'N', round(disp, 4), 'N', round(E, 4)]]

    mbe += [abs(bid - disp)]
    # Same expression as `revenue` in Env.step
    reward += [(imb * disp - imb * abs(bid - disp) - beta * (P_c + P_d)) * tdelta]

# Error metrics in percent of the normalised scale
MAE_test = round(100 * np.mean(np.abs(select_test_real - select_test)), 2)
MBE_test = round(100 * np.mean(mbe), 2)

print("MAE_test: {}%".format(MAE_test))
print("MBE_test: {}%".format(MBE_test))
print("REV_test: ${}".format(round(max_price * RE_Capacity3 * np.mean(reward), 3)))

# Save the selected battery set-points; create the output directory first because
# to_csv does not create missing directories.
result = {}
result['1'] = select_test_bat
os.makedirs("./Results", exist_ok=True)
pd.DataFrame(result).to_csv("./Results/"+RE+"_Model2_Arbitrage.csv")
