In [1]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Dataset base name and the directory that holds the CSV files
RE = "Solar_PBE"
address = "../data/"

# Training set: the '_16' and '_17' files, stacked in that order
data_train_csv1, data_train_csv2 = (
    pd.read_csv(address + RE + suffix, index_col=0) for suffix in ('_16.csv', '_17.csv')
)
data_train_csv = pd.concat([data_train_csv1, data_train_csv2])

# Validation ('_18') and test ('_19') sets
data_val_csv = pd.read_csv(address + RE + '_18.csv', index_col=0)
data_test_csv = pd.read_csv(address + RE + '_19.csv', index_col=0)

# Imbalance price table
data_price = pd.read_csv(address + 'Price_Elia_Imbalance_16_19.csv', index_col=0)

# Cut the positive imbalance price into three consecutive windows whose lengths follow
# the train / validation / test frames, and attach each window as the 'Price(€)' column.
# NOTE(review): assigning a Series to a DataFrame column aligns on index labels, not on
# position. Whether the positional slices below land on the right rows therefore depends
# on the index stored in column 0 of each CSV, which is not visible here -- confirm that
# the resulting 'Price(€)' columns contain no unexpected NaN values.
positive_price = data_price['Positive imbalance price']
n_train = len(data_train_csv)
n_val = len(data_val_csv)
data_train_csv['Price(€)'] = positive_price[:n_train]
data_val_csv['Price(€)'] = positive_price[n_train:n_train + n_val]
data_test_csv['Price(€)'] = positive_price[n_train + n_val:]


In [3]:
# Battery size and the number of consecutive rows averaged into one sample
Battery_Size = 0.15
unit         = 1

# Per-dataset maximum power (used to normalise power) and the global maximum price
RE_Capacity1, RE_Capacity2, RE_Capacity3 = (
    max(frame['Power(MW)']) for frame in (data_train_csv, data_val_csv, data_test_csv)
)
max_price = max(data_price['Marginal incremental price'])

# Number of `unit`-sized segments available in each dataset
size_train0, size_val0, size_test0 = (
    int(len(frame) / unit) for frame in (data_train_csv, data_val_csv, data_test_csv)
)


def _aggregate(frame, n_segments, capacity):
    """Average `frame` over consecutive `unit`-row segments and normalise the result.

    Power is divided by `capacity`, price by the module-level `max_price`; both are
    rounded to three decimals.

    Returns four lists: (all powers, kept powers, all prices, kept prices), where
    "kept" means the segment's normalised power is strictly greater than zero.
    """
    power_all, power_kept, price_all, price_kept = [], [], [], []
    for seg in range(n_segments):
        window = slice(seg * unit, (seg + 1) * unit)
        power = round(pd.Series.mean(frame['Power(MW)'][window]) / capacity, 3)
        price = round(pd.Series.mean(frame['Price(€)'][window]) / max_price, 3)
        power_all.append(power)
        price_all.append(price)
        # Segments without positive generation are dropped from the usable series
        if power > 0:
            power_kept.append(power)
            price_kept.append(price)
    return power_all, power_kept, price_all, price_kept


# Training data
data_train0, data_train, price_train0, price_train = _aggregate(
    data_train_csv, size_train0, RE_Capacity1)

# Validation data
data_val0, data_val, price_val0, price_val = _aggregate(
    data_val_csv, size_val0, RE_Capacity2)

# Test data
data_test0, data_test, price_test0, price_test = _aggregate(
    data_test_csv, size_test0, RE_Capacity3)


In [4]:
# PPO agent for a partially observable state and a continuous action space.
#   Assumption 1: the standard deviation of the policy is fixed.
#   Assumption 2: the history is built from observations only.

# --- Network shape ---
n_layers      = 2        # stacked LSTM layers
in_size       = 2        # features per observation
hidden_size   = 64       # width of the encoder and of every LSTM layer
out_size      = 1        # action dimension

# --- PPO settings ---
T_horizon     = 128      # environment steps collected before each update
learning_rate = 0.001    # optimizer step size
K_epoch       = 3        # gradient passes over each collected batch
gamma         = 0.99     # discount factor
lmbda         = 0.95     # GAE lambda
eps_clip      = 0.01     # clipping range of the probability ratio
C_value       = 1        # weight of the critic loss in the total loss
var           = 0.1**2   # fixed variance of the Gaussian policy

# Recurrent actor-critic network: shared encoder and LSTM, separate policy and value heads
class LSTM(nn.Module):
    """Actor-critic network with a linear encoder, a stacked LSTM and two linear heads.

    Layer sizes come from the module-level `in_size`, `hidden_size`, `n_layers` and
    `out_size`. Inputs are treated as a single sequence: the encoded rows are reshaped
    to (1, -1, hidden_size), i.e. batch size 1 with one time step per input row.
    """

    def __init__(self):
        super().__init__()
        self.fc_s  = nn.Linear(in_size, hidden_size)                                # observation encoder
        self.rnn   = nn.LSTM(hidden_size, hidden_size, n_layers, batch_first=True)  # recurrent trunk
        self.fc_pi = nn.Linear(hidden_size, out_size)                               # policy head
        self.fc_v  = nn.Linear(hidden_size, 1)                                      # value head

    def _trunk(self, x, hidden):
        """Encode `x`, run it through the LSTM from `hidden`; return (features, new hidden)."""
        encoded = F.relu(self.fc_s(x))
        sequence = encoded.view(1, -1, hidden_size)
        return self.rnn(sequence, hidden)

    def pi(self, x, hidden):
        """Policy head.

        Returns a tensor of shape (-1, out_size) with one row per time step, together
        with the LSTM state after the last step.
        """
        features, hidden = self._trunk(x, hidden)
        return self.fc_pi(features).view(-1, out_size), hidden

    def v(self, x, hidden):
        """Value head.

        Returns a tensor of shape (-1, 1) with one row per time step; the updated LSTM
        state is discarded.
        """
        features, _ = self._trunk(x, hidden)
        return self.fc_v(features).view(-1, 1)
 
# Function to train the PPO network
def train_net(model, batch, optimizer):
    """Run `K_epoch` clipped-PPO updates on one collected batch.

    Args:
        model: network exposing `pi(x, hidden) -> (mean, hidden)` and `v(x, hidden)`.
        batch: two parallel lists.
            batch[0] holds transitions `(obs, action, reward, next_obs, done)`.
            batch[1] holds `(hidden, next_hidden)` pairs; only the first pair is used,
            as the initial LSTM state for replaying the whole observation sequence
            (`hidden`) and the whole next-observation sequence (`next_hidden`).
        optimizer: optimizer over `model`'s parameters.

    Returns:
        None. `model` is updated in place. An empty batch is a no-op.
    """
    transitions, histories = batch[0], batch[1]
    if len(transitions) == 0 or len(histories) == 0:
        return

    o, a, r, o_prime, not_done = [], [], [], [], []
    for transition in transitions:
        o.append(transition[0])
        a.append(transition[1])
        r.append([transition[2]])
        o_prime.append(transition[3])
        # Mask that zeroes the bootstrap term on terminal transitions
        not_done.append([0.0 if transition[4] else 1.0])

    o        = torch.tensor(o, dtype=torch.float)
    a        = torch.tensor(a, dtype=torch.float)
    r        = torch.tensor(r, dtype=torch.float)
    o_prime  = torch.tensor(o_prime, dtype=torch.float)
    not_done = torch.tensor(not_done, dtype=torch.float)

    # Initial LSTM states for the two replays, cut off from any earlier graph
    H       = (histories[0][0][0].detach(), histories[0][0][1].detach())
    H_prime = (histories[0][1][0].detach(), histories[0][1][1].detach())

    cov = var * torch.eye(out_size)

    # Everything computed here is a constant of the update, so no graph is needed
    with torch.no_grad():
        log_prob_old = torch.distributions.MultivariateNormal(
            model.pi(o, H)[0], cov).log_prob(a).view(len(a), 1)
        v_target = r + gamma * model.v(o_prime, H_prime) * not_done
        td = (v_target - model.v(o, H)).numpy()

    # Generalized Advantage Estimation, accumulated backwards in time
    advantage = []
    A = 0.0
    for delta in td[::-1].flatten():
        A = delta + gamma * lmbda * A
        advantage.append([A])
    advantage.reverse()
    advantage = torch.tensor(advantage, dtype=torch.float)

    for _ in range(K_epoch):
        pdf = torch.distributions.MultivariateNormal(model.pi(o, H)[0], cov)
        log_prob = pdf.log_prob(a).view(len(a), 1)
        # Ratio formed directly in log space: exponentiating each probability first
        # and taking the log again yields NaN as soon as one of them underflows to 0.
        ratio = torch.exp(log_prob - log_prob_old)

        surrogate = torch.min(ratio * advantage,
                              torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip) * advantage)
        loss_critic = F.mse_loss(model.v(o, H), v_target)
        loss = -(surrogate - C_value * loss_critic)

        optimizer.zero_grad()
        # The graph is rebuilt on every pass, so it does not have to be retained
        loss.mean().backward()
        optimizer.step()


In [5]:
# Battery limits and simulation time step
E_max   = Battery_Size   # energy capacity of the battery
P_max   = E_max          # power limit, set numerically equal to the energy capacity
tdelta  = unit / 4       # duration of one step (a quarter per aggregated row)
soc_min = 0.1            # lowest allowed state of charge
soc_max = 0.9            # highest allowed state of charge

# State-of-charge curve coefficients used by the battery model:
#   a* -> Voc, b* -> Rs, c* -> Rts, d* -> Rtl
a0, a1, a2, a3, a4, a5 = -1.031, 35, 3.685, 0.2156, 0.1178, 0.3201
b0, b1, b2, b3, b4, b5 = 0.1463, 30.27, 0.1037, 0.0584, 0.1747, 0.1288
c0, c1, c2 = 0.1063, 62.49, 0.0437
d0, d1, d2 = 0.0712, 61.4, 0.0288

# Scale between the pack-level power and the per-unit power fed to the curves above;
# proportional to the battery capacity.
# NOTE(review): presumably a cell count (130 * 215 per 0.1 of capacity) -- confirm.
N = 130 * 215 * E_max / 0.1

# Battery-usage cost coefficient, expressed relative to the maximum price
beta = 10 / max_price

class Env():
    """Bidding environment: a generation trace plus a state-of-charge battery model.

    `data` and `price` are indexable sequences of the same length (normalised
    generation and imbalance price per step). `self.state` is the full history of
    `[gen, E]` pairs; its last entry is the current observation.
    """

    def __init__(self, data, price):
        self.data = data      # generation per step
        self.price = price    # imbalance price per step
        self.state = []       # history of [gen, E]; filled by reset()

    def reset(self):
        """Start a new episode at the first sample with a half-charged battery.

        Returns the state history, which now holds the single entry `[data[0], E_max / 2]`.
        """
        gen = self.data[0]
        E = E_max / 2
        state = [[gen, E]]
        self.state = state
        return state

    def step(self, action):
        """Advance one step with bid `action[0]`.

        The battery charges with the surplus (gen > bid) or discharges to cover the
        shortfall (bid > gen), limited by `P_max` and by the state-of-charge window.

        Returns:
            next_state: new history list (previous history plus `[gen, E_prime]`).
            reward: `-(|bid - disp| + beta * P_c + beta * P_d)`.
            done: True once the last sample of `data` has been consumed.
            info: `[gen, bid, disp, revenue]`.

        Raises:
            IndexError: if called after the data has been exhausted.
        """
        t = len(self.state)
        if t >= len(self.data):
            raise IndexError("Env.step called past the end of the data; call reset() first")

        gen = self.data[t]      # generation of the step being decided
        bid = action[0]
        imb = self.price[t]     # imbalance price of the same step

        E = self.state[-1][-1]  # stored energy before this step
        soc = E / E_max

        # State-of-charge dependent voltage and resistance terms
        Voc = a0 * np.exp(-a1 * soc) + a2 + a3 * soc - a4 * soc**2 + a5 * soc**3
        Rs  = b0 * np.exp(-b1 * soc) + b2 + b3 * soc - b4 * soc**2 + b5 * soc**3
        Rts = c0 * np.exp(-c1 * soc) + c2
        Rtl = d0 * np.exp(-d1 * soc) + d2
        R   = Rs + Rts + Rtl

        # Largest currents that keep the energy inside [soc_min, soc_max] over one step
        I_cmax = 1000000 * (E_max * soc_max - E) / N / (Voc * tdelta)
        I_dmax = 1000000 * (E - E_max * soc_min) / N / (Voc * tdelta)
        # Corresponding terminal powers (losses add when charging, subtract when discharging)
        p_cmax = N * (Voc * I_cmax + I_cmax**2 * R)
        p_dmax = N * (Voc * I_dmax - I_dmax**2 * R)

        P_cmax = p_cmax / 1000000
        P_dmax = p_dmax / 1000000
        P_c = min(max(gen - bid, 0), P_max, P_cmax)  # charging power
        P_d = min(max(bid - gen, 0), P_max, P_dmax)  # discharging power
        p_c = 1000000 * P_c / N
        p_d = 1000000 * P_d / N

        # Currents solving Voc * I +/- I**2 * R = p for the chosen powers
        I_c = -(Voc - np.sqrt(Voc**2 + 4 * R * p_c)) / (2 * R)
        I_d = (Voc - np.sqrt(Voc**2 - 4 * R * p_d)) / (2 * R)

        if not np.isclose(p_c, 0):
            eff_c = (Voc * I_c) / p_c                 # share of charging power that is stored
            E_prime = E + eff_c * P_c * tdelta
            disp = gen - P_c
        elif not np.isclose(p_d, 0):
            eff_d = p_d / (Voc * I_d)                 # share of drawn power that is delivered
            E_prime = E - (1 / eff_d) * P_d * tdelta
            disp = gen + P_d
        else:
            E_prime = E                               # battery idle
            disp = gen

        error = bid - disp
        error_function = abs(error) + beta * P_c + beta * P_d
        revenue = (imb * disp - imb * abs(bid - disp) - beta * (P_c + P_d)) * tdelta

        next_state = self.state + [[gen, E_prime]]
        reward = -error_function
        # Signal the end of the trace so callers stop before indexing past the data
        done = len(next_state) >= len(self.data)
        info = [gen, bid, disp, revenue]

        self.state = next_state
        return next_state, reward, done, info


In [6]:
# Training length: number of passes over the training environment
total_episode = 500

# Number of full T_horizon-step batches that fit into the training series
max_iteration = len(data_train) // T_horizon

# Report metrics every `print_interval` episodes
print_interval = 1

# Actor-critic network shared by training, validation and testing
model = LSTM()

# One environment per data split
env_train, env_val, env_test = (
    Env(data_train, price_train),
    Env(data_val, price_val),
    Env(data_test, price_test),
)

# Per-episode logs; each gets one inner list appended per episode.
bid_train, bid_val, bid_test = ([] for _ in range(3))   # bids
mae_train, mae_val, mae_test = ([] for _ in range(3))   # |generation - bid|
mbe_train, mbe_val, mbe_test = ([] for _ in range(3))   # |dispatch - bid|
rev_train, rev_val, rev_test = ([] for _ in range(3))   # revenue


In [8]:
# Adam optimizer over all network parameters
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def _zero_history():
    """Return a fresh all-zero (h, c) state pair for the model's LSTM (batch of 1)."""
    return (torch.zeros([n_layers, 1, hidden_size], dtype=torch.float),
            torch.zeros([n_layers, 1, hidden_size], dtype=torch.float))


def _evaluate(env, bids, maes, mbes, revs):
    """Run the current policy mean through `env`, from reset to the end of its data.

    No exploration noise is added and nothing is learned. One entry per step is
    appended in place to each of the four lists: the bid, |generation - bid|,
    |dispatch - bid| and the revenue.
    """
    obs = env.reset()[-1]
    history = _zero_history()
    # Inference only: with autograd enabled the recurrent state would carry a graph
    # reaching back through every previous step of the episode.
    with torch.no_grad():
        for _ in range(len(env.data) - 1):
            pi_out, history = model.pi(torch.tensor(obs, dtype=torch.float), history)
            action = pi_out[0].tolist()
            next_state, _, _, info = env.step(action)
            obs = next_state[-1]

            gen = info[0]; bid = info[1]; disp = info[2]; revenue = info[3]
            bids.append(bid)
            maes.append(abs(gen - bid))
            mbes.append(abs(disp - bid))
            revs.append(revenue)


for n_epi in range(total_episode):
    # One fresh log list per split for this episode
    bid_train += [[]]; bid_val += [[]]; bid_test += [[]]
    mae_train += [[]]; mae_val += [[]]; mae_test += [[]]
    mbe_train += [[]]; mbe_val += [[]]; mbe_test += [[]]
    rev_train += [[]]; rev_val += [[]]; rev_test += [[]]

    # ---- Training phase ----
    state = env_train.reset()
    obs = state[-1]               # only the latest observation is fed to the network
    history = _zero_history()

    for i in range(max_iteration):
        batch = [[], []]          # [transitions, (hidden, next_hidden) pairs]

        for t in range(T_horizon):
            # The rollout needs no gradients; train_net replays the batch itself
            with torch.no_grad():
                pi_out, next_history = model.pi(torch.tensor(obs, dtype=torch.float), history)

            # Sample the bid around the policy mean with the fixed variance
            action = np.random.multivariate_normal(pi_out.detach().numpy()[0],
                                                   var * np.identity(out_size), 1)[0].tolist()

            next_state, reward, done, info = env_train.step(action)

            batch[0].append((obs, action, reward, next_state[-1], done))
            batch[1].append((history, next_history))

            state = next_state
            obs = next_state[-1]
            history = next_history

            gen = info[0]; bid = info[1]; disp = info[2]; revenue = info[3]
            bid_train[n_epi] += [bid]
            mae_train[n_epi] += [abs(gen - bid)]
            mbe_train[n_epi] += [abs(disp - bid)]
            rev_train[n_epi] += [revenue]

            if done:
                break

        # Episode 0 only measures the untrained policy; updates start from episode 1
        if n_epi != 0:
            train_net(model, batch, optimizer)
        if done:
            break

    # ---- Validation and test phases ----
    _evaluate(env_val, bid_val[n_epi], mae_val[n_epi], mbe_val[n_epi], rev_val[n_epi])
    _evaluate(env_test, bid_test[n_epi], mae_test[n_epi], mbe_test[n_epi], rev_test[n_epi])

    # ---- Reporting ----
    if (n_epi + 1) % print_interval == 0:
        MAE_train = round(100 * np.mean(mae_train[n_epi]), 2)
        MAE_val   = round(100 * np.mean(mae_val[n_epi]), 2)
        MAE_test  = round(100 * np.mean(mae_test[n_epi]), 2)
        MBE_train = round(100 * np.mean(mbe_train[n_epi]), 2)
        MBE_val   = round(100 * np.mean(mbe_val[n_epi]), 2)
        MBE_test  = round(100 * np.mean(mbe_test[n_epi]), 2)
        # Revenue is rescaled from normalised units with the price and capacity maxima
        REV_train = round(max_price * RE_Capacity1 * np.mean(rev_train[n_epi]), 3)
        REV_val   = round(max_price * RE_Capacity2 * np.mean(rev_val[n_epi]), 3)
        REV_test  = round(max_price * RE_Capacity3 * np.mean(rev_test[n_epi]), 3)

        print("episode: {}".format(n_epi + 1))
        print("MAE_train: {}%".format(MAE_train).ljust(25), end="")
        print("MAE_val: {}%".format(MAE_val).ljust(25), end="")
        print("MAE_test: {}%".format(MAE_test).ljust(25))
        print("MBE_train: {}%".format(MBE_train).ljust(25), end="")
        print("MBE_val: {}%".format(MBE_val).ljust(25), end="")
        print("MBE_test: {}%".format(MBE_test).ljust(25))
        print("REV_train: ${}".format(REV_train).ljust(25), end="")
        print("REV_val: ${}".format(REV_val).ljust(25), end="")
        print("REV_test: ${}".format(REV_test).ljust(25))
        print("------------------------------------------------------------------------------------------")


In [None]:
# Replay the test bids of the best validation episode through the battery model
# and export them.

# Episode with the smallest mean |dispatch - bid| on the validation set
# (the last episode is left out of the comparison)
select_num = np.argmin(np.mean(mbe_val[:-1], axis=1))

# Test bids produced in that episode
select_test = np.array(bid_test[select_num][:])

# Generation and price seen at each of those steps (stepping starts at index 1)
select_test_real = np.array(data_test[1:])
select_test_price = np.array(price_test[1:])

# Battery starts half charged, as in Env.reset()
E = E_max / 2

mbe = []     # |bid - dispatch| per step
reward = []  # revenue per step
info = []    # per-step record: [gen, bid, mode, battery power, dispatch, efficiency, E]

for i in range(len(select_test)):
    bid = select_test[i]
    gen = select_test_real[i]
    imb = select_test_price[i]

    soc = E / E_max

    # State-of-charge dependent open-circuit voltage and resistance terms
    Voc = a0 * np.exp(-a1 * soc) + a2 + a3 * soc - a4 * soc**2 + a5 * soc**3
    Rs = b0 * np.exp(-b1 * soc) + b2 + b3 * soc - b4 * soc**2 + b5 * soc**3
    Rts = c0 * np.exp(-c1 * soc) + c2
    Rtl = d0 * np.exp(-d1 * soc) + d2
    R = Rs + Rts + Rtl

    # Largest currents that keep the state of charge inside [soc_min, soc_max] for one step
    I_cmax = 1000000 * E_max * (soc_max - soc) / N / (Voc * tdelta)
    I_dmax = 1000000 * E_max * (soc - soc_min) / N / (Voc * tdelta)

    # Corresponding terminal powers
    p_cmax = N * (Voc * I_cmax + I_cmax**2 * R)
    p_dmax = N * (Voc * I_dmax - I_dmax**2 * R)
    P_cmax = p_cmax / 1000000
    P_dmax = p_dmax / 1000000

    # Battery power actually used, limited by P_max and the limits above
    P_c = min(max(gen - bid, 0), P_max, P_cmax)
    P_d = min(max(bid - gen, 0), P_max, P_dmax)

    # Rescale to the per-unit power used by the battery equations (x 1e6, / N)
    p_c = 1000000 * P_c / N
    p_d = 1000000 * P_d / N

    # Currents solving Voc * I +/- I**2 * R = p
    I_c = -(Voc - np.sqrt(Voc**2 + 4 * R * p_c)) / (2 * R)
    I_d = (Voc - np.sqrt(Voc**2 - 4 * R * p_d)) / (2 * R)

    if not np.isclose(p_c, 0):
        # Charging: part of the generation goes into the battery
        eff_c = (Voc * I_c) / p_c
        E = E + eff_c * P_c * tdelta
        disp = gen - P_c
        info += [[gen, round(bid, 4), 'C', round(P_c, 4), round(disp, 4), round(eff_c, 4), round(E, 4)]]
    elif not np.isclose(p_d, 0):
        # Discharging: the battery tops up the generation
        eff_d = p_d / (Voc * I_d)
        E = E - (1 / eff_d) * P_d * tdelta
        disp = gen + P_d
        info += [[gen, round(bid, 4), 'D', round(P_d, 4), round(disp, 4), round(eff_d, 4), round(E, 4)]]
    else:
        # Battery idle: generation is dispatched as is
        disp = gen
        info += [[gen, round(bid, 4), 'N', 'N', round(disp, 4), 'N', round(E, 4)]]

    mbe += [abs(bid - disp)]
    reward += [(imb * disp - imb * abs(bid - disp) - beta * (P_c + P_d)) * tdelta]

# Mean |generation - bid| and mean |dispatch - bid|, in percent of the normalised scale
MAE_test = round(100 * np.mean(np.abs(select_test_real - select_test)), 2)
MBE_test = round(100 * np.mean(mbe), 2)

print("MAE_test: {}%".format(MAE_test))
print("MBE_test: {}%".format(MBE_test))
print("REV_test: ${}".format(round(max_price * RE_Capacity3 * np.mean(reward), 3)))

# Save the selected test bids; create the output directory first so the export
# does not fail on a fresh checkout.
os.makedirs("./Results", exist_ok=True)
pd.DataFrame(select_test).to_csv("./Results/"+RE+"_Model3_DeepComp.csv")
