In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Dataset name prefix and the directory holding the CSV files.
RE = "Wind_Wallonie_Elia"
address = "../data/"

# Generation data: the "_16" and "_17" files are stacked into the training
# frame, "_18" is used for validation and "_19" for testing.
data_train_csv1 = pd.read_csv(f"{address}{RE}_16.csv", index_col=0)
data_train_csv2 = pd.read_csv(f"{address}{RE}_17.csv", index_col=0)
data_train_csv = pd.concat([data_train_csv1, data_train_csv2])
data_val_csv = pd.read_csv(f"{address}{RE}_18.csv", index_col=0)
data_test_csv = pd.read_csv(f"{address}{RE}_19.csv", index_col=0)

# Price data: one file covering all splits, cut positionally into
# train / validation / test segments of matching length.
# NOTE(review): assigning a Series to a DataFrame column aligns on index
# labels - confirm the price file shares its index with the generation files.
data_price = pd.read_csv(f"{address}Price_Elia_Imbalance_16_19.csv", index_col=0)
price_series = data_price['Positive imbalance price']
n_train_rows = len(data_train_csv)
n_val_rows = len(data_val_csv)
data_train_csv['Price(€)'] = price_series[:n_train_rows]
data_val_csv['Price(€)'] = price_series[n_train_rows:n_train_rows + n_val_rows]
data_test_csv['Price(€)'] = price_series[n_train_rows + n_val_rows:]

In [3]:
# Battery size in per-unit (p.u.) of the renewable capacity.
Battery_Size = 0.15
# Number of consecutive raw samples averaged into one model time step.
# tdelta is later derived as unit / 4, so one raw sample presumably spans
# 15 minutes - TODO confirm against the data source.
unit = 1

# Per-split normalisation constants: the largest observed power in each split.
# Series.max() skips NaN (builtin max() returns NaN when the first element is
# NaN) and runs vectorised; float() keeps the plain-Python scalar type.
RE_Capacity1 = float(data_train_csv['Power(MW)'].max())  # training split
RE_Capacity2 = float(data_val_csv['Power(MW)'].max())    # validation split
RE_Capacity3 = float(data_test_csv['Power(MW)'].max())   # test split

# Price normalisation constant.
# NOTE(review): this is the maximum of 'Marginal incremental price', while the
# series that gets normalised is 'Positive imbalance price' - confirm that
# using a different column here is intentional.
max_price = float(data_price['Marginal incremental price'].max())

# Number of whole `unit`-sized steps available in each split.
size_train0 = len(data_train_csv) // unit
size_val0 = len(data_val_csv) // unit
size_test0 = len(data_test_csv) // unit

def normalize_data(power_data, price_data, capacity, max_price, size):
    """Average, scale and filter paired power/price series.

    The two series are cut into `size` consecutive windows of `unit` samples
    (`unit` is read from module scope). Each window is averaged, divided by
    its scale (`capacity` for power, `max_price` for price) and rounded to
    three decimals. Windows whose scaled power is not strictly positive
    (zero, negative or NaN) are dropped from BOTH outputs, so the two
    returned lists always have equal length.

    Args:
        power_data: pandas Series of raw power values.
        price_data: pandas Series of raw prices, same length as power_data.
        capacity: divisor applied to the averaged power.
        max_price: divisor applied to the averaged price.
        size: number of windows to process.

    Returns:
        (normalized_power, normalized_price) as two Python lists.
    """
    kept_power, kept_price = [], []

    for idx in range(size):
        window = slice(idx * unit, (idx + 1) * unit)
        scaled_power = round(power_data[window].mean() / capacity, 3)
        scaled_price = round(price_data[window].mean() / max_price, 3)

        # Skip windows without strictly positive generation.
        if not scaled_power > 0:
            continue

        kept_power.append(scaled_power)
        kept_price.append(scaled_price)

    return kept_power, kept_price

# Build the normalised (power, price) lists for every split. Each split is
# scaled by its own capacity, while all three share the same max_price.
data_train, price_train = normalize_data(
    data_train_csv['Power(MW)'], data_train_csv['Price(€)'],
    RE_Capacity1, max_price, size_train0,
)
data_val, price_val = normalize_data(
    data_val_csv['Power(MW)'], data_val_csv['Price(€)'],
    RE_Capacity2, max_price, size_val0,
)
data_test, price_test = normalize_data(
    data_test_csv['Power(MW)'], data_test_csv['Price(€)'],
    RE_Capacity3, max_price, size_test0,
)


In [13]:
# --- Network shape ---
# Stacked LSTM layers.
n_layers = 2
# Observation width; the environment emits [generation, price, energy].
in_size = 3
# Width of the embedding and of the LSTM hidden state.
hidden_size = 64
# Action width; the environment reads action[0] as bid and action[1] as ratio.
out_size = 2

# --- PPO settings ---
# Environment steps collected before each call to train_net.
T_horizon = 128
# Adam step size.
learning_rate = 0.001
# Gradient updates performed on every collected segment.
K_epoch = 3
# Reward discount factor.
gamma = 0.99
# Decay used when accumulating TD errors into advantages (GAE lambda).
lmbda = 0.95
# Half-width of the clipping interval around a probability ratio of 1.
eps_clip = 0.01
# Weight of the critic (value) loss in the combined objective.
C_value = 1
# Variance of the isotropic Gaussian policy (standard deviation 0.1).
var = 0.1 ** 2

class LSTM(nn.Module):
    """Recurrent actor-critic network with a shared trunk and two linear heads.

    Trunk: Linear(in_size -> hidden_size) + ReLU, followed by an
    `n_layers`-deep LSTM built with batch_first=True. The input is reshaped to
    (1, -1, hidden_size), i.e. ONE batch element whose rows form the time
    axis, so `hidden` must be an (h, c) pair shaped
    (n_layers, 1, hidden_size).

    Heads:
        fc_pi -> out_size values per time step (used by callers as the mean
                 of a Gaussian policy).
        fc_v  -> one scalar value estimate per time step.

    Layer sizes are read from the module-level constants in_size,
    hidden_size, n_layers and out_size.
    """

    def __init__(self):
        super().__init__()
        self.fc_s = nn.Linear(in_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, n_layers, batch_first=True)
        self.fc_pi = nn.Linear(hidden_size, out_size)
        self.fc_v = nn.Linear(hidden_size, 1)

    def _trunk(self, x, hidden):
        """Embed `x`, treat its rows as one sequence and run the LSTM over it."""
        embedded = F.relu(self.fc_s(x)).view(1, -1, hidden_size)
        return self.rnn(embedded, hidden)

    def pi(self, x, hidden):
        """Return (policy output of shape (steps, out_size), new hidden state)."""
        features, hidden_out = self._trunk(x, hidden)
        return self.fc_pi(features).view(-1, out_size), hidden_out

    def v(self, x, hidden):
        """Return value estimates of shape (steps, 1); the new hidden state is discarded."""
        features, _ = self._trunk(x, hidden)
        return self.fc_v(features).view(-1, 1)

def train_net(model, batch, optimizer):
    """Run `K_epoch` clipped-surrogate (PPO-style) updates on one rollout segment.

    Args:
        model: network exposing `pi(obs, hidden) -> (action_mean, hidden)` and
            `v(obs, hidden) -> value`.
        batch: two parallel lists. `batch[0]` holds
            `(obs, action, reward, next_obs, done)` tuples in time order.
            `batch[1]` holds `(hidden, next_hidden)` pairs; only the first pair
            is used, because the whole segment is replayed through the
            recurrent model starting from that hidden state.
        optimizer: optimizer over the model's parameters.

    Returns:
        None. The model is updated in place. An empty segment is a no-op.

    Reads the module-level settings gamma, lmbda, eps_clip, C_value, K_epoch,
    var and out_size.
    """
    transitions, hidden_pairs = batch[0], batch[1]
    if not transitions or not hidden_pairs:
        return  # nothing collected, nothing to learn from

    o = torch.tensor([tr[0] for tr in transitions], dtype=torch.float)
    a = torch.tensor([tr[1] for tr in transitions], dtype=torch.float)
    r = torch.tensor([[tr[2]] for tr in transitions], dtype=torch.float)
    o_prime = torch.tensor([tr[3] for tr in transitions], dtype=torch.float)
    # Mask that is 0 where the episode ended and 1 elsewhere, so that no value
    # is bootstrapped past a terminal step.
    not_done = torch.tensor([[0.0] if tr[4] else [1.0] for tr in transitions],
                            dtype=torch.float)

    # Hidden states entering the first observation / first next-observation,
    # cut off from whatever graph produced them during the rollout.
    first_hidden, first_next_hidden = hidden_pairs[0]
    H = (first_hidden[0].detach(), first_hidden[1].detach())
    H_prime = (first_next_hidden[0].detach(), first_next_hidden[1].detach())

    cov = var * torch.eye(out_size)

    # Everything computed here is a fixed target for the updates below, so it
    # is evaluated once and without building an autograd graph.
    with torch.no_grad():
        old_dist = torch.distributions.MultivariateNormal(model.pi(o, H)[0], cov)
        log_prob_old = old_dist.log_prob(a).view(-1, 1)
        v_target = r + gamma * model.v(o_prime, H_prime) * not_done
        td = (v_target - model.v(o, H)).numpy()

    # Generalized Advantage Estimation: discounted backward sum of TD errors.
    advantage = []
    A = 0.0
    for delta in td[::-1].flatten():
        A = float(delta) + gamma * lmbda * A
        advantage.append([A])
    advantage.reverse()
    advantage = torch.tensor(advantage, dtype=torch.float)

    for _ in range(K_epoch):
        dist = torch.distributions.MultivariateNormal(model.pi(o, H)[0], cov)
        log_prob = dist.log_prob(a).view(-1, 1)
        # Stay in log space: exp(log_prob) underflows to 0 for actions far from
        # the mean, and log(0) - log(0) would turn the ratio into NaN.
        ratio = torch.exp(log_prob - log_prob_old)

        surrogate = torch.min(
            ratio * advantage,
            torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip) * advantage,
        )
        loss_critic = F.mse_loss(model.v(o, H), v_target)
        loss = -(surrogate - C_value * loss_critic)

        optimizer.zero_grad()
        # A fresh graph is built on every epoch, so it need not be retained.
        loss.mean().backward()
        optimizer.step()


In [14]:
# --- Battery and market constants read by Env and by the evaluation cell ---
# Usable energy capacity of the battery (same per-unit scale as Battery_Size).
E_max = Battery_Size
# Power limit, numerically equal to the energy capacity.
P_max = E_max
# Duration of one step; unit / 4 means a quarter of this per raw sample
# (presumably hours with 15-minute samples - TODO confirm).
tdelta = unit / 4
# Allowed state-of-charge window, as fractions of E_max.
soc_min = 0.1
soc_max = 0.9

# Fit coefficients of the SOC-dependent battery curves evaluated in Env.step:
#   a* -> Voc, b* -> Rs, c* -> Rts, d* -> Rtl
a0, a1, a2, a3, a4, a5 = -1.031, 35, 3.685, 0.2156, 0.1178, 0.3201
b0, b1, b2, b3, b4, b5 = 0.1463, 30.27, 0.1037, 0.0584, 0.1747, 0.1288
c0, c1, c2 = 0.1063, 62.49, 0.0437
d0, d1, d2 = 0.0712, 61.4, 0.0288

# Scale factor between battery-level power and the per-element power fed into
# the curves above (presumably a cell count - TODO confirm); grows linearly
# with E_max.
N = 130 * 215 * E_max / 0.1
# Cost per unit of charged/discharged power, expressed in normalised price units.
beta = 10 / max_price

class Env():
    """Bidding environment: a renewable generator paired with a battery.

    An observation is `[generation, imbalance_price, stored_energy]`. The
    environment keeps the full observation history in `self.state` and returns
    that history from `reset` and `step`; the latest observation is its last
    element.

    Battery and market constants (E_max, P_max, tdelta, soc_min, soc_max, N,
    beta and the a*/b*/c*/d* curve coefficients) are read from module scope.
    """

    def __init__(self, data):
        """Store the series: `data[0]` is generation, `data[1]` is imbalance price."""
        self.data_gen = data[0]
        self.data_imb = data[1]
        self.state = []

    def reset(self):
        """Restart at the first sample with the battery half full; return the history."""
        state = [[self.data_gen[0], self.data_imb[0], E_max / 2]]
        self.state = state
        return state

    def step(self, action):
        """Advance one time step. `reset` must have been called first.

        Args:
            action: `[bid, rat]`. `bid` is the committed output; `rat` scales
                how much of the gap between generation and bid the battery
                tries to absorb (charge) or cover (discharge).

        Returns:
            (next_state, reward, done, info)
            next_state: a NEW history list ending with the new observation.
            reward: revenue minus `imb * gen * tdelta`.
            done: True once the returned observation is the last sample of
                the series, i.e. a further `step` would run past the data.
            info: `[gen, bid, rat, disp, revenue]`.
        """
        t = len(self.state)  # index of the sample realised during this step
        gen = self.data_gen[t]
        imb = self.data_imb[t]
        bid = action[0]
        rat = action[1]

        E = self.state[-1][-1]  # energy stored before this step
        soc = E / E_max

        # SOC-dependent curves. By their names these are an open-circuit
        # voltage and three resistances summed into one - TODO confirm
        # against the battery model the coefficients come from.
        Voc = a0 * np.exp(-a1 * soc) + a2 + a3 * soc - a4 * soc**2 + a5 * soc**3
        Rs = b0 * np.exp(-b1 * soc) + b2 + b3 * soc - b4 * soc**2 + b5 * soc**3
        Rts = c0 * np.exp(-c1 * soc) + c2
        Rtl = d0 * np.exp(-d1 * soc) + d2
        R = Rs + Rts + Rtl

        # Largest currents that keep the energy inside [soc_min, soc_max]
        # over one step, and the corresponding terminal power limits.
        I_cmax = 1000000 * (E_max * soc_max - E) / N / (Voc * tdelta)
        I_dmax = 1000000 * (E - E_max * soc_min) / N / (Voc * tdelta)
        p_cmax = N * (Voc * I_cmax + I_cmax**2 * R)
        p_dmax = N * (Voc * I_dmax - I_dmax**2 * R)

        P_cmax = p_cmax / 1000000
        P_dmax = p_dmax / 1000000

        # Charge on surplus (gen > bid), discharge on shortfall (gen < bid),
        # each capped by the rated power and the SOC-derived limit.
        P_c = min(max(rat * (gen - bid), 0), P_max, P_cmax)
        P_d = min(max(rat * (bid - gen), 0), P_max, P_dmax)

        # The same powers divided by N and scaled by 1e6, as used in the
        # current equations below.
        p_c = 1000000 * P_c / N
        p_d = 1000000 * P_d / N

        # Currents solving p = Voc*I + R*I^2 (charge) and p = Voc*I - R*I^2
        # (discharge).
        I_c = -(Voc - np.sqrt(Voc**2 + 4 * R * p_c)) / (2 * R)
        I_d = (Voc - np.sqrt(Voc**2 - 4 * R * p_d)) / (2 * R)

        if not np.isclose(p_c, 0):
            # Charging: only the fraction eff_c of the input power is stored.
            eff_c = (Voc * I_c) / p_c
            E_prime = E + eff_c * P_c * tdelta
            disp = gen - P_c
        elif not np.isclose(p_d, 0):
            # Discharging: 1 / eff_d units are drawn per unit delivered.
            eff_d = p_d / (Voc * I_d)
            E_prime = E - (1 / eff_d) * P_d * tdelta
            disp = gen + P_d
        else:
            E_prime = E
            disp = gen

        # Income for dispatched power, minus a penalty on the deviation from
        # the bid and a usage cost on battery throughput.
        revenue = (imb * disp - imb * abs(bid - disp) - beta * (P_c + P_d)) * tdelta

        # A new list is built on purpose: callers may still hold the previous
        # history object and read its last element after this call.
        next_state = self.state + [[gen, imb, E_prime]]
        reward = revenue - imb * gen * tdelta
        # Report the end of the series so callers stop before the next step
        # would index past the data.
        done = len(next_state) >= len(self.data_gen)
        info = [gen, bid, rat, disp, revenue]

        self.state = next_state
        return next_state, reward, done, info


In [15]:
# PPO Training

# Seed both random sources used by this notebook so that a fresh
# "Restart & Run All" reproduces the same run: torch initialises the network
# weights, numpy samples the exploration noise during rollouts.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

total_episode = 100                            # passes over the training series
max_iteration = len(data_train) // T_horizon   # T_horizon-step segments per pass
print_interval = 1                             # report every this many episodes

model = LSTM()
env_train = Env([data_train, price_train])
env_val   = Env([data_val, price_val])
env_test  = Env([data_test, price_test])

# Per-episode logs: one inner list per episode, one entry per environment step.
bid_train, bid_val, bid_test = [], [], []  # bid (action[0])
rat_train, rat_val, rat_test = [], [], []  # compensation ratio (action[1])
mae_train, mae_val, mae_test = [], [], []  # |generation - bid|
mbe_train, mbe_val, mbe_test = [], [], []  # |dispatched - bid|
rev_train, rev_val, rev_test = [], [], []  # revenue

In [16]:
# Set up the optimizer for the model using Adam with the specified learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def _initial_hidden():
    """Return a fresh all-zero (h, c) pair for a single-sequence LSTM rollout."""
    return (torch.zeros([n_layers, 1, hidden_size], dtype=torch.float),
            torch.zeros([n_layers, 1, hidden_size], dtype=torch.float))


def _run_greedy(env, bids, rats, maes, mbes, revs):
    """Roll the current policy through `env` without exploration noise.

    The raw policy output is used as the action for every remaining sample of
    the environment's series, and the per-step metrics are appended to the
    given lists.
    """
    state = env.reset()
    history = _initial_hidden()
    # Evaluation needs no gradients; without no_grad the recurrent state would
    # chain one autograd graph through every step of the series.
    with torch.no_grad():
        for _ in range(len(env.data_gen) - 1):
            pi_out, history = model.pi(torch.tensor(state[-1], dtype=torch.float), history)
            action = pi_out[0].tolist()
            state, _reward, _done, info = env.step(action)

            gen, bid, rat, disp, revenue = info
            bids.append(bid)
            rats.append(rat)
            maes.append(abs(gen - bid))
            mbes.append(abs(disp - bid))
            revs.append(revenue)


# Iterate over the total number of episodes for training
for n_epi in range(total_episode):
    # Open a new per-episode log in every metric list. The current episode is
    # always addressed as [-1], so re-running this cell keeps appending new
    # episodes instead of writing into logs left over from an earlier run.
    for log in (bid_train, bid_val, bid_test,
                rat_train, rat_val, rat_test,
                mae_train, mae_val, mae_test,
                mbe_train, mbe_val, mbe_test,
                rev_train, rev_val, rev_test):
        log.append([])

    # Reset the training environment and the recurrent state
    state = env_train.reset()
    history = _initial_hidden()

    for i in range(max_iteration):
        # batch[0]: transitions, batch[1]: (hidden, next_hidden) pairs
        batch = [[], []]

        # Collect up to T_horizon steps with exploration noise. Gradients are
        # not needed here: train_net replays the segment itself.
        with torch.no_grad():
            for t in range(T_horizon):
                pi_out, next_history = model.pi(torch.tensor(state[-1], dtype=torch.float), history)
                # Sample the action around the policy output
                action = np.random.multivariate_normal(
                    pi_out.detach().numpy()[0], var * np.identity(out_size), 1)[0].tolist()
                next_state, reward, done, info = env_train.step(action)

                batch[0].append((state[-1], action, reward, next_state[-1], done))
                batch[1].append((history, next_history))
                # env.step returns a new list each time, so no copy is needed
                state = next_state
                history = next_history

                gen, bid, rat, disp, revenue = info
                bid_train[-1].append(bid)
                rat_train[-1].append(rat)
                mae_train[-1].append(abs(gen - bid))
                mbe_train[-1].append(abs(disp - bid))
                rev_train[-1].append(revenue)
                if done:
                    break

        # The first episode only collects data; updates start from the second
        if n_epi != 0:
            train_net(model, batch, optimizer)
        if done:
            break

    # Evaluate the updated policy on the validation and test series
    _run_greedy(env_val, bid_val[-1], rat_val[-1], mae_val[-1], mbe_val[-1], rev_val[-1])
    _run_greedy(env_test, bid_test[-1], rat_test[-1], mae_test[-1], mbe_test[-1], rev_test[-1])

    # Print metrics every 'print_interval' episodes
    if (n_epi+1)%print_interval == 0:
        # Errors in percent of capacity; revenue rescaled out of normalised units
        MAE_train = round(100*np.mean(mae_train[-1]), 2)
        MAE_val   = round(100*np.mean(mae_val[-1]), 2)
        MAE_test  = round(100*np.mean(mae_test[-1]), 2)
        MBE_train = round(100*np.mean(mbe_train[-1]), 2)
        MBE_val   = round(100*np.mean(mbe_val[-1]), 2)
        MBE_test  = round(100*np.mean(mbe_test[-1]), 2)
        REV_train = round(max_price * RE_Capacity1 * np.mean(rev_train[-1]), 3)
        REV_val   = round(max_price * RE_Capacity2 * np.mean(rev_val[-1]), 3)
        REV_test  = round(max_price * RE_Capacity3 * np.mean(rev_test[-1]), 3)

        print("episode: {}".format(n_epi+1))
        print("MAE_train: {}%".format(MAE_train).ljust(25), end="")
        print("MAE_val: {}%".format(MAE_val).ljust(25), end="")
        print("MAE_test: {}%".format(MAE_test).ljust(25))
        print("MBE_train: {}%".format(MBE_train).ljust(25), end="")
        print("MBE_val: {}%".format(MBE_val).ljust(25), end="")
        print("MBE_test: {}%".format(MBE_test).ljust(25))
        print("REV_train: ${}".format(REV_train).ljust(25), end="")
        print("REV_val: ${}".format(REV_val).ljust(25), end="")
        print("REV_test: ${}".format(REV_test).ljust(25))
        print("------------------------------------------------------------------------------------------")


episode: 1
MAE_train: 31.34%        MAE_val: 33.69%          MAE_test: 30.35%         
MBE_train: 31.26%        MBE_val: 33.69%          MBE_test: 30.35%         
REV_train: $-25.854      REV_val: $-21.404        REV_test: $-18.209       
------------------------------------------------------------------------------------------
episode: 2
MAE_train: 28.12%        MAE_val: 27.12%          MAE_test: 23.92%         
MBE_train: 27.94%        MBE_val: 27.09%          MBE_test: 23.89%         
REV_train: $-6.638       REV_val: $35.064         REV_test: $26.832        
------------------------------------------------------------------------------------------
episode: 3
MAE_train: 23.35%        MAE_val: 22.41%          MAE_test: 19.43%         
MBE_train: 22.96%        MBE_val: 22.13%          MBE_test: 19.14%         
REV_train: $18.104       REV_val: $72.695         REV_test: $54.3          
------------------------------------------------------------------------------------------
episode: 4

In [None]:

# Evaluation: replay the test-set bids of the best validation episode through
# the battery model and export them.

# Episode with the highest mean validation revenue. rev_val[:-1] leaves out the
# most recent episode (presumably because it can be incomplete when training
# was interrupted - TODO confirm).
select_num = np.argmax(np.mean(rev_val[:-1],axis=1))
select_test = np.array(bid_test[select_num][:])      # bids of that episode on the test set
select_test_rat = np.array(rat_test[select_num][:])  # matching compensation ratios
# Skip the first sample: Env.reset consumes index 0, so the first bid was
# settled against the sample at index 1.
select_test_real = np.array(data_test[1:])
select_test_price = np.array(price_test[1:])

E = E_max/2   # battery starts half full, as in Env.reset
mbe = []      # per-step |bid - dispatched|
reward = []   # per-step revenue
info = []     # per-step record: [gen, bid, mode, power, dispatched, efficiency, energy]
for i in range(len(select_test)):
    bid = select_test[i]
    gen = select_test_real[i]
    rat = select_test_rat[i]
    imb = select_test_price[i]

    # SOC-dependent battery curves (same expressions as in Env.step)
    soc = E/E_max
    Voc = a0*np.exp(-a1*soc) + a2 + a3*soc - a4*soc**2 + a5*soc**3
    Rs  = b0*np.exp(-b1*soc) + b2 + b3*soc - b4*soc**2 + b5*soc**3
    Rts = c0*np.exp(-c1*soc) + c2
    Rtl = d0*np.exp(-d1*soc) + d2
    R   = Rs + Rts + Rtl

    # Current and power limits that keep the energy inside [soc_min, soc_max]
    I_cmax = 1000000*E_max*(soc_max - soc)/N/(Voc*tdelta)
    I_dmax = 1000000*E_max*(soc - soc_min)/N/(Voc*tdelta)
    p_cmax = N*(Voc*I_cmax + I_cmax**2*R)
    p_dmax = N*(Voc*I_dmax - I_dmax**2*R)

    # Requested charge/discharge power, capped by the rated and SOC limits
    P_cmax = p_cmax/1000000; P_dmax = p_dmax/1000000
    P_c = min(max(rat*(gen-bid), 0), P_max, P_cmax)
    P_d = min(max(rat*(bid-gen), 0), P_max, P_dmax)
    p_c = 1000000*P_c/N; p_d = 1000000*P_d/N

    # Currents solving p = Voc*I + R*I^2 (charge) and p = Voc*I - R*I^2 (discharge)
    I_c = -(Voc - np.sqrt(Voc**2 + 4*R*p_c))/(2*R)
    I_d = (Voc - np.sqrt(Voc**2 - 4*R*p_d))/(2*R)
    if not np.isclose(p_c, 0):
        # Charging ('C'): E is updated in place before it is logged
        eff_c = (Voc*I_c)/p_c
        E = E + eff_c*P_c*tdelta
        disp = gen - P_c
        info += [[gen, round(bid,4), 'C', round(P_c,4), round(disp,4), round(eff_c,4), round(E,4)]]
    elif not np.isclose(p_d, 0):
        # Discharging ('D')
        eff_d = p_d/(Voc*I_d)
        E = E - (1/eff_d)*P_d*tdelta
        disp = gen + P_d
        info += [[gen, round(bid,4), 'D', round(P_d,4), round(disp,4), round(eff_d,4), round(E,4)]]
    else:
        # Battery idle ('N'): generation is dispatched unchanged
        disp = gen
        info += [[gen, round(bid,4), 'N', 'N', round(disp,4), 'N', round(E,4)]]

    mbe += [abs(bid - disp)]
    # Same revenue expression as in Env.step
    reward += [(imb*disp - imb*abs(bid-disp) - beta*(P_c+P_d))*tdelta]

# Errors in percent; revenue rescaled by the price and test-capacity constants
MAE_test = round(100*np.mean(np.abs(select_test_real - select_test)),2)
MBE_test = round(100*np.mean(mbe),2)
print("MAE_test: {}%".format(MAE_test))
print("MBE_test: {}%".format(MBE_test))
print("REV_test: ${}".format(round(max_price*RE_Capacity3*np.mean(reward),3)))

# Export the selected bids (column '0') and compensation ratios (column '1').
# NOTE(review): to_csv does not create "./Results/" - the directory must exist.
result = {}
result['0'] = select_test
result['1'] = select_test_rat

pd.DataFrame(result).to_csv("./Results/"+RE+"_Model4_DeepBid.csv")