In [1]:
import numpy as np
import tensorflow as tf
import statistics
from predicted_usage_pj import PredictedUsagePJ
from average_usage_pj import AverageUsagePJ
import math
import random

In [2]:
class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []
        self.position = 0

    def push(self, state, action, reward, next_state, done):
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)

In [3]:
class QNetwork(tf.keras.Model):
    def __init__(self, input_dim, output_dim, hidden_layer_sizes):
        super(QNetwork, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_layer_sizes = hidden_layer_sizes
        self.input_layer = tf.keras.layers.InputLayer(input_shape=(input_dim,))
        self.hidden_layers = [tf.keras.layers.Dense(size, activation='relu') for size in self.hidden_layer_sizes]
        self.output_layer = tf.keras.layers.Dense(output_dim, activation='linear')

    def call(self, inputs):
        x = self.input_layer(inputs)
        for layer in self.hidden_layers:
            x = layer(x)
        return self.output_layer(x)

In [4]:
def learn_from_memory(Q_network, target_network, replay_memory, gamma, batch_size):
    if len(replay_memory) < batch_size:
        return  # Not enough samples

    transitions = replay_memory.sample(batch_size)
    batch = list(zip(*transitions))

    states = np.array(batch[0])
    actions = np.array(batch[1])
    rewards = np.array(batch[2])
    next_states = np.array(batch[3])
    dones = np.array(batch[4])

    # Predict Q-values for current state and next states
    current_q_values = Q_network(states)
    next_q_values = target_network(next_states)  # Use the target network for stability
    max_next_q_values = np.max(next_q_values, axis=1)

    # Calculate target Q-values
    target_q_values = rewards + gamma * (1 - dones) * max_next_q_values

    # Update Q-values for actions taken
    targets = current_q_values.numpy()
    for i, action in enumerate(actions):
        targets[i, actions[i]] = target_q_values[i]

    # Train the network
    Q_network.train_on_batch(states, targets)


In [5]:
def update_target_network(target_model, model):
    target_model.set_weights(model.get_weights())


In [6]:
def change_output_layer_and_retain_weights(base_model, num_neurons, optimizer, loss):
    # Save current weights except the output layer
    current_weights = [layer.get_weights() for layer in base_model.layers[:-1]]

    new_model = QNetwork(base_model.input_dim, num_neurons, base_model.hidden_layer_sizes)
    new_model.build((None, base_model.input_dim))  
    
    for layer, weights in zip(new_model.layers[:-1], current_weights):
        layer.set_weights(weights)

    new_model.compile(optimizer=optimizer, loss=loss)

    return new_model


In [7]:
def deep_q_learning_prosumer_agent(Q_network, target_network, replay_memory, epsilon_decay_rate, Pt_Hj, rho_t_b, rho_t_s, gamma_prosumer, iteration, batch_size):
    epsilon = max(0.01, 1 - iteration / epsilon_decay_rate)
    state = get_current_state_prosumer(iteration)
    if np.random.rand() < epsilon:
        action = np.random.randint(Q_network.output_dim)
    else:
        q_values = Q_network.predict(np.array([state]))
        action = np.argmax(q_values[0])

    next_state = get_current_state_prosumer(iteration + 1)
    reward = get_reward_prosumer(Pt_Hj, rho_t_b, rho_t_s)
    done = iteration == max_iteration - 1

    replay_memory.push(state, action, reward, next_state, done)
    learn_from_memory(Q_network, target_network, replay_memory, gamma_prosumer, batch_size)  # Corrected to include target_network

    return action

In [8]:
def deep_q_learning_grid_agent(Q_network, target_network, replay_memory, epsilon_decay_rate, Pt_D, Pt_Gi, Pt_H_all, rho_t_s, gamma, iteration, batch_size):
    epsilon = max(0.01, 1 - iteration / epsilon_decay_rate)
    state = get_current_state_grid(iteration)
    if np.random.rand() < epsilon:
        action = np.random.randint(Q_network.output_dim)
    else:
        q_values = Q_network.predict(np.array([state]))
        action = np.argmax(q_values[0])

    next_state = get_current_state_grid(iteration + 1)
    reward = get_reward_grid(Pt_D, Pt_Gi, Pt_H_all, rho_t_s)
    done = iteration == max_iteration - 1

    replay_memory.push(state, action, reward, next_state, done)
    learn_from_memory(Q_network, target_network, replay_memory, gamma, batch_size)  # Corrected to include target_network

    return action

In [9]:
def get_reward_grid(Pt_D, Pt_Gi, Pt_H_all, rho_t_s):
    
    vt_G = Pt_D * rho_t_s  
    grid_cost_generation = sum(Pt_Gi)
    grid_cost_prosumers = sum(Pt_H_all)
    rt_GA = vt_G - (grid_cost_generation + grid_cost_prosumers)
    
    return rt_GA


In [10]:
def get_reward_prosumer(Pt_Hj, rho_t_b, rho_t_s):
    
    vt_Hj = Pt_Hj * rho_t_b
    prosumer_cost = Pt_Hj * rho_t_s
    rt_PAj = vt_Hj - prosumer_cost
    
    return rt_PAj

In [11]:
market_price= np.random.rand(61)
def get_current_state_prosumer(i):
    
    current_state = [
        get_battery_state(i),  # how much our battery is charged at that moment
        get_pv_generation(i),   # how much power has been generated for the past minute/hour/or any scale
        market_price[i],  # current market price
    ]
    
    return current_state


In [12]:
import numpy as np
np.random.rand(10)

array([0.82977117, 0.94136658, 0.95644145, 0.69407431, 0.32266284,
       0.81888835, 0.99256935, 0.37015462, 0.22908241, 0.08837035])

In [13]:
def get_current_state_grid(i):
    current_state = [
        get_generation_costs(i),  # generation costs
        get_prosumer_costs(i),   # prosumer costs
        get_grid_demand(i)       # grid demand
    ]
    return current_state


In [14]:
def avg_current_market_price(data_mp):
    avg = statistics.mean(data_mp)
    return avg

In [15]:
def best_buy_sell_price(data_mp, a_t, output_dim):
    buy_sell_prices = {}
    variance = np.var(data_mp)
    cmp = avg_current_market_price(data_mp)
    
    increament=2*variance/(output_dim-1)
    price=cmp-variance
    
    for i in range(output_dim):
        buy_sell_prices[str(i)] = price
        price += increament 

    a_t_key = str(a_t)
    best_price = buy_sell_prices.get(a_t_key)  # Default to current market price if not found
#     if a_t not in range(variance):
#         print("a_t not in variance")
#     else:
#         print("The value of a_t is this ", a_t)
    return best_price, variance


In [16]:
size=61
values=np.random.rand(size,2).tolist()
def initialize_Pt_Gi(i):
    return values[i]

In [17]:
values=np.random.rand(size,2).tolist()
def initialize_Pt_Hj(i):
    return values[i]

In [18]:
def grid_buy_price(i):
    """ Returns the grid's buy price at a particular time t. """
    grid_buy_prices = [10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       9.6, 10.1, 10.3, 10.7, 11.4, 10.9, 13]  # Added 6 new values
    return grid_buy_prices[i]

In [19]:
def get_battery_state(i):
    battery_states = np.random.rand(61)
    return battery_states[i]

In [20]:
def grid_sell_price(i):
    """ Returns the grid's sell price at a particular time t. """
    grid_sell_prices = [12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.4, 13.1, 12.9, 13.3, 12.8, 13.0, 14] 
    return grid_sell_prices[i]

In [21]:
def grid_power(i):
    """ Returns the total power demanded from the grid at a particular time t. """
    grid_power_demand = [50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         49, 47, 54, 59, 51, 57, 58]
    return grid_power_demand[i]

In [22]:
def Data(i):
    """ List of current market prices over a certain period of time decided by us. """
    market_prices = [10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.7, 10.4, 10.9, 11.2, 10.9, 11.0, 12]
    return market_prices[i], market_prices

In [23]:
def jth_prosumer_power(i):
    """ Returns excess power generated by a prosumer. """
    prosumer_power_generation = [20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 27, 22, 19, 24, 23, 20, 21] 
    return prosumer_power_generation[i]

In [24]:
def get_pv_generation(i):
    pv_generation_values = np.random.uniform(0, 30, 61)
    return pv_generation_values[i]

In [25]:
np.random

<module 'numpy.random' from 'C:\\Users\\SIDDHARTH SINGH\\anaconda3\\lib\\site-packages\\numpy\\random\\__init__.py'>

In [26]:
def get_generation_costs(i):
    """ Returns the generation cost at a specific time t. """
    generation_costs = [20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 19, 24, 25, 22, 21, 22]
    return generation_costs[i]

In [27]:
def get_prosumer_costs(i):
    """ Returns the prosumer cost at a specific time t. """
    prosumer_costs = [30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      32, 29, 33, 30, 31, 32, 33]  
    return prosumer_costs[i]

In [28]:
def get_grid_demand(i):
    """ Returns the grid demand at a specific time t. """
    grid_demand = [40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   41, 44, 42, 51, 49, 46, 47] 
    return grid_demand[i]

In [29]:

timestamps = np.arange(61)  # Assuming 60 samples
battery_states = np.random.rand(61)  # Random battery states for each sample
pv_generation = np.random.rand(61)  # Random PV generation for each sample
market_prices = np.random.rand(61)  # Random market prices for each sample
energy_values = np.random.rand(61)  # Random energy values for each sample

# Stack the features into X_train
X_train = np.column_stack((timestamps, battery_states, pv_generation, market_prices))

# Set up y_train as the energy values
y_train = energy_values

In [30]:
def initialize_and_build_model(input_dim, output_dim, hidden_layer_sizes):
    model = QNetwork(input_dim, output_dim, hidden_layer_sizes)
    model.build((None, input_dim))  # Explicitly build the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [31]:
max_iteration = 60
input_dim = 3 
epsilon_decay_rate=0.01
predicted_pj = PredictedUsagePJ()
average_usage_pj = AverageUsagePJ()
hidden_layer_sizes = [64, 128, 64] 
prosumer_number=2
output_dim_prosumer=5
output_dim_grid=5
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
gamma=0.3
gamma_prosumer=0.3
UPDATE_TARGET_NETWORK_EVERY = 5

# Constants
BATCH_SIZE = 64
REPLAY_MEMORY_CAPACITY = 5000

# Replay memories
replay_memory_grid = ReplayMemory(REPLAY_MEMORY_CAPACITY)
replay_memory_prosumers = [ReplayMemory(REPLAY_MEMORY_CAPACITY) for _ in range(prosumer_number)]

network_prosumers = [initialize_and_build_model(input_dim, output_dim_prosumer, hidden_layer_sizes) for _ in range(prosumer_number)]

target_network_prosumers = [initialize_and_build_model(input_dim, output_dim_prosumer, hidden_layer_sizes) for _ in range(prosumer_number)]
for net, target_net in zip(network_prosumers, target_network_prosumers):
    target_net.set_weights(net.get_weights())
    
q_network_grid = initialize_and_build_model(input_dim, output_dim_grid, hidden_layer_sizes)

target_network_grid = initialize_and_build_model(input_dim, output_dim_grid, hidden_layer_sizes)
target_network_grid.set_weights(q_network_grid.get_weights())

for iteration in range(max_iteration):
    
    # Periodic update of the target network weights
    if iteration % UPDATE_TARGET_NETWORK_EVERY == 0:
        update_target_network(target_network_grid, q_network_grid)
        for net, target_net in zip(network_prosumers, target_network_prosumers):
            update_target_network(target_net, net)
    
    Pt_D = grid_power(iteration)
    rho_t_s = grid_sell_price(iteration)
    Pt_H_all = initialize_Pt_Hj(iteration)
    Pt_Gi = initialize_Pt_Gi(iteration)
    
    if iteration >= 5:
        data_mp = market_prices[iteration - 4:iteration + 1]
    else:
        data_mp = market_prices[:iteration+5]

    # Grid Agent Learning and Action Selection
    a_t_grid = deep_q_learning_grid_agent(q_network_grid, target_network_grid, replay_memory_grid, epsilon_decay_rate, Pt_D, Pt_Gi, Pt_H_all, rho_t_s, gamma, iteration, BATCH_SIZE)
    rho_t_b = grid_buy_price(iteration)
    best_sell_price, variance = best_buy_sell_price(data_mp, a_t_grid, output_dim_grid)
    print(f"Iteration:{iteration}, For grid agent best sell price is: {best_sell_price}")
    
    # Prosumer Agents Learning and Action Selection
    for j in range(prosumer_number):
        data, market_prices = Data(iteration)
        Pt_Hj = initialize_Pt_Hj(iteration)[j]  
        a_t_prosumer = deep_q_learning_prosumer_agent(network_prosumers[j],target_network_prosumers[j], replay_memory_prosumers[j], epsilon_decay_rate, Pt_Hj, rho_t_b, rho_t_s, gamma_prosumer, iteration, BATCH_SIZE)

        # Predicted and Average Usage Logic
        x = X_train[iteration].reshape(1, -1)
        y = np.array([y_train[iteration]])    
        predicted_pj.train_model(x, y)
        predicted_usage_pj = predicted_pj.predict_usage(X_train[iteration])
        average_usage_pj.update_usage(y_train[iteration])
        average_usage = average_usage_pj.calculate_average_usage()

        if predicted_usage_pj > average_usage:    
            best_buy_price, variance = best_buy_sell_price(data_mp, a_t_prosumer, output_dim_prosumer)
            print(f"Iteration: {iteration}, For agent: {j}, best buy price is: {best_buy_price}")
        else:
            best_sell_price, variance = best_buy_sell_price(data_mp, a_t_prosumer, output_dim_prosumer)
            print(f"Iteration: {iteration}, For agent: {j}, best sell price is: {best_sell_price}")
    

Iteration:0, For grid agent best sell price is: 0.43125113252900754
Iteration: 0, For agent: 0, best sell price is: 0.43125113252900754
Iteration: 0, For agent: 1, best sell price is: 0.43125113252900754
Iteration:1, For grid agent best sell price is: 11.052222222222223
Iteration: 1, For agent: 0, best sell price is: 10.866666666666667
Iteration: 1, For agent: 1, best sell price is: 11.052222222222223
Iteration:2, For grid agent best sell price is: 11.11061224489796
Iteration: 2, For agent: 0, best sell price is: 10.928571428571429
Iteration: 2, For agent: 1, best sell price is: 11.11061224489796
Iteration:3, For grid agent best sell price is: 11.084375000000001
Iteration: 3, For agent: 0, best sell price is: 10.925
Iteration: 3, For agent: 1, best sell price is: 11.084375000000001
Iteration:4, For grid agent best sell price is: 11.089135802469135
Iteration: 4, For agent: 0, best sell price is: 10.944444444444445
Iteration: 4, For agent: 1, best sell price is: 11.089135802469135
Iterat

Iteration: 24, For agent: 0, best sell price is: 11.159999999999998
Iteration: 24, For agent: 1, best sell price is: 11.218399999999997
Iteration:25, For grid agent best sell price is: 11.206400000000004
Iteration: 25, For agent: 0, best sell price is: 11.140000000000002
Iteration: 25, For agent: 1, best sell price is: 11.206400000000004
Iteration:26, For grid agent best sell price is: 11.094400000000004
Iteration: 26, For agent: 0, best sell price is: 11.060000000000002
Iteration: 26, For agent: 1, best sell price is: 11.094400000000004
Iteration:27, For grid agent best sell price is: 11.079999999999997
Iteration: 27, For agent: 0, best sell price is: 10.999999999999998
Iteration: 27, For agent: 1, best sell price is: 11.079999999999997
Iteration:28, For grid agent best sell price is: 10.96
Iteration: 28, For agent: 0, best sell price is: 10.8
Iteration: 28, For agent: 1, best sell price is: 10.96
Iteration:29, For grid agent best sell price is: 10.8544
Iteration: 29, For agent: 0, be

Iteration:48, For grid agent best sell price is: 11.0704
Iteration: 48, For agent: 0, best sell price is: 10.86
Iteration: 48, For agent: 1, best sell price is: 11.0704
Iteration:49, For grid agent best sell price is: 10.996000000000004
Iteration: 49, For agent: 0, best sell price is: 10.800000000000002
Iteration: 49, For agent: 1, best sell price is: 10.996000000000004
Iteration:50, For grid agent best sell price is: 11.1304
Iteration: 50, For agent: 0, best sell price is: 10.94
Iteration: 50, For agent: 1, best sell price is: 11.1304
Iteration:51, For grid agent best sell price is: 11.218399999999997
Iteration: 51, For agent: 0, best sell price is: 11.159999999999998
Iteration: 51, For agent: 1, best sell price is: 11.218399999999997
Iteration:52, For grid agent best sell price is: 11.206400000000004
Iteration: 52, For agent: 0, best sell price is: 11.173200000000003
Iteration: 52, For agent: 1, best sell price is: 11.206400000000004
Iteration:53, For grid agent best sell price is: 1