In [1]:
import numpy as np
import tensorflow as tf
import statistics
from predicted_usage_pj import PredictedUsagePJ
from average_usage_pj import AverageUsagePJ
import math

In [2]:
class QNetwork(tf.keras.Model):
    def __init__(self, input_dim, output_dim, hidden_layer_sizes):
        super(QNetwork, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_layer_sizes = hidden_layer_sizes
        self.input_layer = tf.keras.layers.InputLayer(input_shape=(input_dim,))
        self.hidden_layers = [tf.keras.layers.Dense(size, activation='relu') for size in self.hidden_layer_sizes]
        self.output_layer = tf.keras.layers.Dense(output_dim, activation='linear')

    def call(self, inputs):
        x = self.input_layer(inputs)
        for layer in self.hidden_layers:
            x = layer(x)
        return self.output_layer(x)


In [3]:
def change_output_layer_and_retain_weights(base_model, num_neurons, optimizer, loss):
    # Save current weights except the output layer
    current_weights = [layer.get_weights() for layer in base_model.layers[:-1]]

    new_model = QNetwork(base_model.input_dim, num_neurons, base_model.hidden_layer_sizes)
    new_model.build((None, base_model.input_dim))  
    
    for layer, weights in zip(new_model.layers[:-1], current_weights):
        layer.set_weights(weights)

    new_model.compile(optimizer=optimizer, loss=loss)

    return new_model


In [4]:
def deep_q_learning_prosumer_agent(Q_network, epsilon_decay_rate, Pt_Hj, rho_t_b, rho_t_s, gamma_prosumer, iteration): 
    epsilon = max(0.01, 1 - iteration / epsilon_decay_rate)
        
    s_t_PAj = get_current_state_prosumer(iteration)
    q_values = Q_network.predict(np.array([s_t_PAj]))
    
    if q_values.size == 0:
        print("Warning: Q-values are empty. Defaulting to random action.")
        if Q_network.output_dim > 0:
            a_t = np.random.randint(Q_network.output_dim)
        else:
            print("Error: QNetwork output_dim is non-positive.")
            return None  
    if np.random.rand() < epsilon:
        a_t = np.random.randint(Q_network.output_dim)
    else:
        a_t = np.argmax(q_values)
            
    r_t_plus_1_PAj = get_reward_prosumer(Pt_Hj, rho_t_b, rho_t_s)
    s_t_plus_1_PAj = get_current_state_prosumer(iteration + 1)
            
    q_values_next = Q_network.predict(np.array([s_t_plus_1_PAj]))
    max_q_value = np.max(q_values_next)
            
    target = q_values
    target[0][a_t] = r_t_plus_1_PAj + gamma_prosumer * max_q_value
            
    Q_network.fit(np.array([s_t_PAj]), target, epochs=1, verbose=0)
    
    return a_t

In [5]:
def deep_q_learning_grid_agent(Q_network, epsilon_decay_rate, Pt_D, Pt_Gi, Pt_H_all, rho_t_s, gamma, iteration):
    epsilon = max(0.01, 1 - iteration / epsilon_decay_rate)
    
    s_t_GA = get_current_state_grid(iteration)
    q_values = Q_network.predict(np.array([s_t_GA]))
    print(q_values)
    if q_values.size == 0:
        print("Warning: Q-values are empty. Defaulting to random action.")
        if Q_network.output_dim > 0:
            a_t = np.random.randint(Q_network.output_dim)
        else:
            print("Error: QNetwork output_dim is non-positive.")
            return None  
    else:
        if np.random.rand() < epsilon:
            a_t = np.random.randint(Q_network.output_dim)
        else:
            a_t = np.argmax(q_values)
            
    r_t_plus_1_GA = get_reward_grid(Pt_D, Pt_Gi, Pt_H_all, rho_t_s)
    s_t_plus_1_GA = get_current_state_grid(iteration + 1)
            
    q_values_next = Q_network.predict(np.array([s_t_plus_1_GA]))
    max_q_value = np.max(q_values_next)
            
    target = q_values
    target[0][a_t] = r_t_plus_1_GA + gamma * max_q_value
            
    Q_network.fit(np.array([s_t_GA]), target, epochs=1, verbose=0)
    
    return a_t

In [6]:
def get_reward_grid(Pt_D, Pt_Gi, Pt_H_all, rho_t_s):
    
    vt_G = Pt_D * rho_t_s  
    grid_cost_generation = sum(Pt_Gi)
    grid_cost_prosumers = sum(Pt_H_all)
    rt_GA = vt_G - (grid_cost_generation + grid_cost_prosumers)
    
    return rt_GA


In [7]:
def get_reward_prosumer(Pt_Hj, rho_t_b, rho_t_s):
    
    vt_Hj = Pt_Hj * rho_t_b
    prosumer_cost = Pt_Hj * rho_t_s
    rt_PAj = vt_Hj - prosumer_cost
    
    return rt_PAj

In [8]:
market_price= np.random.rand(61)
def get_current_state_prosumer(i):
    
    current_state = [
        get_battery_state(i),  # how much our battery is charged at that moment
        get_pv_generation(i),   # how much power has been generated for the past minute/hour/or any scale
        market_price[i],  # current market price
    ]
    
    return current_state


In [9]:
import numpy as np
np.random.rand(10)

array([0.12253558, 0.38334959, 0.83184186, 0.26381137, 0.58120627,
       0.8781816 , 0.80367028, 0.33893624, 0.92410852, 0.84237903])

In [10]:
def get_current_state_grid(i):
    current_state = [
        get_generation_costs(i),  # generation costs
        get_prosumer_costs(i),   # prosumer costs
        get_grid_demand(i)       # grid demand
    ]
    return current_state


In [11]:
def avg_current_market_price(data_mp):
    avg = statistics.mean(data_mp)
    return avg

In [12]:
def best_buy_sell_price(data_mp, a_t, output_dim):
    buy_sell_prices = {}
    variance = np.var(data_mp)
    cmp = avg_current_market_price(data_mp)
    
    increament=2*variance/(output_dim-1)
    price=cmp-variance
    
    for i in range(output_dim):
        buy_sell_prices[str(i)] = price
        price += increament 

    a_t_key = str(a_t)
    best_price = buy_sell_prices.get(a_t_key)  # Default to current market price if not found
#     if a_t not in range(variance):
#         print("a_t not in variance")
#     else:
#         print("The value of a_t is this ", a_t)
    return best_price, variance


In [13]:
size=61
values=np.random.rand(size,2).tolist()
def initialize_Pt_Gi(i):
    return values[i]

In [14]:
values=np.random.rand(size,2).tolist()
def initialize_Pt_Hj(i):
    return values[i]

In [15]:
def grid_buy_price(i):
    """ Returns the grid's buy price at a particular time t. """
    grid_buy_prices = [10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       10.0, 9.5, 10.2, 11.0, 10.8, 11.5, 10.2, 10.9, 11.1,
                       9.6, 10.1, 10.3, 10.7, 11.4, 10.9, 13]  # Added 6 new values
    return grid_buy_prices[i]

In [16]:
def get_battery_state(i):
    battery_states = np.random.rand(61)
    return battery_states[i]

In [17]:
def grid_sell_price(i):
    """ Returns the grid's sell price at a particular time t. """
    grid_sell_prices = [12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.0, 12.5, 13.0, 12.8, 13.2, 12.7, 13.3, 12.9, 13.0,
                        12.4, 13.1, 12.9, 13.3, 12.8, 13.0, 14] 
    return grid_sell_prices[i]

In [18]:
def grid_power(i):
    """ Returns the total power demanded from the grid at a particular time t. """
    grid_power_demand = [50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         50, 48, 55, 60, 52, 58, 62, 56, 53,
                         49, 47, 54, 59, 51, 57, 58]
    return grid_power_demand[i]

In [19]:
def Data(i):
    """ List of current market prices over a certain period of time decided by us. """
    market_prices = [10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.5, 10.2, 11.0, 11.5, 10.8, 11.2, 11.3, 10.9, 11.1,
                     10.7, 10.4, 10.9, 11.2, 10.9, 11.0, 12]
    return market_prices[i], market_prices

In [20]:
def jth_prosumer_power(i):
    """ Returns excess power generated by a prosumer. """
    prosumer_power_generation = [20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 20, 22, 18, 25, 23, 19, 26, 21, 24,
                                 27, 22, 19, 24, 23, 20, 21] 
    return prosumer_power_generation[i]

In [21]:
def get_pv_generation(i):
    pv_generation_values = np.random.uniform(0, 30, 61)
    return pv_generation_values[i]

In [22]:
np.random

<module 'numpy.random' from 'C:\\Users\\SIDDHARTH SINGH\\anaconda3\\lib\\site-packages\\numpy\\random\\__init__.py'>

In [23]:
def get_generation_costs(i):
    """ Returns the generation cost at a specific time t. """
    generation_costs = [20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 22, 19, 24, 23, 25, 21, 18, 22, 23,
                        20, 19, 24, 25, 22, 21, 22]
    return generation_costs[i]

In [24]:
def get_prosumer_costs(i):
    """ Returns the prosumer cost at a specific time t. """
    prosumer_costs = [30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      30, 28, 35, 32, 33, 31, 29, 34, 30,
                      32, 29, 33, 30, 31, 32, 33]  
    return prosumer_costs[i]

In [25]:
def get_grid_demand(i):
    """ Returns the grid demand at a specific time t. """
    grid_demand = [40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   40, 45, 42, 50, 48, 46, 44, 43, 47,
                   41, 44, 42, 51, 49, 46, 47] 
    return grid_demand[i]

In [26]:

timestamps = np.arange(61)  # Assuming 60 samples
battery_states = np.random.rand(61)  # Random battery states for each sample
pv_generation = np.random.rand(61)  # Random PV generation for each sample
market_prices = np.random.rand(61)  # Random market prices for each sample
energy_values = np.random.rand(61)  # Random energy values for each sample

# Stack the features into X_train
X_train = np.column_stack((timestamps, battery_states, pv_generation, market_prices))

# Set up y_train as the energy values
y_train = energy_values

In [28]:
max_iteration = 60
input_dim = 3 
epsilon_decay_rate=0.01
predicted_pj = PredictedUsagePJ()
average_usage_pj = AverageUsagePJ()
hidden_layer_sizes = [64, 128, 64] 
prosumer_number=2
output_dim_prosumer=5
output_dim_grid=5
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
gamma=0.3
gamma_prosumer=0.3

q_network_grid = QNetwork(input_dim, output_dim_grid, hidden_layer_sizes)
q_network_grid.compile(optimizer='adam', loss='mean_squared_error')

# q_network_prosumer = QNetwork(input_dim, output_dim_prosumer, hidden_layer_sizes)
# q_network_prosumer.compile(optimizer=optimizer, loss='mean_squared_error')

network_prosumers = [QNetwork(input_dim, output_dim_prosumer, hidden_layer_sizes) for _ in range(prosumer_number)]
for net in network_prosumers:
    net.compile(optimizer='adam', loss='mean_squared_error')

for iteration in range(max_iteration):

    Pt_D = grid_power(iteration)
    rho_t_s = grid_sell_price(iteration)
    Pt_H_all = initialize_Pt_Hj(iteration)
    Pt_Gi = initialize_Pt_Gi(iteration)
    
    if iteration >= 5:
        data_mp = market_prices[iteration - 4:iteration + 1]
    else:
        data_mp=market_prices[:iteration+5]
        
    a_t_grid = deep_q_learning_grid_agent(q_network_grid, epsilon_decay_rate, Pt_D, Pt_Gi, Pt_H_all, rho_t_s, gamma,iteration)
    rho_t_b = grid_buy_price(iteration)
    best_sell_price, variance = best_buy_sell_price(data_mp, a_t_grid, output_dim_grid)
    print(f"Iteration:{iteration}, For grid agent best sell price is: {best_sell_price}")
    
    for j in range(prosumer_number):
        
        data, market_prices = Data(iteration)
        Pt_Hj = initialize_Pt_Hj(iteration)[j]  
            
        a_t_prosumer = deep_q_learning_prosumer_agent(network_prosumers[j], epsilon_decay_rate, Pt_Hj, rho_t_b, rho_t_s, gamma_prosumer,iteration)

        x = X_train[iteration].reshape(1, -1) 
        y = np.array([y_train[iteration]])    

        predicted_pj.train_model(x, y)
        predicted_usage_pj = predicted_pj.predict_usage(X_train[iteration])

        average_usage_pj.update_usage(y_train[iteration])
        average_usage = average_usage_pj.calculate_average_usage()

        if predicted_usage_pj > average_usage:    
            best_buy_price, variance = best_buy_sell_price(data_mp, a_t_prosumer, output_dim_prosumer)
            print(f"Iteration: {iteration}, For agent: {j}, best buy price is: {best_buy_price}")
        else:
            best_sell_price, variance = best_buy_sell_price(data_mp, a_t_prosumer, output_dim_prosumer)
            print(f"Iteration: {iteration}, For agent: {j}, best sell price is: {best_sell_price}")
        
    

[[-8.436445   1.2933575 -1.6028746 -7.319366   1.4889235]]
Iteration:0, For grid agent best sell price is: 0.5166837184187364
Iteration: 0, For agent: 0, best sell price is: 0.3005340186744069
Iteration: 0, For agent: 1, best sell price is: 0.40860886854657164
[[-7.741612    0.76462555 -1.1157223  -7.991831    4.321465  ]]
Iteration:1, For grid agent best sell price is: 11.052222222222223
Iteration: 1, For agent: 0, best sell price is: 10.773888888888889
Iteration: 1, For agent: 1, best sell price is: 10.866666666666667
[[-7.8511786   0.36400756 -0.48809874 -8.345779    7.671534  ]]
Iteration:2, For grid agent best sell price is: 11.11061224489796
Iteration: 2, For agent: 0, best sell price is: 10.837551020408164
Iteration: 2, For agent: 1, best sell price is: 11.11061224489796
[[-6.9726872   0.15321892  0.9188083  -9.2629175  11.802825  ]]
Iteration:3, For grid agent best sell price is: 11.084375000000001
Iteration: 3, For agent: 0, best sell price is: 10.8453125
Iteration: 3, For age

Iteration: 14, For agent: 0, best sell price is: 10.8448
Iteration: 14, For agent: 1, best sell price is: 10.8448
[[ -1.901593  -7.557908  11.702095 -12.837194  50.35918 ]]
Iteration:15, For grid agent best sell price is: 11.218399999999997
Iteration: 15, For agent: 0, best sell price is: 11.130799999999999
Iteration: 15, For agent: 1, best sell price is: 11.130799999999999
[[ -2.5232143  -8.042102   12.133645  -13.602342   53.15344  ]]
Iteration:16, For grid agent best sell price is: 11.206400000000004
Iteration: 16, For agent: 0, best sell price is: 11.106800000000002
Iteration: 16, For agent: 1, best sell price is: 11.0736
[[ -2.1224926  -8.664468   12.869623  -13.815459   56.400566 ]]
Iteration:17, For grid agent best sell price is: 11.094400000000004
Iteration: 17, For agent: 0, best sell price is: 11.042800000000002
Iteration: 17, For agent: 1, best sell price is: 11.0256
[[ -2.3974462  -9.077213   13.636711  -14.445521   58.53305  ]]
Iteration:18, For grid agent best sell price 

Iteration:29, For grid agent best sell price is: 10.8544
Iteration: 29, For agent: 0, best sell price is: 10.8544
Iteration: 29, For agent: 1, best sell price is: 10.6256
[[ -9.1427145 -26.254316   36.56474   -33.51808   146.86731  ]]
Iteration:30, For grid agent best sell price is: 11.0704
Iteration: 30, For agent: 0, best sell price is: 11.0704
Iteration: 30, For agent: 1, best sell price is: 10.6496
[[-10.210993 -28.200096  39.37211  -35.913803 157.47621 ]]
Iteration:31, For grid agent best sell price is: 10.996000000000004
Iteration: 31, For agent: 0, best sell price is: 10.996000000000004
Iteration: 31, For agent: 1, best sell price is: 10.604000000000001
[[-10.494846 -28.245626  39.102936 -35.421772 156.39534 ]]
Iteration:32, For grid agent best sell price is: 11.1304
Iteration: 32, For agent: 0, best sell price is: 11.1304
Iteration: 32, For agent: 1, best sell price is: 11.0352
[[-11.520044 -30.761154  42.751072 -38.392323 170.06903 ]]
Iteration:33, For grid agent best sell pri

Iteration: 43, For agent: 1, best sell price is: 11.173200000000003
[[-29.771694 -63.746384  84.6304   -72.43839  342.19763 ]]
Iteration:44, For grid agent best sell price is: 11.094400000000004
Iteration: 44, For agent: 0, best sell price is: 11.094400000000004
Iteration: 44, For agent: 1, best sell price is: 11.077200000000003
[[-30.309216 -63.33288   84.34333  -72.13735  341.50046 ]]
Iteration:45, For grid agent best sell price is: 11.079999999999997
Iteration: 45, For agent: 0, best sell price is: 11.079999999999997
Iteration: 45, For agent: 1, best sell price is: 11.039999999999997
[[-32.36068  -67.298676  88.8135   -75.55609  361.43985 ]]
Iteration:46, For grid agent best sell price is: 10.96
Iteration: 46, For agent: 0, best sell price is: 10.96
Iteration: 46, For agent: 1, best sell price is: 10.88
[[-34.582863 -69.36266   91.54199  -78.09067  374.12622 ]]
Iteration:47, For grid agent best sell price is: 10.8544
Iteration: 47, For agent: 0, best sell price is: 10.8544
Iteration

Iteration: 58, For agent: 0, best sell price is: 10.854800000000003
Iteration: 58, For agent: 1, best sell price is: 10.820000000000002
[[ -75.65433 -135.19997  175.38782 -145.5498   750.8444 ]]
Iteration:59, For grid agent best sell price is: 10.949600000000004
Iteration: 59, For agent: 0, best sell price is: 10.914800000000003
Iteration: 59, For agent: 1, best sell price is: 10.880000000000003
