In [12]:
import sys
sys.path.append("v2_Assignment_Codes")  # Add the folder to the search path

#load data
from v2_data import get_fixed_data
from PriceProcess import price_model
from WindProcess import wind_model
from utils import generate_time_series,generate_experiment_series

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from pyomo.environ import *
from mdp import check_feasibility,sim_MDP_exp, sim_MDP, generate_scenarios

%load_ext autoreload
%autoreload 2
plt.rcParams.update({'font.size': 13})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# Fixed problem parameters, looked up by string key throughout the notebook
# (e.g. 'num_timeslots', 'hydrogen_capacity', 'demand_schedule').
data = get_fixed_data()
# Number of time slots in the planning horizon.
T = data['num_timeslots']

In [14]:
# Generate the price and wind series once and reuse them for the policy
# simulation at the end of the notebook, so that all tasks and experiments
# are evaluated on the same time series.
prices,winds = generate_experiment_series()

# Task 3

In [15]:
import random

# Seed NumPy's global RNG and the stdlib RNG so the random sampling below is repeatable.
# NOTE(review): this runs after generate_experiment_series() above — confirm that
# helper seeds itself if its output must be reproducible as well.
np.random.seed(42)
random.seed(42)

## Define Value Function

In [16]:
class ValueFunction():
    """Time-indexed linear value-function approximation.

    One weight vector of length ``state_dim + 1`` is stored per time step
    ``t`` in ``0 .. T-1``; the last entry is the bias.  The value of a state
    is the dot product of that vector with the state extended by a constant
    1.  For ``t >= T`` the value is defined to be zero and updates are
    ignored.

    The horizon used for these checks is always ``self.T`` (the constructor
    argument), never a module-level ``T``.
    """

    def __init__(self, T, state_dim):
        """Create a value function with all weights initialised to one.

        Args:
            T: number of time steps for which weights are stored.
            state_dim: number of state features (excluding the bias).
        """
        self.T = T
        self.state_dim = state_dim
        self.weights = np.ones((T, state_dim + 1))  # +1 for the bias term

    @staticmethod
    def _append_bias(states):
        """Return ``states`` (2-D, one row per state) with a column of ones appended."""
        states = np.asarray(states)
        return np.hstack((states, np.ones((states.shape[0], 1))))

    def compute_value_explicit(self, t, state):
        """Evaluate a single state with plain Python arithmetic.

        The sum is built term by term with ``*`` and ``+`` only, so the
        entries of ``state`` may be any objects supporting multiplication by
        a float and addition (not only numbers).

        Args:
            t: integer time index.
            state: sequence of ``state_dim`` entries.

        Returns:
            ``0`` if ``t >= self.T``; otherwise the weighted sum of the state
            entries plus the bias weight.
        """
        if t >= self.T:
            return 0
        # Copy into a list so tuples/arrays are accepted and `+ [1]` always
        # means "append the bias", never element-wise addition.
        state_with_bias = list(state) + [1]
        value = 0
        for j, feature in enumerate(state_with_bias):
            value += feature * self.weights[t, j].item()
        return value

    def compute_value(self, t, states):
        """Evaluate a batch of states.

        Args:
            t: integer time index.
            states: 2-D array-like with one state per row.

        Returns:
            1-D array with one value per row; all zeros if ``t >= self.T``.
        """
        states = np.asarray(states)
        if t >= self.T:
            return np.zeros(states.shape[0])
        return np.dot(self._append_bias(states), self.weights[t])

    def update(self, t, states, target_values):
        """Refit the weights of time step ``t`` by ordinary least squares.

        Does nothing if ``t >= self.T``.

        Args:
            t: integer time index.
            states: 2-D array-like with one state per row.
            target_values: 1-D array-like with one regression target per row.
        """
        if t >= self.T:
            return
        # Solve the least squares problem to find the optimal weights
        solution = np.linalg.lstsq(self._append_bias(states), target_values, rcond=None)[0]
        self.weights[t] = solution

    def squared_error(self, t, states, target_values):
        """Return the mean squared difference between predictions and targets."""
        predicted_values = self.compute_value(t, states)
        return np.mean((predicted_values - target_values) ** 2)


## Function to sample representative state pairs

In [17]:
def sample_representative_state_pairs(I):
    """Draw ``I`` random state trajectories, one state per time slot.

    Every state has the layout
    ``[t, h, e_on_prev, wind, wind_prev, price, price_prev]``.  Wind and
    price come from ``generate_time_series``; ``h`` is drawn uniformly from
    ``[0, data['hydrogen_capacity']]`` and the on/off flag uniformly from
    ``{0, 1}``.  In the first slot the "previous" entries fall back to ``0``,
    ``data['wind_power_previous']`` and ``data['price_previous']``.

    Returns:
        Array of shape ``(T, I, 7)`` with ``T = data['num_timeslots']``.
    """
    horizon = data['num_timeslots']
    n_features = 7  # seven state variables
    samples = np.zeros((horizon, I, n_features))

    def lagged(series, step, initial):
        # Entry of `series` one slot earlier, or `initial` in the first slot.
        return series[step - 1] if step > 0 else initial

    for sample_idx in range(I):
        # exogenous part of the state
        # TODO: should varying initial conditions be used?
        price_path, wind_path = generate_time_series(horizon)
        # endogenous part of the state
        storage_levels = np.random.uniform(0, data['hydrogen_capacity'], horizon)
        on_flags = np.random.choice([0, 1], horizon)
        for step in range(horizon):
            samples[step, sample_idx] = [
                step,
                storage_levels[step],
                lagged(on_flags, step, 0),
                wind_path[step],
                lagged(wind_path, step, data['wind_power_previous']),
                price_path[step],
                lagged(price_path, step, data['price_previous']),
            ]
    return samples
# NOTE(review): this 50-sample draw is replaced by the 100-sample draw made right
# before the backward pass further down; it still advances NumPy's global RNG,
# so removing it would change the samples drawn later.
state_pairs = sample_representative_state_pairs(50)


## Mixed-integer linear program to perform value function minimization

In [18]:
def value_minimization(V: ValueFunction,t,state_cur,scenarios, gamma,print_result=False):
    """Solve the one-step lookahead problem for a single state.

    Minimizes ``price * p_grid + data['electrolyzer_cost'] * e_on`` plus
    ``gamma`` times the equally weighted average of
    ``V.compute_value_explicit`` over the next states implied by
    ``scenarios``.  The model contains binary variables, so it is a
    mixed-integer linear program; it is solved with Gurobi through Pyomo.

    Args:
        V: value function providing ``compute_value_explicit(t, state)``.
        t: ignored — the time index is taken from ``state_cur[0]``, which
            overwrites this argument.  Kept for interface compatibility.
        state_cur: seven entries
            ``[t, h, e_on_tm1, wind, wind_previous, price, price_previous]``.
        scenarios: non-empty sized iterable of objects exposing ``.wind`` and
            ``.price`` for the next time step.
        gamma: weight applied to the averaged next-state value.
        print_result: if true, print the objective and decision values (or a
            failure message) after solving.

    Returns:
        ``(decision, objective_value)`` where
        ``decision = (e_on, e_p2h, e_h2p, p_grid)``.

    Raises:
        ValueError: if ``scenarios`` is empty.
    """
    if len(scenarios) == 0:
        # Fail early with a clear message instead of a ZeroDivisionError
        # raised from inside the objective construction.
        raise ValueError("value_minimization requires at least one scenario")

    # The time index from the state wins over the `t` argument.
    t, h, e_on_tm1, wind, wind_previous, price, price_previous = state_cur
    t_idx = int(t)  # state arrays store t as a float; indices must be ints

    # Create a model
    model = ConcreteModel()
    # Decision variables for the current time step
    model.p_grid = Var(within=NonNegativeReals,name='p_grid')
    model.e_h2p = Var(within=NonNegativeReals,name='e_h2p')
    model.e_p2h = Var(within=NonNegativeReals,name='e_p2h')
    model.e_on = Var(within=Binary,name='e_on')

    # Endogenous part of the next state
    model.next_e_on = Var(within=Binary,name='new_e_on')
    model.next_h = Var(within=NonNegativeReals,bounds=(0,data['hydrogen_capacity']),name='new_h')

    # Objective function
    def objective_rule(model):
        # The next state mixes model variables (next_h, next_e_on) with
        # scenario data, so the value function is evaluated term by term.
        expected_next_value = 0
        for scenario in scenarios:
            scenario_state = [t+1, model.next_h, model.next_e_on, scenario.wind, wind, scenario.price, price]
            expected_next_value += V.compute_value_explicit(t_idx+1,scenario_state)
        expected_next_value /= len(scenarios)

        return price * model.p_grid + data['electrolyzer_cost']*model.e_on + gamma * expected_next_value

    # Named `profit` for historical reasons; the quantity is minimized.
    model.profit = Objective(rule=objective_rule, sense=minimize)

    # Constraints

    # Grid power, wind and hydrogen-to-power output, minus the power sent to
    # the electrolyzer, must cover the scheduled demand.
    model.DemandConstraint = Constraint(rule=lambda model: model.p_grid + wind + data['conversion_h2p']*model.e_h2p - model.e_p2h >= data['demand_schedule'][t_idx])

    # Storage balance defining the next hydrogen level.
    model.h_contraint = Constraint(rule=lambda model: model.next_h == h + data['conversion_p2h']*model.e_p2h-model.e_h2p)

    # Hydrogen withdrawn is limited by the stored amount and by the conversion rate.
    model.p2h_constraint = Constraint(rule=lambda model: model.e_h2p <= h)
    model.p2h_constraint2 = Constraint(rule=lambda model: data['conversion_h2p']*model.e_h2p <= data['h2p_max_rate'])

    # Power-to-hydrogen is only available if e_on_tm1 (from the current state) is 1.
    model.conversion_contraint = Constraint(rule=lambda model: data['conversion_p2h'] * model.e_p2h <= data['p2h_max_rate']*e_on_tm1)

    # The on/off decision taken now becomes the flag carried in the next state.
    model.e_on_constraint = Constraint(rule=lambda model: model.e_on == model.next_e_on)

    # Create a solver
    solver = SolverFactory('gurobi')  # Make sure Gurobi is installed and properly configured

    # Solve the model
    results = solver.solve(model, tee=False)
    if print_result:
        # Check if an optimal solution was found
        if results.solver.termination_condition == TerminationCondition.optimal:
            print("Optimal solution found")
            print(f"profit: {value(model.profit)}")
            print(f"p_grid: {value(model.p_grid)}")
            print(f"e_h2p: {value(model.e_h2p)}")
            print(f"e_p2h: {value(model.e_p2h)}")
            print(f"e_on: {value(model.e_on)}")
        else:
            print("No optimal solution found.")
    decision = (model.e_on.value,model.e_p2h.value,model.e_h2p.value,model.p_grid.value)
    return decision,value(model.profit)


## Perform Backward Value Function Approximation

In [None]:
def backward_value_approx(V, state_pairs, K, data,gamma=0.9):
    """Fit the per-time-step weights of ``V`` by backward induction.

    Walks from the last time slot to the first.  For every sampled state of
    a slot, scenarios are generated and ``value_minimization`` is solved; the
    resulting objective values are the regression targets used to refit the
    weights of that slot.  The mean squared error is printed before and
    after each refit.

    Args:
        V: value function to fit; modified in place.
        state_pairs: array indexed as ``[t, i]`` holding 7-entry states.
        K: forwarded to ``generate_scenarios`` as both ``k`` and ``n_samples``.
        data: mapping providing ``'num_timeslots'``.
        gamma: weight on the next-state value inside ``value_minimization``.

    Returns:
        The same ``V`` object.
    """
    horizon = data['num_timeslots']
    n_states = state_pairs.shape[1]

    def one_step_target(step, sampled_state):
        # Only the exogenous entries are needed to generate scenarios.
        _, _, _, wind_now, wind_prev, price_now, price_prev = sampled_state
        scenario_sets, _ = generate_scenarios(wind_now, price_now, wind_prev, price_prev, 1, k=K, n_samples=K)
        _, target = value_minimization(V, step, sampled_state, scenario_sets[0], gamma)
        return target

    for step in reversed(range(horizon)):
        print(f"t={step}")
        sampled_states = state_pairs[step]
        # one regression target per sampled state
        targets = np.array(
            [one_step_target(step, sampled_states[idx]) for idx in range(n_states)],
            dtype=float,
        )
        print(V.squared_error(step, sampled_states, targets))
        V.update(step, sampled_states, targets)
        print(V.squared_error(step, sampled_states, targets))
    return V


# Draw 100 sampled trajectories and fit a value function over 7 state features,
# with K=40 (forwarded to generate_scenarios as both k and n_samples) and the
# default gamma of 0.9.  The two numbers printed per time step are the mean
# squared error before and after that step's weights are refitted.
state_pairs = sample_representative_state_pairs(100)
V = backward_value_approx(ValueFunction(data['num_timeslots'],7),state_pairs,40,data)

t=23
5027.166681186691
174.5833808403135
t=22
4829.509806440117
112.83540374151303
t=21
4527.189411880017
117.47265070615003
t=20
4205.523445309924
77.45289595975068
t=19
4342.916787727729
56.93450909681083
t=18
4214.927225623123
18.994428504946505
t=17
3981.2990424841405
9.674165245025316
t=16
4042.3905816908787
16.819865607356164
t=15
3844.7432310860368
151.20918466269364
t=14
4097.799613877365
31.290982194315077
t=13
3884.3279076836793
170.83140617507564
t=12
4409.916904887554
335.8312852074807
t=11
4591.670627747121
230.91143972129635
t=10
5364.239132005265
205.66097233470347
t=9
6422.3558001278025
324.412727787766
t=8
7638.4027831857165
346.17897742457984
t=7
9835.02920533717
333.9784211993891
t=6
13355.963438738643
305.4033858386513
t=5
18520.769759733015
406.22019708894055
t=4
22414.433597053467
564.7943713863401
t=3
19677.237620558502
267.47443140971797
t=2
22959.804503082138
118.00272135180433
t=1
25151.257311893994
10.361441618863196
t=0
24444.004756583952
4.184370676781917e-

In [22]:
class ADPPolicy:
    """Callable policy that picks decisions by one-step value minimization.

    Each call generates scenarios from the current wind and price readings
    and returns the decision found by ``value_minimization`` against the
    stored value function.
    """

    def __init__(self, V, data,gamma = 0.9):
        """Store the value function, the problem data and the weight ``gamma``."""
        self.V, self.data, self.gamma = V, data, gamma

    def __call__(self, t, h, e_on, wind, wind_previous, price, price_previous,data):
        """Return the decision for the given state.

        The ``data`` argument is accepted but not used here.
        """
        scenario_sets, _ = generate_scenarios(wind, price, wind_previous, price_previous, 1)
        current_state = [t, h, e_on, wind, wind_previous, price, price_previous]
        chosen, _ = value_minimization(self.V, t, current_state, scenario_sets[0], self.gamma)
        return chosen


# Wrap the fitted value function in a policy and evaluate it with sim_MDP on the
# fixed wind/price series generated at the top of the notebook.
# The first argument (10) is presumably the number of simulated runs — the
# progress bar below shows 10 iterations; confirm against sim_MDP.
adp_policy = ADPPolicy(V, data)
sim_MDP(10,adp_policy,winds,prices)

Simulating MDP: 100%|██████████| 10/10 [00:13<00:00,  1.40s/it]


(np.float64(429.0134423850388),
 [np.float64(23.862385671014),
  np.float64(87.71661718473368),
  np.float64(311.7918991088412),
  np.float64(500.3320316028079),
  np.float64(1035.7140267507584),
  np.float64(142.83369483690421),
  np.float64(616.9210524194654),
  np.float64(864.8124024669761),
  np.float64(533.694673587),
  np.float64(172.45564022188677)])