In [33]:
import sys
sys.path.append("v2_Assignment_Codes")  # Add the folder to the search path

#load data
from v2_data import get_fixed_data
from PriceProcess import price_model
from WindProcess import wind_model
from utils import generate_time_series,generate_experiment_series

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from pyomo.environ import *
from mdp import check_feasibility,sim_MDP_exp, sim_MDP, generate_scenarios

%load_ext autoreload
%autoreload 2
plt.rcParams.update({'font.size': 13})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
data = get_fixed_data()
T = data['num_timeslots']

In [35]:
# ensure we always use the same time series for all tasks and experiments
prices,winds = generate_experiment_series()

# Task 3

In [36]:
import random

np.random.seed(42)
random.seed(42)

## Define Value Function

In [37]:
class ValueFunction():
    def __init__(self, T, state_dim):
        self.T = T
        self.state_dim = state_dim

        self.weights = np.ones((T, state_dim -1 + 1))  # -1 because state includes t, which we we told not to... +1 for the bias term
        
    def compute_value_explicit(self, t, state):
        state = state[1:] # Exclude time from state
        # Append 1 for the bias term to the state
        if t >= T:
            return 0
        state_with_bias = state + [1]
        value = 0
        for j in range(len(state_with_bias)):
            value += state_with_bias[j] * self.weights[t, j].item()
        return value
    
    def compute_value(self, t, states):
        states = states[:, 1:] # Exclude time from state
        if t >= T:
            return np.zeros(states.shape[0])
        # Append 1 for the bias term to each state
        states_with_bias = np.hstack((states, np.ones((states.shape[0], 1))))
        return np.dot(states_with_bias, self.weights[t])

    def update(self, t, states, target_values):
        if t >= T:
            return 
        states = states[:,1:] # Exclude time from state
        # Append 1 for the bias term to each state
        states_with_bias = np.hstack((states, np.ones((states.shape[0], 1))))
        # Solve the least squares problem to find the optimal weights
        self.weights[t], _, _, _ = np.linalg.lstsq(states_with_bias, target_values, rcond=None)
    
    def squared_error(self, t, states, target_values):
        # Compute the squared error
        predicted_values = self.compute_value(t, states)
        return np.mean((predicted_values - target_values) ** 2)


## Function to sample representative pairs

In [38]:
def sample_representative_state_pairs(I):
    T = data['num_timeslots']
    state_pairs = np.zeros((T,I,7)) # seven state variables
    for i in range(I):
        # sample exogenous state variables
        # We always use the same initial coniditions be used?
        price, wind = generate_time_series(T)
        # sample endogenous state variables
        h = np.random.uniform(0, data['hydrogen_capacity'], T)
        e_on = np.random.choice([0, 1], T)
        for t in range(T):
            state = [t, h[t], e_on[t-1] if t > 0 else 0, wind[t], wind[t-1] if t > 0 else data['wind_power_previous'], price[t], price[t-1] if t > 0 else data['price_previous']]
            state_pairs[t, i] = state
    return state_pairs
state_pairs = sample_representative_state_pairs(50)


## Linear program to perform value function minimization

In [39]:
def value_minimization(V: ValueFunction,t,state_cur,scenarios, gamma,print_result=False): 

    t, h, e_on_tm1, wind, wind_previous, price, price_previous = state_cur

    # Create a model
    model = ConcreteModel()
    # Declare indexed variable for the price
    model.p_grid = Var(within=NonNegativeReals,name='p_grid')
    model.e_h2p = Var(within=NonNegativeReals,name='e_h2p')
    model.e_p2h = Var(within=NonNegativeReals,name='e_p2h')
    model.e_on = Var(within=Binary,name='e_on')
    
    # declare the new state
    model.next_e_on = Var(within=Binary,name='new_e_on')
    model.next_h = Var(within=NonNegativeReals,bounds=(0,data['hydrogen_capacity']),name='new_h')

    # Objective function
    def objective_rule(model):
        
        expected_next_value = 0
        for scenario in scenarios:
            scenario_state =  [t+1, model.next_h, model.next_e_on, scenario.wind, wind, scenario.price, price]
            expected_next_value += V.compute_value_explicit(int(t)+1,scenario_state)
        expected_next_value /= len(scenarios) 
        
        return price * model.p_grid + data['electrolyzer_cost']*model.e_on + gamma * expected_next_value

    model.profit = Objective(rule=objective_rule, sense=minimize)
    model.DemandConstraint = Constraint(rule=lambda model: model.p_grid + wind + data['conversion_h2p']*model.e_h2p - model.e_p2h >= data['demand_schedule'][int(t)])

    # contraints

    model.h_contraint = Constraint(expr=lambda model: model.next_h == h + data['conversion_p2h']*model.e_p2h-model.e_h2p)

    model.p2h_constraint = Constraint(rule=lambda model: model.e_h2p <= h)
    model.p2h_constraint2 = Constraint(rule=lambda model: data['conversion_h2p']*model.e_h2p <= data['h2p_max_rate'])

    model.conversion_contraint = Constraint(rule=lambda model: data['conversion_p2h'] * model.e_p2h <= data['p2h_max_rate']*e_on_tm1)

    model.e_on_constraint = Constraint(rule=lambda model: model.e_on == model.next_e_on)

    # Create a solver
    solver = SolverFactory('gurobi')  # Make sure Gurobi is installed and properly configured

    # Solve the model
    results = solver.solve(model, tee=False)
    if print_result:
        # Check if an optimal solution was found
        if results.solver.termination_condition == TerminationCondition.optimal:
            print("Optimal solution found")
            print(f"profit: {value(model.profit)}")
            print(f"p_grid: {value(model.p_grid)}")
            print(f"e_h2p: {value(model.e_h2p)}")
            print(f"e_p2h: {value(model.e_p2h)}")
            print(f"e_on: {value(model.e_on)}")
        else:
            print("No optimal solution found.")
    decision = (model.e_on.value,model.e_p2h.value,model.e_h2p.value,model.p_grid.value)
    return decision,value(model.profit)


## Perform Backward Value Function Approximation

In [40]:
def backward_value_approx(V, state_pairs, K, data,gamma=0.9):
    T = data['num_timeslots']
    I = state_pairs.shape[1]
    for t in range(T-1, -1, -1):
        print(f"t={t}")
        value_targets = np.zeros(I)
        # go trough state pairs
        for i in range(I):
            state = state_pairs[t, i]
            _, h, e_on_tm1, wind, wind_previous, price, price_previous = state
            # we only need the 
            scenarios, scenario_probs = generate_scenarios(wind, price, wind_previous, price_previous, 1, k=K, n_samples=K)
            _,value_targets[i] = value_minimization(V, t, state, scenarios[1], gamma)
            
        print(V.squared_error(t, state_pairs[t], value_targets))
        V.update(t, state_pairs[t], value_targets)
        print(V.squared_error(t, state_pairs[t], value_targets))        
    return V


state_pairs = sample_representative_state_pairs(100)
V = backward_value_approx(ValueFunction(data['num_timeslots'],7),state_pairs,100,data)

t=23
2435.9808619897494
174.5833808403135
t=22
2494.5301137082397
127.2313807352166
t=21
2654.0716728626976
159.0356299029757
t=20
2480.7824050261825
121.424242875954
t=19
2638.718117324779
101.74745077288092
t=18
2600.985149174975
54.92640568481511
t=17
2492.4708362806405
25.74910524603375
t=16
2363.8754895971156
32.90116489058129
t=15
2334.191363733153
176.08931484967204
t=14
2550.8795826210844
55.38734052350306
t=13
2328.854396352654
214.78603519130004
t=12
2362.8502221256344
425.6989427285833
t=11
2715.9284669545277
291.098013319839
t=10
3753.789727771598
372.80590156067115
t=9
4398.811658594918
708.1441081374581
t=8
4246.3631284238145
877.232921379831
t=7
4483.259159169004
672.2661026735055
t=6
5660.272820108895
717.1769730367047
t=5
7255.900415000111
728.4464875856902
t=4
8316.284401087483
1297.5406679464927
t=3
6802.887880071528
1151.893960351918
t=2
4816.33734733621
205.18659114772908
t=1
2895.586559886602
23.645603499902737
t=0
1472.6210247740755
39.45294445870254


In [41]:
class ADPPolicy():
    def __init__(self, V, data,gamma = 0.9):
        self.V = V
        self.data = data
        self.gamma = gamma

    def __call__(self, t, h, e_on, wind, wind_previous, price, price_previous,data):
        scenarios, scenario_probs = generate_scenarios(wind, price, wind_previous, price_previous, 1)
        decision, _ = value_minimization(self.V, t, [t,h,e_on,wind,wind_previous,price,price_previous], scenarios[0], self.gamma)
        return decision


adp_policy = ADPPolicy(V, data)
sim_MDP(10,adp_policy,winds,prices)

Simulating MDP: 100%|██████████| 10/10 [00:18<00:00,  1.85s/it]


(np.float64(428.2049578319455),
 [np.float64(24.862385671014),
  np.float64(88.71661718473368),
  np.float64(312.7918991088412),
  np.float64(485.2773173486601),
  np.float64(1036.7140267643358),
  np.float64(143.83369483690421),
  np.float64(617.9210524194654),
  np.float64(865.8124024933712),
  np.float64(534.694673587),
  np.float64(171.42550890512874)])