-
Notifications
You must be signed in to change notification settings - Fork 0
/
pp_sddp.py
365 lines (302 loc) · 12.6 KB
/
pp_sddp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
"""Pereira-Pinto algorithm for stochastic infinite-horizon DP.
This module contains an implementation of the Pereira-Pinto SDDSP
algorithm to solve stochastic infinite-horizon linear programs, in a
classical dynamic programming fashion. The algorithm uses a Benders
decomposition approach.
"""
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
import sys
import math
import random
import time
import cplex
import b2_sddp_primal_bound as pb
import lotsizing as ls
import b2_sddp_utility as util
from cplex import SparsePair
from b2_sddp_settings import B2Settings
from b2_sddp import create_master_problem, create_slave_problem
from b2_sddp_forward import forward_pass, single_forward_step
from b2_sddp_backward import backward_pass
def get_time_horizon_length(delta, ub, max_error):
"""Determine length of the time horizon.
Determine how many stages of the problem are necessary to reduce
the error below the given value, based on the cost of a feasible
primal solution.
Parameters
----------
delta : float
Discount factor.
ub : float
Expected cost of a primal solution for a single stage.
max_error : float
Maximum absolute error allowed.
Returns
-------
int
The length of the time horizon.
"""
assert(0 <= delta < 1)
assert(max_error > 0)
tau = math.log(max_error * (1 - delta) / abs(ub), delta) - 1
return int(math.ceil(tau))
# -- end function
def pp_sddp(settings, n1, n2, k, m, p, q, x_obj, y_obj, A_rows, G_rows,
T_rows, D_rows, W_rows, b, d, w, scenarios, prob, z_lower,
delta, output_stream = sys.stdout):
"""Apply the Pereira-Pinto SDDP algorithm for dynamic programming.
Solve an infinite horizon stochastic dynamic program using the
Pereira-Pinto SDDP algorithm, given the initial data. It is
assumed that this is a minimization problem and the constraints
are in >= form. The problem is written as:
.. math ::
\min \{\sum_{t=0}^\infty c^T x_t + h^T y_t : \forall t A x_t + G
y_t \ge b + Ty_{t-1}, D x_t \ge d, W y_t \ge w\}
Parameters
----------
settings : b2_sddp_settings.B2Settings
Algorithmic settings.
n1 : int
Number of x variables of the problem.
n2 : int
Number of y variables of the problem.
k : int
Number of scenarios.
m : int
Number of rows in the first set of constraints
A x + G y >= b + T y_{t-1}.
p : int
Number of rows in the second set of constraints D x >= d.
q : int
Number of rows in the third set of constraints W x >= w.
x_obj : List[float]
Cost coefficient for the x variables.
y_obj : List[float]
Cost coefficient for the y (state) variables.
A_rows : List[List[int], List[float]]
The coefficients of the rows of the A matrix in A x + G y >=
b + T y_{t-1}. These are in sparse form: indices and elements.
G_rows : List[List[int], List[float]]
The coefficients of the rows of the G matrix in A x + G y >=
b + T y_{t-1}. These are in sparse form: indices and elements.
T_rows : List[List[int], List[float]]
The coefficients of the rows of the T matrix in A x + G y >=
b + T y_{t-1}. These are in sparse form: indices and elements.
D_rows : List[List[int], List[float]]
The coefficients of the rows of the D matrix in D x >= d.
These are in sparse form: indices and elements.
W_rows : List[List[int], List[float]]
The coefficients of the rows of the W matrix in W y >= w.
These are in sparse form: indices and elements.
b : List[float]
The rhs of the first set of constraints in the master.
d : List[float]
The rhs of the second set of constraints in the master.
w : List[float]
The rhs of the third set of constraints in the master.
scenarios : List[List[float]]
The rhs vector (b, d, w) for each scenario.
prob : List[float]
The probability of each scenario.
z_lower : List[float]
Lower bounds on the objective value of each scenario.
delta : float
Discount factor, 0 <= delta < 1.
output_stream : file
Output stream. Must have a 'write' and 'flush' method.
Returns
-------
(cplex.Cplex, float, float, float, int, float)
The master problem after convergence, the best upper bound,
the best lower bound, the final gap (as a fraction, i.e. not
in percentage point), the final length of the time horizon
tau, the total CPU time in seconds.
"""
assert(isinstance(settings, B2Settings))
assert(len(x_obj) == n1)
assert(len(y_obj) == n2)
assert(len(A_rows) == m)
assert(len(G_rows) == m)
assert(len(T_rows) == m)
assert(len(D_rows) == p)
assert(len(W_rows) == q)
assert(len(b) == m)
assert(len(d) == p)
assert(len(w) == q)
assert(len(scenarios) == k)
for i in range(k):
assert(len(scenarios[i]) == m + p + q)
assert(len(prob) == k)
assert(len(z_lower) == k)
assert(0 <= delta < 1)
# Start counting time
start_time = time.clock()
# Initialize inverse CDF of the probability distribution of the
# scenarios
inverse_cdf = util.InverseCDF(prob)
# Compute column representation of T
T_cols = util.row_matrix_to_col_matrix(m, n2, T_rows)
# Create the master and slave
master_orig = create_master_problem(settings, n1, n2, k, m, p, q,
x_obj, y_obj, A_rows, G_rows,
D_rows, W_rows, b, d, w, scenarios,
prob, z_lower, delta)
slave_orig = create_slave_problem(settings, n1, n2, k, m, p, q,
x_obj, y_obj, A_rows, G_rows, D_rows,
W_rows, d, w, prob, z_lower, delta)
# Store solutions for all forward passes
fwpass_sol = list()
# Store cost of primal solutions, i.e. sample paths
cost_sol = list()
# The best primal bound available so far
best_ub = cplex.infinity
# Is the stopping criterion satisfied?
stop = False
# Create master problem with fixed y variables
pb_prob = pb.create_p_fixed_y_problem(settings, n1, n2, k, m, p, q,
x_obj, y_obj, A_rows, G_rows,
T_rows, D_rows, W_rows, d, w,
scenarios)
if (not settings.print_cplex_output):
pb_prob.set_results_stream(None)
else:
pb_prob.set_results_stream(output_stream)
# Compute cost of a single stage, using a fixed y policy
ub_tail = pb.cost_tail_fixed_y(settings, n1, n2, k, m, p, q, T_cols, [0]*n2,
0, delta, scenarios, prob, 0, pb_prob)
# Fraction of the absolute gap used to upper bound the error in
# the infinite tail
gap_frac = 0.9
# Length of the time horizon
tau = get_time_horizon_length(delta, ub_tail,
settings.gap_absolute * gap_frac)
print('Pereira-Pinto time horizon of length {:d}'.format(tau),
file = output_stream)
output_stream.flush()
# Make copies of master and slave
master = [cplex.Cplex(master_orig) for t in range(tau)]
slave = [cplex.Cplex(slave_orig) for t in range(tau)]
# Cut activity levels
cut_activity = [list() for t in range(tau)]
for t in range(tau):
if (not settings.print_cplex_output):
master[t].set_results_stream(None)
slave[t].set_results_stream(None)
else:
master[t].set_results_stream(output_stream)
slave[t].set_results_stream(output_stream)
# Start the PP SDDP algorithm!
while (not stop):
print('*** Major iteration after {:d} passes'.format(len(fwpass_sol)),
file = output_stream)
output_stream.flush()
for num_pass in range(settings.num_passes):
# Collect forward solution
x, y, z = single_forward_step(settings, n1, n2, k, m, p, q,
T_cols, [0]*n2, scenarios,
prob, inverse_cdf, master[0],
cut_activity[0])
sol_x, sol_y, sol_z = [x], [y], [z]
for t in range(1, tau):
# One step at a time
x, y, z = single_forward_step(settings, n1, n2, k, m, p, q,
T_cols, sol_y[-1], scenarios,
prob, inverse_cdf, master[t],
cut_activity[t])
sol_x.append(x)
sol_y.append(y)
sol_z.append(z)
# Store all LP solutions
fwpass_sol.append((sol_x, sol_y, sol_z))
# Computation of upper bounds
cost_sol.append(pb.cost_primal_sol(sol_x, sol_y, x_obj, y_obj,
tau, delta))
# Backward pass: collect Benders cuts
for t in reversed(range(tau)):
cuts = backward_pass(settings, n1, n2, k, m, p, q, 1,
T_cols, scenarios, prob,
[sol_x[t]], [sol_y[t]], [sol_z[t]],
slave[t], num_pass)
util.add_cuts(master[t], cuts)
# -- end passes
# Clean up LPs
for t in range(1, tau):
cut_activity[t] = util.purge_cuts(settings, m, p, q,
cut_activity[t],
master[t], slave[t])
# Primal bound: compute cost based on samples
ub = pb.primal_bound(settings, cost_sol)
# Get current dual bound and compute optimality gap
master[0].linear_constraints.set_rhs([(i, b[i]) for i in range(m)])
master[0].solve()
lb = master[0].solution.get_objective_value()
gap = ((ub + settings.gap_absolute * gap_frac - lb) /
(ub + 1.0e-10))
# Report progress status
print('Primal bound: {:f} '.format(ub) +
'Dual bound: {:f} '.format(lb) +
'gap {:.2f} %'.format(100*gap),
file = output_stream)
output_stream.flush()
cpu_time = time.clock() - start_time
# Stop when the optimality gap is small enough
if (tau >= settings.max_tau or
cpu_time >= settings.max_cpu_time or
gap <= settings.gap_relative or
(ub - lb) <= settings.gap_absolute * (1 - gap_frac)):
stop = True
# -- end main loop
# Print last solution and exit
util.print_LP_solution(settings, n1, n2, k, master[0],
'Final solution of the master:',
output_stream = output_stream)
total_time = time.clock() - start_time
print(file = output_stream)
print('Summary:', end = ' ', file = output_stream)
print('ub {:.3f} lb {:.3f} '.format(ub + settings.gap_absolute * gap_frac,
lb) +
'gap {:.2f} % tau {:d} '.format(100 * gap, tau) +
'time {:.4f}'.format(total_time),
file = output_stream)
output_stream.flush()
return (master[0], ub + settings.gap_absolute * gap_frac, lb, gap,
tau, total_time)
# -- end function
if (__name__ == "__main__"):
# Test the algorithm on a small problem
if (sys.version_info[0] >= 3):
print('Error: Python 3.* is currently not supported.')
print('Please use Python 2.7')
sys.exit()
# Settings for the problem
settings = B2Settings(add_cuts_immediately = False,
aggregate_cuts = False,
rounds_before_cut_purge = 20,
gap_absolute = 100)
# Define problem - input
m = 2
n1 = 2
n2 = 2
k = 2
p = 2
q = 2
delta = 0.99
x_obj = [3, 2]
y_obj = [1.75, 1.75]
A_rows = [[[0], [1]], [[0, 1], [1, 2]]]
G_rows = [[[0], [1]], [[1], [1]]]
T_rows = [[[0], [1]], [[1], [1]]]
D_rows = [[[0], [1]], [[1], [1]]]
W_rows = [[[0], [1]], [[1], [1]]]
b = [1, 1]
d = [0, 0]
w = [0, 0]
scenarios = [[2, 1] + d + w, [1, 4] + d + w]
prob = [1 / 2, 1 / 2]
z_lower = [0, 0]
# Call main function here
master = pp_sddp(settings, n1, n2, k, m, p, q, x_obj, y_obj, A_rows,
G_rows, T_rows, D_rows, W_rows, b, d, w,
scenarios, prob, z_lower, delta)