In [41]:
import numpy as np
import mdptoolbox

In [42]:
# ------------------ Parámetros configurables ------------------ #
max_storage = 20       # Límite físico de tanques que se pueden almacenar
max_order = 20          # Máximo número de tanques que se pueden pedir
max_stock = 20         # Máximo estado considerado (permite sobrestock)
demanda_vals = [0, 1, 2]
demanda_probs = [0.4, 0.4, 0.2]
precio_venta_tanque = 2.0
precio_venta_buque = 0.7
costo_compra = 1.6
costo_almacenamiento = 0.02
descuento = 0.8

n_states = max_stock + 1
n_actions = max_order + 1

In [43]:
# ------------------ Inicializar matrices T y R ------------------ #
T = np.zeros((n_actions, n_states, n_states))
R = np.zeros((n_states, n_actions))

In [44]:
# ------------------ Función de recompensa ------------------ #
def recompensa(s, d, r):
    vendidos_tanque = min(s, r)
    restantes = max(0, s - r)
    vendidos_buque = max(0, s - 2 - r)

    ingreso = (
        precio_venta_tanque * vendidos_tanque +
        precio_venta_buque * vendidos_buque
    )
    costo = costo_compra * d
    costo_alm = costo_almacenamiento * restantes
    return ingreso - costo - costo_alm

In [45]:
# ------------------ Construir matrices T y R ------------------ #
for a in range(n_actions):
    d = a  # acción = cantidad a pedir
    for s in range(n_states):
        for r, p_r in zip(demanda_vals, demanda_probs):
            s_after_demand = max(0, s - r)  # stock restante tras demanda
            s_prim = min(s_after_demand + d, max_stock)  # stock futuro con llegada del pedido
            T[a, s, s_prim] += p_r
            R[s, a] += p_r * recompensa(s, d, r)

In [46]:
# ------------------ Resolver con Value Iteration ------------------ #
vi = mdptoolbox.mdp.ValueIteration(transitions=T, reward=R, discount=descuento)
vi.run()

In [47]:
# ------------------ Mostrar política ------------------ #
print("Política óptima (tanques a pedir según stock actual):")
for s in range(n_states):
    print(f"Stock {s}: pedir {vi.policy[s]} tanques")

Política óptima (tanques a pedir según stock actual):
Stock 0: pedir 20 tanques
Stock 1: pedir 19 tanques
Stock 2: pedir 18 tanques
Stock 3: pedir 17 tanques
Stock 4: pedir 16 tanques
Stock 5: pedir 15 tanques
Stock 6: pedir 14 tanques
Stock 7: pedir 13 tanques
Stock 8: pedir 12 tanques
Stock 9: pedir 11 tanques
Stock 10: pedir 10 tanques
Stock 11: pedir 9 tanques
Stock 12: pedir 8 tanques
Stock 13: pedir 7 tanques
Stock 14: pedir 6 tanques
Stock 15: pedir 5 tanques
Stock 16: pedir 4 tanques
Stock 17: pedir 3 tanques
Stock 18: pedir 2 tanques
Stock 19: pedir 1 tanques
Stock 20: pedir 0 tanques


In [48]:
T

array([[[1. , 0. , 0. , ..., 0. , 0. , 0. ],
        [0.6, 0.4, 0. , ..., 0. , 0. , 0. ],
        [0.2, 0.4, 0.4, ..., 0. , 0. , 0. ],
        ...,
        [0. , 0. , 0. , ..., 0.4, 0. , 0. ],
        [0. , 0. , 0. , ..., 0.4, 0.4, 0. ],
        [0. , 0. , 0. , ..., 0.2, 0.4, 0.4]],

       [[0. , 1. , 0. , ..., 0. , 0. , 0. ],
        [0. , 0.6, 0.4, ..., 0. , 0. , 0. ],
        [0. , 0.2, 0.4, ..., 0. , 0. , 0. ],
        ...,
        [0. , 0. , 0. , ..., 0.4, 0.4, 0. ],
        [0. , 0. , 0. , ..., 0.2, 0.4, 0.4],
        [0. , 0. , 0. , ..., 0. , 0.2, 0.8]],

       [[0. , 0. , 1. , ..., 0. , 0. , 0. ],
        [0. , 0. , 0.6, ..., 0. , 0. , 0. ],
        [0. , 0. , 0.2, ..., 0. , 0. , 0. ],
        ...,
        [0. , 0. , 0. , ..., 0.2, 0.4, 0.4],
        [0. , 0. , 0. , ..., 0. , 0.2, 0.8],
        [0. , 0. , 0. , ..., 0. , 0. , 1. ]],

       ...,

       [[0. , 0. , 0. , ..., 1. , 0. , 0. ],
        [0. , 0. , 0. , ..., 0.6, 0.4, 0. ],
        [0. , 0. , 0. , ..., 0.2, 0.4, 0.4