In [2]:
import numpy as np

In [3]:
# ------------------ Parámetros configurables ------------------ #
max_stock = 20                # Máximo stock permitido (permite sobrestock temporal)
max_order = 20                # Máximo número de tanques que se pueden pedir
n_stock = max_stock + 1       # Estados posibles
n_actions = max_order + 1     # Acciones posibles
T_horizon = 10                # Cantidad de períodos

demanda_vals = [0, 1, 2]
demanda_probs = [0.4, 0.4, 0.2]
precio_venta_tanque = 2.0
precio_venta_buque = 0.7
costo_compra = 1.6
costo_almacenamiento = 0.02

descuento = 0.8

In [4]:
# ------------------ Inicializar V y política óptima ------------------ #
V = np.zeros((T_horizon + 1, n_stock))           # Valor esperado
policy = np.zeros((T_horizon, n_stock), dtype=int)  # Política óptima

In [5]:
# ------------------ Función de recompensa ------------------ #
def recompensa(s, d, r):
    stock_despues_demanda = max(0, s - r)
    vendidos_tanque = min(s, r)
    vendidos_buque = max(0, s - 2 - r)  # Se venden desde buque si excede el stock tolerado (2)
    restantes = max(0, stock_despues_demanda - max(0, s - 2 - r))

    ingreso = (
        precio_venta_tanque * vendidos_tanque +
        precio_venta_buque * vendidos_buque
    )
    costo = costo_compra * d
    costo_alm = costo_almacenamiento * restantes
    return ingreso - costo - costo_alm

In [6]:
# ------------------ Backward induction ------------------ #
for t in reversed(range(T_horizon)):
    for s in range(n_stock):
        best_value = -np.inf
        best_action = 0
        for a in range(n_actions):
            d = a
            expected_value = 0.0
            for r, p_r in zip(demanda_vals, demanda_probs):
                stock_despues_demanda = max(0, s - r)
                nuevo_stock = min(stock_despues_demanda + d, max_stock)
                reward = recompensa(s, d, r)
                expected_value += p_r * (reward + descuento * V[t + 1, nuevo_stock])

            if expected_value > best_value:
                best_value = expected_value
                best_action = a

        V[t, s] = best_value
        policy[t, s] = best_action

In [7]:
# ------------------ Mostrar política óptima ------------------ #
print("Política óptima (por período y stock):")
for t in range(T_horizon):
    print(f"\nPeríodo {t}:")
    for s in range(n_stock):
        print(f"  Stock {s}: pedir {policy[t, s]} tanques")

Política óptima (por período y stock):

Período 0:
  Stock 0: pedir 20 tanques
  Stock 1: pedir 19 tanques
  Stock 2: pedir 18 tanques
  Stock 3: pedir 17 tanques
  Stock 4: pedir 16 tanques
  Stock 5: pedir 15 tanques
  Stock 6: pedir 14 tanques
  Stock 7: pedir 13 tanques
  Stock 8: pedir 12 tanques
  Stock 9: pedir 11 tanques
  Stock 10: pedir 10 tanques
  Stock 11: pedir 9 tanques
  Stock 12: pedir 8 tanques
  Stock 13: pedir 7 tanques
  Stock 14: pedir 6 tanques
  Stock 15: pedir 5 tanques
  Stock 16: pedir 4 tanques
  Stock 17: pedir 3 tanques
  Stock 18: pedir 2 tanques
  Stock 19: pedir 1 tanques
  Stock 20: pedir 0 tanques

Período 1:
  Stock 0: pedir 20 tanques
  Stock 1: pedir 19 tanques
  Stock 2: pedir 18 tanques
  Stock 3: pedir 17 tanques
  Stock 4: pedir 16 tanques
  Stock 5: pedir 15 tanques
  Stock 6: pedir 14 tanques
  Stock 7: pedir 13 tanques
  Stock 8: pedir 12 tanques
  Stock 9: pedir 11 tanques
  Stock 10: pedir 10 tanques
  Stock 11: pedir 9 tanques
  Stock 12:

In [10]:
for s in range(n_stock):
    print(f"  Stock {s}: pedir {policy[9, s]} tanques")

  Stock 0: pedir 0 tanques
  Stock 1: pedir 0 tanques
  Stock 2: pedir 0 tanques
  Stock 3: pedir 0 tanques
  Stock 4: pedir 0 tanques
  Stock 5: pedir 0 tanques
  Stock 6: pedir 0 tanques
  Stock 7: pedir 0 tanques
  Stock 8: pedir 0 tanques
  Stock 9: pedir 0 tanques
  Stock 10: pedir 0 tanques
  Stock 11: pedir 0 tanques
  Stock 12: pedir 0 tanques
  Stock 13: pedir 0 tanques
  Stock 14: pedir 0 tanques
  Stock 15: pedir 0 tanques
  Stock 16: pedir 0 tanques
  Stock 17: pedir 0 tanques
  Stock 18: pedir 0 tanques
  Stock 19: pedir 0 tanques
  Stock 20: pedir 0 tanques
