# Programación dinámica

## Librerías

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from utils import find_policy, gambler_problem, v_k_inplace, v_k_step

# Global plotting setup: use seaborn's "notebook" context with fonts scaled
# by 2, and have matplotlib render all text through LaTeX.
sns.set_context(context="notebook", font_scale=2)
plt.rcParams.update({"text.usetex": True})

Siendo $0 \leq \gamma \leq 0.9$ (se evalúan 1000 valores equiespaciados del factor de descuento)

In [None]:
# Discount factors to evaluate: 1000 evenly spaced values from 0.0 to 0.9
# (both ends included). Each one maps to its own, initially empty, result dict.
discount_grid = np.linspace(0.0, 0.9, 1000)
gammas = {gamma: {} for gamma in discount_grid.tolist()}  # keys are plain Python floats

In [None]:
# Rewards and transition matrix of the two-state process.

# Reward per state: R(s1) = 1, R(s2) = 2.
r = np.array([1, 2])

# Row i holds the transition probabilities out of state s(i+1):
#   row 0: P(s1 -> s1) = 0, P(s1 -> s2) = 1
#   row 1: P(s2 -> s1) = 1, P(s2 -> s2) = 0
P = np.array([[0, 1], [1, 0]])

#### Step

In [None]:
# Run v_k_step once per discount factor and keep the two values it returns
# under the keys "$v_{k+1}$" and "iter" (presumably the final value estimate
# and the number of iterations used -- confirm against utils.v_k_step).
for gamma, results in gammas.items():
    values, n_iter = v_k_step(P, r, gamma)
    results["$v_{k+1}$"] = values
    results["iter"] = n_iter

In [None]:
# Plot the iteration count currently stored in gammas[gamma]["iter"] against
# the discount factor and save the figure as ../img/gamma_iters.png.
discounts = list(gammas)  # the dict keys are the gamma values, in insertion order
iterations = [gammas[gamma]["iter"] for gamma in discounts]

plt.figure()
sns.lineplot(x=discounts, y=iterations)
# No plt.legend() here: the single curve carries no label, so the call would
# only make matplotlib warn that there are no labelled artists.
# Raw string: "\g" is not a valid escape sequence in a regular string literal.
plt.xlabel(r"$\gamma$")
plt.ylabel("Iteraciones")
plt.savefig("../img/gamma_iters.png", transparent=True, bbox_inches="tight")
plt.close()

In [None]:
# Plot both components of the value stored in gammas[gamma]["$v_{k+1}$"]
# against the discount factor and save the figure as ../img/gamma_v_k.png.
discounts = list(gammas)  # the dict keys are the gamma values, in insertion order
v_first = [gammas[gamma]["$v_{k+1}$"][0] for gamma in discounts]
v_second = [gammas[gamma]["$v_{k+1}$"][1] for gamma in discounts]

plt.figure()
sns.lineplot(x=discounts, y=v_first, label="$v_{k+1,0}$")
plt.plot(discounts, v_second, label="$v_{k+1,1}$")
plt.legend(frameon=False)
# Raw string: "\g" is not a valid escape sequence in a regular string literal.
plt.xlabel(r"$\gamma$")
plt.ylabel("$v_{k+1}$")
plt.savefig("../img/gamma_v_k.png", transparent=True, bbox_inches="tight")
plt.close()

#### Inplace

In [None]:
# Run v_k_inplace once per discount factor and keep the two values it returns
# under the keys "$v_{k+1}$" and "iter", replacing whatever those keys held
# (presumably the final value estimate and the number of iterations used --
# confirm against utils.v_k_inplace).
for gamma, results in gammas.items():
    values, n_iter = v_k_inplace(r, gamma)
    results["$v_{k+1}$"] = values
    results["iter"] = n_iter

In [None]:
# Plot the iteration count currently stored in gammas[gamma]["iter"] against
# the discount factor and save the figure as ../img/gamma_iters-inplace.png.
discounts = list(gammas)  # the dict keys are the gamma values, in insertion order
iterations = [gammas[gamma]["iter"] for gamma in discounts]

plt.figure()
sns.lineplot(x=discounts, y=iterations)
# No plt.legend() here: the single curve carries no label, so the call would
# only make matplotlib warn that there are no labelled artists.
# Raw string: "\g" is not a valid escape sequence in a regular string literal.
plt.xlabel(r"$\gamma$")
plt.ylabel("Iteraciones")
plt.savefig("../img/gamma_iters-inplace.png", transparent=True, bbox_inches="tight")
plt.close()

In [None]:
# Plot both components of the value stored in gammas[gamma]["$v_{k+1}$"]
# against the discount factor and save the figure as
# ../img/gamma_v_k-inplace.png.
discounts = list(gammas)  # the dict keys are the gamma values, in insertion order
v_first = [gammas[gamma]["$v_{k+1}$"][0] for gamma in discounts]
v_second = [gammas[gamma]["$v_{k+1}$"][1] for gamma in discounts]

plt.figure()
sns.lineplot(x=discounts, y=v_first, label="$v_{k+1,0}$")
plt.plot(discounts, v_second, label="$v_{k+1,1}$")
plt.legend(frameon=False)
# Raw string: "\g" is not a valid escape sequence in a regular string literal.
plt.xlabel(r"$\gamma$")
plt.ylabel("$v_{k+1}$")
plt.savefig("../img/gamma_v_k-inplace.png", transparent=True, bbox_inches="tight")
plt.close()

### Gambler's problem

In [None]:
# Values of ph to run the gambler's problem with; each is passed to
# gambler_problem and find_policy (presumably the probability of winning a
# single bet -- confirm against utils).
phs = [0.25, 0.55]

# Size parameter handed to gambler_problem and find_policy; the policy plot
# uses np.arange(1, N) as its capital axis, i.e. 1..100.
N = 101

In [None]:
# For every ph in phs: solve the gambler's problem, then save one figure with
# the value estimates recorded for each sweep and one figure with the policy
# derived from the final values.

# Capital axis for the policy plot (1 .. N-1); it does not depend on ph, so it
# is built once instead of on every iteration.
x = np.arange(1, N, 1)

for ph in phs:
    V, sweeps = gambler_problem(N, ph)
    policy = find_policy(V, ph, N)

    # One flattened array per recorded sweep, each drawn as its own curve.
    sweep_p = [arr.flatten() for arr in sweeps]
    # Plain list copy of the policy for seaborn.
    policy_p = list(policy)

    plt.figure()
    for arr in sweep_p:
        plt.plot(arr)
    plt.xlabel("Capital")
    plt.ylabel("Valor estimado")
    plt.savefig(f"../img/sweeps_{ph}.png", transparent=True, bbox_inches="tight")
    plt.close()

    plt.figure()
    sns.lineplot(x=x, y=policy_p)
    plt.xlabel("Capital")
    plt.ylabel("Política final (apuesta)")
    plt.savefig(f"../img/policy_{ph}.png", transparent=True, bbox_inches="tight")
    plt.close()