In [1]:
import numpy as np
import pandas as pd
import data_import
from policy_iter import PolicyIteration

In [2]:
pd_data: pd.DataFrame = data_import.read_files_to_pd_dataframe(
    [f"../outs/grid/itr_{i}.xml.out" for i in range(500)]
    # [f"../outs/general/K4_with_middle.xml.out"]
)
pd_data.columns

Index(['index', 'N', 'Q', 'v0', 'Minv', 'M', 'ipopt_sol', 'policy_sol',
       'policy_converged', 'ipopt_converged', 'f_name', 'Q_size', 'Q_inv',
       'Q_inv_min_val', 'Q_inv_num_lt_0', 'Q_det', 'eigenvalues', 'cond_num',
       'cond_num_no_inf', 'Q_num_off_diag_entries', 'thetas', 'min_theta',
       'rank', 'nullity', 'ipopt_delta_KE', 'ipopt_policy', 'b'],
      dtype='object')

In [3]:
np.set_printoptions(linewidth=1000, edgeitems=300)
for i in range(len(pd_data)):
    pi = PolicyIteration(
        pd_data['Q'][i],
        pd_data['b'][i],
        # init_value=pd_data_tests_ipopt_start['ipopt_sol'][request.param],
        name=f"{i}"
    )
    pi.max_iter = 100
    pi.solve()

8: DIVERGING CYCLE FROM 18 to 33 (length: 15)
[   0.            0.            0.           26.44873344    0.            0.            0.           68.05479557    0.          -50.87292642    0.            0.            0.         -296.24230416    0.            0.          322.02629389  -22.92555544    0.           80.24487355    0.            0.          565.09622939  405.96905658    0.         -404.66201854 -659.31916633    0.          309.87089439   11.46390475]
[  0.           0.         -38.5353388  106.85669712  36.16258896   0.         -59.72841655  12.80828743 -24.67575418   0.          11.33817665   0.           0.           0.           0.           0.          28.11080874   0.         -46.82822786   4.45321184   0.          76.37920719 -13.69350797 -10.15811595   0.           0.           0.           0.         -10.91332056  11.57716492]
[ -3.66115583   0.           0.          31.40902812  -0.96356791   0.           0.          23.34373542   0.           0.         -19.44944

In [4]:
i = 452
Q = pd_data['Q'][i]
b = pd_data['b'][i]
pi_452 = PolicyIteration(Q, b)
pi_452.max_iter = 100
pi_452.solve()
np.linalg.matrix_rank(Q, tol=10e-7) # rank is 15 (once tol becomes 10e-8 then is 16)
# double check that ipopt sol does in fact work
ipopt_sol = pd_data['ipopt_sol'][i]
np.linalg.norm(np.minimum(Q @ ipopt_sol - b, ipopt_sol))    # note: ipopt tolerance is a tiny bit bigger than 10e-6

Iterator: DIVERGING CYCLE FROM 7 to 9 (length: 2)
[ 17.86974284   0.           4.0617261   29.55740592   0.           0.          11.21524141   0.           0.         -46.54512454   0.           0.          73.17039008   0.           0.           0.           1.23265037 -37.10195673   0.          13.45879979  42.57399719   0.        ]
[17.11467477  0.          4.06170335 23.7585645   0.         -5.479489   21.85778854  0.          0.          0.          0.          0.         26.68447702  0.          0.          0.          9.16938022  0.          0.         13.45880744 36.80974954  0.        ]


1.0465384010084121e-05

In [32]:
def get_policy_submatrix(Q, p):
    policy_matrix = np.zeros(Q.shape)  # a matrix that will be used extract a sub-matrix of Q into A
    for i in range(len(p)):
        if p[i] == 1:
            policy_matrix[i,i] = 1
    I = np.eye(Q.shape[0])
    # get the submatrix of Q and store in A. A is the same size as Q, but has some rows/columns replaced by identity
    # depending on self.policy
    return policy_matrix @ Q @ policy_matrix + I - policy_matrix


def rank_submatrix(Q, p):
    return np.linalg.matrix_rank(get_policy_submatrix(Q, p), tol=1e-06)

In [67]:
ipopt_policy = pd_data['ipopt_policy'][181]
print(ipopt_policy)
i = 181
Q = pd_data['Q'][i]
b = pd_data['b'][i]
pi = PolicyIteration(Q, b, policy=ipopt_policy.copy())
pi.max_iter = 100
pi.solve()
print(np.linalg.matrix_rank(Q, tol=10e-7)) # rank is 15 (once tol becomes 10e-8 then is 16) (out of ~25 rows)
# double check that ipopt sol does in fact work
ipopt_sol = pd_data['ipopt_sol'][i]
np.linalg.norm(np.minimum(Q @ ipopt_sol - b, ipopt_sol))    # note: ipopt tolerance is a tiny bit bigger than 10e-7
print(ipopt_sol)

v1 = pi.intermediate_values[4]
v2 = pi.intermediate_values[5]
print(v1)
print(v2)
p1 = pi.intermediate_policies[4]
p2 = pi.intermediate_policies[5]
print(p1)
print(p2)


Q1 = get_policy_submatrix(Q, p1)
Q2 = get_policy_submatrix(Q, p2)
print("Q1, Q1 ranks")
print(np.linalg.matrix_rank(Q1, tol=10e-8))
print(np.linalg.matrix_rank(Q2, tol=10e-8))

p_or = [1 if p1[k] + p2[k] > 0 else 0 for k in range(Q.shape[0])]
Q_or = get_policy_submatrix(Q, p_or)
print(p_or)
print(np.linalg.matrix_rank(Q_or, tol=10e-6))
print(np.linalg.norm(Q_or @ (v1 - v2))) # little bigger than 2 * 10e-5

# p_and = [1 if p1[k] + p2[k] == 2 else 0 for k in range(Q.shape[0])]
# Q_and = get_policy_submatrix(Q, p_and)
# print(np.linalg.matrix_rank(Q_and, tol=10e-6))
# print(np.linalg.norm(Q_and @ (v1 - v2)))

def objective(Q, b, x):
    return np.linalg.norm(np.minimum(Q @ x - b, x))
print(objective(Q, b, v1))

pi_neg1 = pi.intermediate_policies[2]
print(rank_submatrix(Q, pi_neg1))
pi_neg2 = pi.intermediate_policies[1]
print(rank_submatrix(Q, pi_neg2))
pi_neg3 = pi.intermediate_policies[0]
print(pi_neg3)
print(rank_submatrix(Q, pi_neg3))

print(Q1)

nodes = ""
edges = ""
for x in range(Q1.shape[0]):
    for y in range(Q1.shape[0]):
        if x == y and Q1[x,y] != 1.0:
            nodes += f"{x}\n"
            continue
        if Q1[x,y] != 0.0:
            edges += f"{x} {y}\n"
print(nodes)
print(edges)

print()
print()

nodes = ""
edges = ""
for x in range(Q1.shape[0]):
    for y in range(Q1.shape[0]):
        if x == y and Q2[x,y] != 1.0:
            nodes += f"{x}\n"
            continue
        if Q2[x,y] != 0.0:
            edges += f"{x} {y}\n"
print(nodes)
print(edges)


Q_ipopt = get_policy_submatrix(Q, ipopt_policy)

nodes = ""
edges = ""
for x in range(Q1.shape[0]):
    for y in range(Q1.shape[0]):
        if x == y and Q_ipopt[x,y] != 1.0:
            nodes += f"{x}\n"
            continue
        if Q_ipopt[x,y] != 0.0:
            edges += f"{x} {y}\n"
print(nodes)
print(edges)

[0, 1, 3, 9, 10, 11, 17, 18, 20, 22, 26]
Iterator: DIVERGING CYCLE FROM 4 to 6 (length: 2)
[   7.46347457    5.29292007    0.           14.16063355    0.            0.            0.            0.            0.           27.56680236   24.39499529  174.71497646    0.            0.         -167.84849298    0.            0.           36.38639091   18.99368475    0.            2.45334801    0.           41.59688672    0.            0.            0.          188.43588185]
[  7.46347723   5.29291156   0.          14.16058166   0.           0.           0.           0.           0.          27.5671186   10.78574189  16.34594234   0.           0.           0.           0.           0.          36.38649483  18.99367708   0.          20.87230405   0.          41.59666008 -12.5746325    0.           0.          29.20421741]
15
[ 7.7554  4.7415  0.     14.7006  0.      0.      0.      0.      0.     27.1711 12.291  13.2105  0.      0.      0.      0.      0.     35.3198 21.058   0.     16.299   0. 

In [60]:
i = 181
Q = pd_data['Q'][i]
b = pd_data['b'][i]
ipopt_policy = pd_data['ipopt_policy'][i]
pi_ipopt = [1 if x in ipopt_policy else 0 for x in range(Q.shape[0])]
print(pi_ipopt)
print(ipopt_policy)
initial_policy = [1] * 27
initial_policy[10] = 0
pi = PolicyIteration(Q, b, initial_policy=initial_policy)
pi.max_iter = 100
print(pi.solve())
print(len(pi.intermediate_policies))

[1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1]
[0, 1, 3, 9, 10, 11, 17, 18, 20, 22, 26]
Iterator: DIVERGING CYCLE FROM 15 to 18 (length: 3)
[  9.62451104   0.77963204   0.          11.15781413   0.           0.           0.           0.           0.           0.          53.88907815  -2.50031734   0.           0.           0.           6.20242751  42.81610235  42.49249007   0.         -15.60756831   0.           0.          75.49732472  -9.87421576   0.         -37.75901536  34.84084885]
[ 10.80376642  -1.43301486   0.          13.58168149   0.           0.           0.           0.           0.          41.19579084   4.44805309   0.           0.           4.40078335   0.         -26.42210376  12.38717682  37.69842858  23.93170551   0.           0.           0.          31.52691869   0.           0.           0.          17.29816202]
[ 1.00230769e+01  0.00000000e+00 -2.69766486e-02  1.10728931e+01  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.000