In [None]:
import numpy as np
from scipy.special import  softmax
import matplotlib.pyplot as plt
import os
from matplotlib.collections import LineCollection
import re
import glob
import modelBasedTrain as mb

In [None]:
# --- Grid geometry -----------------------------------------------------------
rows = 131
cols = 92
SC = rows * cols  # Number of States

# --- Source (target disc) ----------------------------------------------------
rSource = 91      # Source coordinates (row)
cSource = 45.5    # Source coordinates (column)
find_range = 1.1  # Source radius

# --- Reward ------------------------------------------------------------------
gamma = 0.99975
reward = -(1 - gamma)  # constant reward assigned to every state outside the source disc

# Row/column displacement for each of the four action indices.
ActionDict = np.asarray([
            [-1,  0], # North
            [ 0,  1], # East
            [ 1,  0], # South
            [ 0, -1]  # West
        ])

dataC = np.load("celaniData/fine5.npy")

# Start-state distribution: mass only on the first `cols` states (grid row 0),
# proportional to 1 - dataC[0, s] and normalised to sum to one.
rho = np.zeros(SC)
firstRowWeights = 1 - dataC[0, :cols]
rho[:cols] = firstRowWeights / np.sum(firstRowWeights)

# Per-state reward vector: `reward` everywhere, 0 strictly inside the source disc.
R = np.full(SC, reward)
for s in range(SC):
    r, c = divmod(s, cols)
    if (r - rSource) ** 2 + (c - cSource) ** 2 < find_range ** 2:
        R[s] = 0

In [None]:
def isEnd(state):
    """Tell whether flat state index `state` lies strictly inside the source disc.

    The index is unravelled on a row-major (131, 92) grid; the squared distance
    of the resulting (row, column) from (rSource, cSource) is then compared
    with find_range squared.
    """
    row, col = np.unravel_index(state, (131, 92))
    squaredDist = (row - rSource) ** 2 + (col - cSource) ** 2
    return squaredDist < find_range ** 2

def choose_action(pi, o):
    """Draw one of the 4 action indices using the probabilities stored in pi[o, 0].

    `o` is the current observation; the second index of `pi` is fixed to 0.
    """
    actionProbs = pi[o, 0]
    return np.random.choice(4, p=actionProbs)

def move(s, a, nRows=131, nCols=92):
    """Apply action `a` to flat state `s` on the grid and return the new flat state.

    Parameters
    ----------
    s : int
        Flat, row-major state index (row * nCols + column).
    a : int
        Index into ActionDict, which supplies the (row, column) displacement.
    nRows, nCols : int, optional
        Grid shape.  The defaults are the 131 x 92 grid used throughout the
        notebook.  The shape is taken from these parameters rather than from
        the notebook-level name `cols`, because later plotting cells rebind
        `cols` to an array of colour values.

    Returns
    -------
    int
        Flat index of the resulting state.  A displacement that would leave the
        grid along one axis is ignored for that axis only (the agent stays on
        the border along it).
    """
    r, c = divmod(s, nCols)
    dr, dc = ActionDict[a]
    rNew, cNew = r + dr, c + dc
    # Each axis is clamped independently, exactly as the row/column updates were before.
    if 0 <= rNew < nRows:
        r = rNew
    if 0 <= cNew < nCols:
        c = cNew
    return r * nCols + c

def getTraj(pi, pObs, rho, maxSteps=10000, verbose=False):
    """Sample one trajectory until the source is reached or the buffer is full.

    Parameters
    ----------
    pi : array
        Policy; forwarded to choose_action, which reads pi[o, 0] as the action
        probabilities for observation `o`.
    pObs : array
        pObs[:, s] is used as the probability vector for drawing the
        observation (0 or 1) in state `s`.
    rho : array of length 131*92
        Probability vector from which the start state is drawn.
    maxSteps : int, optional
        Number of columns of the returned history buffer (default 10000).  At
        most maxSteps - 1 transitions are simulated so that the state reached
        by the last transition still fits in the buffer.
    verbose : bool, optional
        When True, print the start state and one line per transition
        (current state, new state, action, observation).

    Returns
    -------
    history : int ndarray of shape (3, maxSteps)
        Row 0: visited states, row 1: observations, row 2: actions.
    t : int
        Number of transitions taken.  history[0, :t + 1] and history[1, :t + 1]
        hold the visited states/observations, history[2, :t] the actions.

    Raises
    ------
    ValueError
        If maxSteps is smaller than 1 (the start state would not fit).
    """
    if maxSteps < 1:
        raise ValueError("maxSteps must be at least 1")
    start = np.random.choice(range(131*92), size=1, p = rho).astype(int)[0]
    if verbose:
        print(start)
    obs = np.random.choice(2, p = pObs[:, start])
    curState = start
    history = np.zeros((3, maxSteps), dtype=int)
    history[0, 0] = start
    history[1, 0] = obs
    t = 0
    # Every iteration writes the new state into column t + 1, so the loop must
    # stop at t == maxSteps - 1; allowing t == maxSteps would index one column
    # past the end of the buffer on a trajectory that never reaches the source.
    while not isEnd(curState) and t < maxSteps - 1:
        action = choose_action(pi, obs)
        history[2, t] = action
        newState = move(curState, action)
        obs = np.random.choice(2, p = pObs[:, newState])
        if verbose:
            print(curState, newState, action, obs)
        t += 1
        history[0, t] = newState
        history[1, t] = obs
        curState = newState
    return history, t

def plot_and_save(totIter, thetas, obj, normDiff, diffFromOpt, paramas, name, delayed = None):
    """Draw a 2x2 diagnostic figure for one run and save it under objOut/png/.

    Panels (left to right, top to bottom): `thetas` ("Theta Norm"), `obj`
    ("Objective", with a reference line at -0.48 labelled "Optimal"),
    `normDiff` ("Diff from True") and `diffFromOpt` ("Diff from Optimal").

    Parameters
    ----------
    totIter : int
        Number of points; every series is plotted against range(totIter).
    thetas, obj, normDiff, diffFromOpt : sequences of length totIter
        The four series to plot.
    paramas : sequence
        paramas[0] / paramas[1] are shown as actor / critic lambda and
        paramas[2] / paramas[3] as actor / critic learning rate in the title;
        the whole object is also embedded in the output file name.
    name : str
        Figure title and file-name prefix.
    delayed : scalar or sequence, optional
        x position(s) at which a dashed orange vertical line is drawn in every
        panel.  With the default None no marker is drawn.
    """
    color = "orange"
    style = "--"

    def _mark_delayed():
        # Span the current panel's y-range at the requested position(s).  With
        # the default delayed=None there is nothing to mark, so no line is
        # drawn instead of forwarding None to plt.vlines.
        if delayed is not None:
            plt.vlines(delayed, *plt.ylim(),color, style)

    iterations = range(totIter)
    plt.figure(figsize=(15, 10))
    plt.suptitle(f"{name}\n Actor Lambda {paramas[0]}; Lr {paramas[2]}\nCritic Lambda {paramas[1]}; Lr {paramas[3]}")

    plt.subplot(2, 2, 1)
    plt.plot(iterations, thetas, label = "Theta Norm")
    _mark_delayed()
    plt.legend()

    plt.subplot(2,2,2)
    plt.plot(iterations, obj, label = "Objective")
    plt.hlines(-0.48, 0,totIter, "r", label = "Optimal")
    # Fixed y window for the objective panel; it just includes the -0.48 reference line.
    plt.ylim(-1, -0.475)
    _mark_delayed()
    plt.legend()

    plt.subplot(2, 2, 3)
    plt.plot(iterations, normDiff, label = "Diff from True")
    plt.hlines(0, 0,totIter, "k", label= "0")
    _mark_delayed()
    plt.legend()

    plt.subplot(2,2,4)
    plt.plot(iterations, diffFromOpt, label = "Diff from Optimal")
    plt.hlines(0, 0,totIter, "k", label= "0")
    plt.legend()
    _mark_delayed()

    plt.savefig(f"objOut/png/{name}_{paramas}.png")

In [None]:
# Flat indices of every state that isEnd classifies as terminal.
list(filter(isEnd, range(SC)))

In [None]:
# Turn the stored parameters into a policy (softmax over axis 2), show it, and
# sample one trajectory; the cell output is the number of transitions taken.
thetaLoro = np.load("celaniData/thetaLoro.npy")
pi = softmax(thetaLoro, axis=2)
print(pi)
trajectory = getTraj(pi, dataC, rho)
hst, end = trajectory
end

In [None]:
# Did the sampled trajectory ever store state 8417 (= 91 * 92 + 45, i.e. row 91,
# column 45 of the 131 x 92 grid, next to the source at (91, 45.5))?
# Note: the whole buffer row is searched, including its unused zero-filled tail.
np.any(hst[0] == 8417)

In [None]:
# States visited by the sampled trajectory, up to (but not including) column `end`.
hst[0, 0:end]

In [None]:
# Overlay the sampled trajectory on plane 1 of fine5.npy (the observation-1
# probabilities that getTraj samples from), coloured by time step.
scemo = np.load("celaniData/fine5.npy").reshape((2, 131,92))
x, y = np.unravel_index(hst[0, :end], (131, 92))
# Colour value per trajectory point.  Deliberately NOT stored in `cols`: that
# name holds the grid width set in the configuration cell and is read by
# move(), so rebinding it here would break every later call to getTraj.
colorVals = np.linspace(0,1, len(x))
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
fig, ax = plt.subplots(figsize=(20,100))
lc = LineCollection(segments, cmap="viridis")
lc.set_linewidth(2)
lc.set_array(colorVals)
line = ax.add_collection(lc)
ax.set_xlim([0,131])
ax.set_ylim([0,92])

ax.matshow(scemo[1].T, cmap = "binary")
# Source disc: centre (rSource, cSource) = (91, 45.5), radius find_range = 1.1.
ax.add_patch(plt.Circle((91,45.5), 1.1, color="r"))
# Mark the steps at which the recorded observation was non-zero.
detected = np.where(hst[1, :end])
xObs = x[detected]
yObs = y[detected]
ax.scatter(xObs, yObs, c="k")
# plt.xticks([])
# plt.yticks([])

In [None]:
# Plot the sampled trajectory alone, coloured by time step.
x, y = np.unravel_index(hst[0, :end], (131, 92))
# Colour value per trajectory point.  Deliberately NOT stored in `cols`: that
# name holds the grid width set in the configuration cell and is read by
# move(), so rebinding it here would break every later call to getTraj.
colorVals = np.linspace(0,1, len(x))
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
fig, ax = plt.subplots()
lc = LineCollection(segments, cmap="viridis")
lc.set_linewidth(2)
lc.set_array(colorVals)
line = ax.add_collection(lc)
ax.set_xlim([0,131])
ax.set_ylim([0,92])

In [None]:
# Reference values: run mb.sparse_T_CPU on the softmax policy of the stored
# model-based parameters.  Its first return value is kept as Vopt and used
# further down as the "optimal" reference.
# NOTE(review): the meaning of the second return value (eta) and of the final
# argument 1 is defined in modelBasedTrain, not visible here.
thOPt = np.load("results/modelBased/M1/celani/fine5/sparseCPUTest/theta_Conv8000.npy")
piOpt = softmax(thOPt, axis=2)
Vopt, eta = mb.sparse_T_CPU(piOpt, dataC, rSource, cSource, find_range, R, rho, 1)
Vopt

In [None]:
# Load the critic and the actor parameters saved at checkpoint 38000 of the same
# TD(lambda) run (both paths share the run directory and differ only in the
# Critics/ vs Actors/ sub-folder and file prefix).
Vlambda = np.load("results/TD_Lambda/M1/lambda_actor0.8/lambda_critic0.8/alphaActor_0.1_Scheduled_alphaCritic_0.1/testActor_episodes_50000/Critics/critic38000.npy")
thLambda = np.load("results/TD_Lambda/M1/lambda_actor0.8/lambda_critic0.8/alphaActor_0.1_Scheduled_alphaCritic_0.1/testActor_episodes_50000/Actors/theta38000.npy")

In [None]:
# Display the critic array loaded from checkpoint 38000.
Vlambda

In [None]:
# Run mb.sparse_T_CPU on the softmax policy of the loaded TD(lambda) actor, for
# comparison with the stored critic.  Note that `eta` is overwritten here.
piLambda = softmax(thLambda, axis=2)
VperLambda, eta = mb.sparse_T_CPU(piLambda, dataC, rSource, cSource, find_range, R, rho, 1)
VperLambda

In [None]:
# Sanity check of the directory-name regex: print every matching long-run
# directory together with the captured (actor lambda, critic lambda, actor lr,
# critic lr) groups.  The pattern does not depend on the loop, so it is built once.
reg = re.compile("results/TD_Lambda/M1/lambda_actor(0\\.[0-9]*)/lambda_critic(0\\.[0-9]*)/alphaActor_([0-9]+\\.[0-9]*)(?:_Scheduled)?_alphaCritic_([0-9]+\\.[0-9]*)(?:_Scheduled)?/longRunCont_episodes_1000000/")
for a in glob.glob("results/TD_Lambda/M1/lambda_actor*/lambda_critic*/alphaActor_*_Scheduled_alphaCritic_*/longRunCont_episodes_1000000/"):
    print(a)
    captured = reg.match(a).groups()
    print(captured)

In [None]:
# List the run directories that the evaluation cell below will process.
parentPattern = "results/TD_Lambda/M1/lambda_actor*/lambda_critic*/alphaActor_0.1_Scheduled_alphaCritic_*/*oPiccolo*/"
parents = glob.glob(parentPattern)
for parentDir in parents:
    print(parentDir)

In [None]:

# For every matching run directory: load each saved actor/critic checkpoint,
# compute the diagnostics and plot them with plot_and_save.
# Relies on Vopt, dataC, R and rho defined in earlier cells.  The names
# totIter, thetas, obj, normDiff, diffFromOpt and gr are left at module level
# on purpose: a later cell re-plots the last run from them.

# Both patterns are loop-invariant, so they are compiled once.
reg = re.compile("results/TD_Lambda/M1/lambda_actor(0\\.[0-9]*)/lambda_critic(0\\.[0-9]*)/alphaActor_([0-9]+\\.[0-9]*)(?:_Scheduled)?_alphaCritic_([0-9]+\\.[0-9]*)(?:_Scheduled)?/(.*)/")
thetaIterRe = re.compile("theta([0-9]+).npy")

parents = glob.glob("results/TD_Lambda/M1/lambda_actor*/lambda_critic*/alphaActor_0.1_Scheduled_alphaCritic_*/*oPiccolo*/")
for parentDir in parents:
    ls = glob.glob(parentDir+"Actors/theta*")
    # Checkpoint indices as integers.  Taking min() over the path strings would
    # compare them lexicographically ("theta10000" sorts before "theta2000")
    # and could pick the wrong first checkpoint.
    checkpointIters = [int(m.group(1)) for m in map(thetaIterRe.search, ls) if m]
    # Same count as before: two fewer than the number of actor files found.
    # NOTE(review): the reason for leaving two checkpoints out is not visible here.
    nEval = len(ls) - 2
    runMatch = reg.match(parentDir)
    if runMatch is None or not checkpointIters or nEval <= 0:
        # Nothing to evaluate; skip instead of failing on min() of an empty
        # list, a negative array size or an unparsable directory name.
        print(f"Skipping {parentDir}: found {len(ls)} actor checkpoint file(s)")
        continue
    totIter = nEval
    minTh = min(checkpointIters)
    gr = runMatch.groups()
    print(f"{gr}")
    print(f"Actor Lambda {gr[0]}; Lr {gr[2]}\nCritic Lambda {gr[1]}; Lr {gr[3]}; {gr[4]}")
    normDiff = np.zeros(totIter)
    diffFromOpt = np.zeros(totIter)
    obj = np.zeros(totIter)
    thetas = np.zeros(totIter)
    for i in range(0, totIter):
        # File names are built assuming one checkpoint every 1000 iterations.
        th = np.load(parentDir + f"Actors/theta{minTh + i*1000}.npy")
        thetas[i] = np.linalg.norm(th)
        trueV, _ = mb.sparse_T_CPU(softmax(th, axis = 2), dataC, rSource, cSource, find_range, R, rho, 1)
        lambdaV = np.load(parentDir + f"Critics/critic{minTh + i*1000}.npy")
        obj[i] = np.dot(trueV, rho)
        normDiff[i] = np.linalg.norm(trueV - lambdaV, 2)
        diffFromOpt[i] = np.linalg.norm(lambdaV - Vopt, 2)
    plot_and_save(totIter, thetas, obj, normDiff, diffFromOpt, gr[:4],gr[4] )
    

In [None]:
# Scratch evaluation of a step-size rule at one iteration index: 0.1 up to
# index 15000, then 0.1 divided by the number of iterations past 15000.
# Note: this rebinds the notebook-level name `i`.
i = 15100
0.1 if i <= 15000 else 0.1 / (i - 15000)

In [None]:
# Re-plot the series left behind by the last iteration of the evaluation loop
# above (totIter, thetas, obj, normDiff, diffFromOpt, gr) under the name "scemo".
plot_and_save(totIter, thetas, obj, normDiff, diffFromOpt, gr[:4], "scemo")

In [None]:
# Scratch figure: dashed black diagonal with a dashed red vertical line at x = 50.
diagonal = range(100)
plt.plot(diagonal, diagonal, "k--")
plt.vlines(50, 0, 100, colors="r", linestyles="--")

In [None]:
def f(x):
    """Return 1000 / (1000 + x**(2/3))."""
    scale = 1000
    return scale / (scale + x ** (2 / 3))
def g(x):
    """Return 1000 / (1000 + x)."""
    scale = 1000
    return scale / (scale + x)
# Compare the two decay curves f and g on 0, 1000, ..., 49000, with a zero baseline.
xs = range(0, 50000, 1000)
plt.plot(xs, list(map(f, xs)), label ="2/3")
plt.plot(xs, list(map(g, xs)), label ="1")
plt.hlines(0, *plt.xlim(), "k")
plt.legend()

In [None]:
# Spot-check both decay functions at x = 3.
f(3), g(3)