In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
from itertools import product
import pandas as pd
import seaborn as sns

In [83]:
def constroe_estados(profundidade):
    estados = {}
    estados_list = []
    for v in range(profundidade):
        aux = list(product([v],list(range(0,v+1))))
        estados[str(v)] =  aux
        estados_list = estados_list + aux

    estados[str(profundidade)] = [(-1,-1)]  
    estados_list = estados_list+[(-1,-1)]
    return estados,estados_list

def proximos(estado,profundidade):
    atingiveis = {}
    if estado[0]<profundidade and estado[0]>=0:
        atingiveis["estados"] = [(estado[0]+1,estado[1]+1),(estado[0]+1,0),(estado[0]+1,0),(0,0)]
        atingiveis["acoes"] = [0,0,1,2]

        aux_q = pq0/(pq0+(1-pq0)*np.exp(-alfa*estado[1]))
        
        atingiveis["probs"] = [1-aux_q,aux_q,1,1]
        atingiveis["recs"] = recompensa(estado)
    elif estado[0]==-1 or estado[0]==profundidade:
        atingiveis["estados"] = [(-1,-1),(-1,-1),(-1,-1),(-1,-1)]
        atingiveis["acoes"] = [0,0,1,2]
        atingiveis["probs"] = [1,0,1,1] 
        atingiveis["recs"] = [0,0,0,0]   

    return atingiveis

def anteriores(estado):
    if estado[1]>0:
        ants = [(estado[0]-1,estado[1]-1)]
    elif estado[1]==0 and estado[0]>0:
        ants = list(product([estado[0]-1],list(range(0,estado[0]-1))))
        
    return ants

def transicao(estado1,estado2,acao,profundidade):
    if acao == 0:
        aux_q = pq0/(pq0+(1-pq0)*np.exp(-alfa*estado1[1]))
        
        pq = aux_q   
        probs = 1-pq
        dx0 = estado2[0]-estado1[0]
        if dx0 == 1:
            dy0 = estado2[1]-estado1[1]
            if dy0 == 1:
                pr = probs
            elif estado2[1]==0:
                pr = pq 
            else:
                pr = 0  
        else:
            pr = 0
            
    elif acao == 1:
        dx0 = estado2[0]-estado1[0]
        if dx0==1 and estado1[0]==0:
            pr = 1
        else:
            pr = 0
        
    elif acao == 2:
        if estado2[1] == 0 and estado2[0] == 0:
            pr = 1
        else:
            pr = 0
    else:
        pr = 0
    
    if estado2[0]==-1:
        if estado1[0] == profundidade:
            pr = 1
        else:
            pr = 0
    if estado1[0]==-1:
        if estado2[0] == -1:
            pr = 1
        else:
            pr = 0
    return pr

def recompensa(estado):
    tvida,trev = estado[0],estado[1]

    valor_equipamento = np.exp(-0.005*tvida)
    valor_manutencao = 0.02 - 0.01*np.exp(-0.1*tvida)

    # pq = pq0/(pq0+(1-pq0)*np.exp(-alfa*trev))
    valor_defeito = -2*valor_manutencao/valor_equipamento
        
    res_a01,res_a02 = 0.01,valor_defeito

    res_a1 = -valor_manutencao/valor_equipamento
    
    res_a2 = -(1 - 0.8*valor_equipamento)
    
    res = [res_a01,res_a02,res_a1,res_a2]
    if estado[0]==-1:
        res = [0,0,0,0]
    return res


pq0, alfa = 0.01 , 0.4

def cria_politica(lista_estados):
    politica = {}
    for est in lista_estados:
        politica[est] = [1,0,0]
    return politica

def avalia_politica(sims, lista_estados,politica):
    tol = 1e-5
    prof = lista_estados[-2][0]
    
    VN = {v:1-2*random.random() for v in lista_estados}
    VN[(-1,-1)]=0
    
    cnt, nor = 0, 10
    
    M = {}
    M[0] = np.array(list(V.values()))
    while nor>tol and cnt <= sims:
        
        V = {k:v for k,v in VN.items()}
        VN = {}

        for est in lista_estados:
            next = proximos(est,prof) 
            nest = next["estados"]
            probs = next["probs"]
            recs = next["recs"]
            pols = politica[est]

            v_a0 = probs[0]*(recs[0]+V[nest[0]])+probs[1]*(recs[1]+V[nest[1]])
            v_a1 = probs[2]*(recs[2]+V[nest[2]])
            v_a2 = probs[3]*(recs[3]+V[nest[3]])

            VN[est] = pols[0]*v_a0+pols[1]*v_a1+pols[2]*v_a2
        
        cnt = cnt+1
        M[cnt] = np.array(list(VN.values()))

        nor = np.linalg.norm(M[cnt]-M[cnt-1])
        # aux.append(nor)
    return M

def aprimora_politica(lista_estados,politica,sims = 50):
    tol = 1e-5
    prof = lista_estados[-2][0]
    
    VN = {v:1-2*random.random() for v in lista_estados}
    VN[(-1,-1)]=0
    
    # politica = {est:0 for v in lista_estados}

    P,M = {},{}
    PLN = {k:v for k,v in politica.items()}

    P[0] = politica
    
    cnt_pol = 0

    estavel = False

    while not estavel and cnt_pol<500:
        cnt_pol = cnt_pol+1
        
        PL = {k:v for k,v in PLN.items()}
        PLN = {}
        
        cnt, nor = 0, 10
        while nor>tol and cnt <= sims:
            
            V = {k:v for k,v in VN.items()}
            VN = {}

            for est in lista_estados:
                next = proximos(est,prof) 
                nest = next["estados"]
                probs = next["probs"]
                recs = next["recs"]
                pols = politica[est]

                v_a0 = probs[0]*(recs[0]+V[nest[0]])+probs[1]*(recs[1]+V[nest[1]])
                v_a1 = probs[2]*(recs[2]+V[nest[2]])
                v_a2 = probs[3]*(recs[3]+V[nest[3]])

                VN[est] = pols[0]*v_a0+pols[1]*v_a1+pols[2]*v_a2
            
            cnt = cnt+1
            nor = np.linalg.norm(np.array(list(V.values()))-np.array(list(VN.values())))
        
        V = {k:v for k,v in VN.items()}
        M[cnt_pol-1] = {k:v for k,v in VN.items()}
        mud = 0
        for est in lista_estados:
            next = proximos(est,prof) 
            nest = next["estados"]
            probs = next["probs"]
            recs = next["recs"]
            pols = politica[est]
            v_a0 = probs[0]*(recs[0]+V[nest[0]])+probs[1]*(recs[1]+V[nest[1]])
            v_a1 = probs[2]*(recs[2]+V[nest[2]])
            v_a2 = probs[3]*(recs[3]+V[nest[3]])
            idx_pol_max = np.argmax([v_a0,v_a1,v_a2])
            aux_pol = [0,0,0]
            aux_pol[idx_pol_max]=1
            PLN[est] = aux_pol
            
            if not pols[idx_pol_max] == 1:
                mud = mud+1

        if mud > 0:
            estavel = False
        else:
            estavel = True
            
        politica = {k:v for k,v in PLN.items()}
        P[cnt_pol] = {k:v for k,v in PLN.items()}

    cnt, nor = 0, 10
    while nor>tol and cnt <= sims:
        
        V = {k:v for k,v in VN.items()}
        VN = {}

        for est in lista_estados:
            next = proximos(est,prof) 
            nest = next["estados"]
            probs = next["probs"]
            recs = next["recs"]
            pols = politica[est]
            
            v_a0 = probs[0]*(recs[0]+V[nest[0]])+probs[1]*(recs[1]+V[nest[1]])
            v_a1 = probs[2]*(recs[2]+V[nest[2]])
            v_a2 = probs[3]*(recs[3]+V[nest[3]])

            VN[est] = pols[0]*v_a0+pols[1]*v_a1+pols[2]*v_a2

        cnt = cnt+1
        nor = np.linalg.norm(np.array(list(V.values()))-np.array(list(VN.values())))
    
    M[cnt_pol] = {k:v for k,v in VN.items()}
    
    return P,M


def value_iteration(lista_estados,politica,sims = 500):
    tol = 1e-5
    prof = lista_estados[-2][0]
    
    VN = {v:0.05 for v in lista_estados}
    VN[(-1,-1)] = 0

    PLN = {k:np.argmax(v) for k,v in politica.items()}
    
    M,P = {},{}
    M[0] = np.array(list(VN.values()))
    P[0] = {k:v for k,v in PLN.items()}
    
    cnt, nor = 0, 10
    while nor>tol and cnt <= sims:
        
        PL = {k:v for k,v in PLN.items()}
        PLN = {}
        
        V = {k:v for k,v in VN.items()}
        VN = {}

        for est in lista_estados:
            next = proximos(est,prof) 
            nest = next["estados"]
            probs = next["probs"]
            recs = next["recs"]
        
            v_a0 = probs[0]*(recs[0]+V[nest[0]])+probs[1]*(recs[1]+V[nest[1]])
            v_a1 = probs[2]*(recs[2]+V[nest[2]])
            v_a2 = probs[3]*(recs[3]+V[nest[3]])

            VN[est] = np.max([v_a0,v_a1,v_a2])
            PLN[est] = np.argmax([v_a0,v_a1,v_a2])
            
        cnt = cnt+1
        nor = np.linalg.norm(np.array(list(V.values()))-np.array(list(VN.values())))
        M[cnt] = np.array(list(VN.values()))
        P[cnt] = {k:v for k,v in PLN.items()}
    
    return P, M

def gera_episodio(prof,T,politica,est0):

    est = (est0[0],est0[1])
    epi,t = [],0
    while t<T:
        t = t+1
        polis = np.cumsum(politica[est])
        aux_ac = random.random()
        ac = np.sum([int(x<aux_ac) for x in polis])
        nxt = proximos(est,prof)
        if ac == 0:
            aux_n = random.random()
            if aux_n <= nxt["probs"][0]:
                rec = nxt["recs"][0]
                nest = nxt["estados"][0]
            else:
                rec = nxt["recs"][1]
                nest = nxt["estados"][1]
        elif ac == 1:
            rec = nxt["recs"][2]
            nest = nxt["estados"][2]
        elif ac == 2:
            rec = nxt["recs"][3]
            nest = nxt["estados"][3]

        epi.append([est,ac,rec])
        est = (nest[0],nest[1])

    
    return epi

def FirstVisitMC(lista_estados,T):
    polis = cria_politica(lista_estados)
    V = {v:0.05 for v in lista_estados}
    Ret = {v:0 for v in lista_estados}
    
    prof = lista_estados[-2][0]
    epi = gera_episodio(prof,T,polis,(0,0))

    for t in range(len(epi)-1,-1,-1):





    

    


SyntaxError: incomplete input (3446586858.py, line 338)

In [78]:
prof = 20
_, est_list = constroe_estados(prof)
polis = cria_politica(est_list)
gera_episodio(prof,20,polis,(0,0),neps = 10)

[[(0, 0), 0, 0.01],
 [(1, 1), 0, 0.01],
 [(2, 2), 0, 0.01],
 [(3, 3), 0, 0.01],
 [(4, 4), 0, 0.01],
 [(5, 5), 0, 0.01],
 [(6, 6), 0, 0.01],
 [(7, 7), 0, 0.01],
 [(8, 8), 0, 0.01],
 [(9, 9), 0, 0.01],
 [(10, 10), 0, -0.03431602338595094],
 [(11, 0), 0, 0.01],
 [(12, 1), 0, 0.01],
 [(13, 2), 0, 0.01],
 [(14, 3), 0, 0.01],
 [(15, 4), 0, 0.01],
 [(16, 5), 0, 0.01],
 [(17, 6), 0, 0.01],
 [(18, 7), 0, 0.01],
 [(19, 8), 0, 0.01]]

In [None]:
prof = 20
_, est_list = constroe_estados(prof)
polis = cria_politica(est_list)
# M = avalia_politica(50,est_list,politica)
P, M = aprimora_politica(est_list,polis)
x = [a[0] for a,b in zip(P[0].items(),P[3].items()) if not np.argmax(a[1])==np.argmax(b[1])]
 

In [82]:
list(range(10-1,-1,-1))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

In [53]:
polis = np.cumsum([0.5,0.25,0.25])
aux_ac = random.random()

ac

[0, 0, 0]

In [54]:
aux_ac 

0.47984807443725486

In [None]:
P[1]

In [None]:
M[1]

In [None]:
prof = 20
_, est_list = constroe_estados(prof)
polis = cria_politica(est_list)
# M = avalia_politica(50,est_list,politica)
P, M = value_iteration(est_list,polis)
# x = [a[0] for a,b in zip(P[0].items(),P[].items()) if not a[1]==b[1]]

In [None]:
x = [a[0] for a,b in zip(P[0].items(),P[1].items()) if not a[1]==b[1]]
x

In [None]:
est_list

In [None]:
x = [a[0] for a,b in zip(P[0].items(),P[].items()) if not a[1]==b[1]]

In [None]:
vals = [v[0] for k,v in M.items()]
sns.lineplot(vals)

In [None]:
estado = (5,5)
sims = 100
politicas = list(product([0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2],[0,1,2]))
tpol = len(politicas)
M = {}
Mr = {}
Ms = {}
media = [0]*tpol

for i in range(sims):
    print(i,end="\r")
    # rec_p = []
    rec_ac_p = []
    # rec_s = []
    for p in politicas:
        rec = []
        # rec_es = [estado]
        cnt = 0
        est = (estado[0],estado[1])
        for x in p:
            auxr = recompensa(est,x)
            # rec.append(auxr)
            cnt = cnt + auxr
            if x==0:
                aux_q = pq0/(pq0+(1-pq0)*np.exp(-alfa*est[1]))
                if random.random()>aux_q:
                    est = (est[0]+1,est[1]+1)
                else:
                    est = (est[0]+1,0)
            elif x==1:
                est = (est[0]+1,0)
            elif x==2:
                est = (0,0)
            # rec_es.append(est)
        rec_ac_p.append(cnt)
        # rec_p.append(rec)
        # rec_s.append(rec_es)
    M[i] = rec_ac_p
    # Mr[i] = rec_p
    # Ms[i] =  rec_s
    media = [media[j]+rec_ac_p[j]/sims for j in range(tpol)]

[politicas[np.argmax(media)],np.max(media)]


            

    

In [None]:
# med = [0]*len(politicas)
zz = []
for i in range(sims):
    # med = [med[j]+0.1*M[i][j] for j in range(len(politicas))]
    aux = np.argmax(politicas[np.argmax(M[i])])
    zz.append(aux)
    # print(politicas[np.argmax(M[i])],np.max(M[i]))

[z for z in zz if z>0]

In [None]:
[z for z in zz if z>0]

In [None]:
prof = 30
est_dicio, est_list = constroe_estados(prof)
recs = []

aux1 = []
aux2 = []
aux3 = []

for x in est_dicio["28"]:
    aux1.append(recompensa(x,0))
    aux2.append(recompensa(x,1))
    aux3.append(recompensa(x,2))


In [None]:
anteriores((29,0))

In [None]:
recompensa((10,10),0)
estado1 = []
estado2 = []
acao = []
probs = []
for x in est_list:
    
    

In [None]:
prof = 30
est_dicio, est_list = constroe_estados(prof)

In [None]:
est_dicio["29"]

In [None]:
prof = 30
_ , est_list = constroe_estados(prof)
estado1 = []
estado2 = []
acao = []
probs = []
for x in est_list:
    for y in est_list:
        estado1.append(x)
        estado2.append(y)
        acao.append(0)
        probs.append(transicao(x,y,0))

df = pd.DataFrame({'estado origem':estado1,'estado_chegada':estado2,'acao':acao,"probs":probs})
df

In [None]:
df[df.probs>0]

In [None]:
transicao((3,0),(4,2),0)

In [None]:
proximos((4,0))

In [None]:
pd.DataFrame(proximos((4,0)))