In [4]:
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import gym
import pandas as pd
import math
import openpyxl

## Wenn zeitlich Möglich gößeres NN mit wesentlich mehr Layern
class DeepQNet(nn.Module):
    def __init__(self, lr, input_dims, fc1_dims, fc2_dims, n_actions):
        
        super(DeepQNet, self).__init__()
        self.input_dims = input_dims
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims
        self.n_actions = n_actions
        self.fc1 = nn.Linear(*self.input_dims, self.fc1_dims)
        self.fc2 = nn.Linear(self.fc1_dims, self.fc2_dims)
        self.fc3 = nn.Linear(self.fc2_dims, self.n_actions)
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.loss = nn.MSELoss()
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        # self.device = T.device('cpu')
        self.to(self.device)
        
        
        # Verschiedene Aktivierungsfunktionen Ausprobieren
    def forward(self, s):
        
      
        tmp = F.sigmoid(self.fc1(s))    ##################
        tmp = F.sigmoid(self.fc2(tmp))
        actions = self.fc3(tmp)
        
        #print("Ende Action")
        return actions

class Agent():
    def __init__(self, gamma, epsilon, lr, input_dims, batch_size,
                 n_actions, mem_size=1000000, eps_end=0.05, eps_dec=4e-4):
        # print('Class Agent, input_dims: ', input_dims)
        self.gamma = gamma
        self.epsilon = epsilon
        self.eps_end = eps_end
        self.eps_dec = eps_dec
        self.lr = lr
        self.action_space = [i for i in range(n_actions)]
        self.mem_size = mem_size
        self.batch_size = batch_size
        self.mem_counter = 0
        #self.anzahl = i
        self.Q_eval = DeepQNet(self.lr, input_dims=input_dims, n_actions=n_actions,
                               fc1_dims=256, fc2_dims=128)
        self.state_memory = np.zeros((self.mem_size, *input_dims), dtype=np.float32)
        self.new_state_memory = np.zeros((self.mem_size, *input_dims), dtype=np.float32)
        self.action_memory = np.zeros(self.mem_size, dtype=np.int32)
        self.reward_memory = np.zeros(self.mem_size, dtype=np.float32)
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool)
    def store_transition(self, s, action, reward, s_, done):
        
        index = self.mem_counter % self.mem_size
        #print("Debug store_transition: ", s, "\n", 'self.state_memory[index]: ', self.state_memory[index])
        self.state_memory[index] = s
        self.new_state_memory[index] = s_
        self.reward_memory[index] = reward
        self.action_memory[index] = action
        self.terminal_memory[index] = done
        self.mem_counter += 1
        
    def choose_action(self, observation, verbleibende_Kapa):
        
        if verbleibende_Kapa < observation[0]:
            
            action = 0
        
        else:
            
            if np.random.random() > self.epsilon:        # Hier nach das < in  > umtauschen
                # In der neuen Variante: nächstes unbearbeitete/unerledigte Tuple + Aktuelle Kapazität an neuronales Netz geben

                # Übergeben des Spielfeldes aus der Beobachtung 
                s = T.tensor([observation.astype(np.single)]).to(self.Q_eval.device)
                #print('State s _choose_action_ :')
                #print(s)

                # Neue Variante: Neuronales Netz gibt gewichtetes Ja und Nein Zurück (nächsten Auftrag annehmen oder ablehnen)
                # Übergebe das Spielfeld dem Neuronalen Netz -> Bekomme Ja und Nein Zurück mit jeweiligen wahrscheinlichkeiten / Reward / Kosten
                actions = self.Q_eval.forward(s)        #   [[-1.0435,  0.6671]]


                action = T.argmax(actions).item()        # Entscheide zwischen Ja und nein auf basis wahrscheinlichkeiten / Reward / Kosten

            else:
                action = np.random.choice(self.action_space)

        return action
    
    
    def choose_action_Abwechlung(self, observation):                   

        action = np.random.choice(self.action_space)

        return action
    
    
    def learn(self):
        
        if self.mem_counter < self.batch_size:
            #print("XCbyc")
            return
        self.Q_eval.optimizer.zero_grad()
        #self.anzahl = i
        
        max_mem = min(self.mem_counter, self.mem_size)
        batch = np.random.choice(max_mem, self.batch_size, replace=False)
        batch_index = np.arange(self.batch_size, dtype=np.int32)
        state_batch = T.tensor(self.state_memory[batch]).to(self.Q_eval.device)
        new_state_batch = T.tensor(self.new_state_memory[batch]).to(self.Q_eval.device)
        reward_batch = T.tensor(self.reward_memory[batch]).to(self.Q_eval.device)
        terminal_batch = T.tensor(self.terminal_memory[batch]).to(self.Q_eval.device)
        action_batch = self.action_memory[batch]
        q_eval = self.Q_eval.forward(state_batch)[batch_index, action_batch]
        q_next = self.Q_eval.forward(new_state_batch)
        q_next[terminal_batch] = 0.0
        q_target = reward_batch + self.gamma * T.max(q_next, dim=1)[0]
        loss = self.Q_eval.loss(q_target, q_eval).to(self.Q_eval.device)
        #l1.append(loss.item())
        loss.backward()
        self.Q_eval.optimizer.step()
        
        self.epsilon = self.epsilon - self.eps_dec if self.epsilon > self.eps_end \
            else self.eps_end
    def save_model(self, path):
        T.save({
            'model_state_dict': self.Q_eval.state_dict(),
            'optimizer_state_dict': self.Q_eval.optimizer.state_dict()
        }, path)
    def load_model(self, path):
        checkpoint = T.load(path)
        self.Q_eval.load_state_dict(checkpoint['model_state_dict'])
        self.Q_eval.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.Q_eval.to(self.Q_eval.device)
 
class Wendtris(gym.Env):
    metadata = {'render.modes': ['human']}
    # def __init__(self, *args, **kwargs):
    # self.reset( *args, **kwargs)
    def __init__(self, episode_length=10, \
                 ressource_capacity=30, min_rev=1, max_rev=9, legacy_model=False):
        super(Wendtris, self)
        self.episode_length = episode_length
        #self.ressource_types = ressource_types
        self.ressource_capacity = ressource_capacity
        self.min_rev = min_rev
        self.max_rev = max_rev
        self.legacy_model = legacy_model
        self.Auftraege_pro_Epoche 
        
        self.action_space = [0, 1]  ## 0-decline 1-accept
        
        # Dateipfad zur Excel-Datei angeben
        excel_file = 'Instan1_aus_script.xlsx'

        # Daten aus der Excel-Datei in ein pandas DataFrame laden
        data = pd.read_excel(excel_file)

        # Liste initialisieren, um die Daten zu speichern
        self.Alle_Auftraege_insgesammt= []

        # Schleife, um die Zeilen der Excel-Datei zu durchlaufen
        for i, row in data.iterrows():
            # Wenn die Anzahl der Zeilen in der Liste episode_length erreicht hat, eine neue Liste hinzufügen
            if i % self.episode_length == 0:
                self.Alle_Auftraege_insgesammt.append([])
            # Daten aus der aktuellen Zeile hinzufügen
            self.Alle_Auftraege_insgesammt[-1].append(row.tolist())
       
        self.Anzahl_der_Epochen = len(self.Alle_Auftraege_insgesammt)
        #print(self.Alle_Auftraege_insgesammt)
        
    def reset_all(self):
        
        
        self.current_step = 0
        self.recent_reward = 0
        self.total_reward = 0
        self.total_lost_reward = 0
        self.requests_accepted = 0
        self.requests_declined = 0
        self.state_capacity = 30
        self.vorheriges_X = 0
        self.vorheriges_Y = 0
        self.done = True 
        print("reset_all")
        self.Belohnung = []
        self.Kapazitaet = []
        self.Koordinate = []
        
        return(self.Alle_Auftraege_insgesammt)
 
    def Auftraege_pro_Epoche(self, i):
        
        Auftraege_pro_Epoche = self.Alle_Auftraege_insgesammt[i]
        #print("Epoche")
        #print(i)
        #print(Auftraege_pro_Epoche)
        return(self.Auftraege_pro_Epoche)    
            
    def reset(self):
        # ERzeuge neues Spielfeld, evtl. initiale Kapazität und initialen Standpunkt
        # [(R, x,y, K), (R,x,y,K),..]
        #print("Reset:")
        
        


        
        self.current_step = 0
        self.recent_reward = 0
        self.total_reward = 0
        self.total_lost_reward = 0
        self.requests_accepted = 0
        self.requests_declined = 0
        self.state_capacity = 30
        self.vorheriges_X = 700
        self.vorheriges_Y = 99
        self.done = True 
        #Liste_Belohnungen = 
        
        


        
        self.Auftrag = self.Alle_Auftraege_insgesammt[0][Epoche]
        
        self.Belohnung = []
        self.Kapazitaet = []
        self.Koordinate = []
        
        
        

        for Auftrag in self.Alle_Auftraege_insgesammt:
                c_Belohnung = np.array([sub_arr[1] for sub_arr in Auftrag])
                self.Belohnung.append(c_Belohnung)
                tensor = np.array([sub_arr[0] for sub_arr in Auftrag])
                self.Kapazitaet.append(tensor)
                x_koordinate = np.array([sub_arr[2] for sub_arr in Auftrag])
                y_koordinate = np.array([sub_arr[3] for sub_arr in Auftrag])

                kord = [x_koordinate,y_koordinate]
                #print(kord)
                self.Koordinate.append(kord)
      
            
        #print(self.Alle_Auftraege_insgesammt)
    #    print(self.Belohnung)
      #  print(self.Koordinate)
        #print("Kapa: ")
        #print(self.Kapazitaet)
        
        
        return (self.observe())
    
    
    def observe(self, Auftrag, temp_Auftragsnummer, verbleibende_Kapa, aktuelles_X, aktuelles_Y ):
        
        #self.observation = np.append(self.Alle_Auftraege_insgesammt[self.current_step], self.state_capacity)
        
        
        # einfach das erzeuge Spielfeld + aktuelle Position + aktuell verbrauchte Kapazität(schrumpft mit jedem angennommenen Auftrag) zurückgeben
        self.observation = np.append(Auftrag, verbleibende_Kapa)

        requests_remaining = self.episode_length - temp_Auftragsnummer
        
        #speichert nach observation noch die verbleibende Anzahl von Aufträgen, die vorherige X und Y koordinate
        self.observation = np.append(self.observation, [requests_remaining, \
                                                        aktuelles_X, aktuelles_Y])

        return self.observation
    
     
    def step(self,action,observation, Auftrag,verbleibende_Kapa,Auftrags_nummer,aktuelles_X, aktuelles_Y):
        

        # Hier wird die Kapazität abgezogen

        
        #Hier kommt die Abfrage nach der Action hin If action = 1 reward += ....
       
        temp_Auftragsnummer =  Auftrags_nummer + 1

        
        
        if action == 1:
            
            verbleibende_Kapa -=  observation[0]
            

        observation = env.observe(Auftrag, temp_Auftragsnummer, verbleibende_Kapa, aktuelles_X, aktuelles_Y )
        

        return observation, self.done, '', verbleibende_Kapa
    

env = Wendtris(legacy_model=False)
## Hier die InputDimensionanpassen
agent = Agent(gamma=0.99, epsilon= 0.0, lr=0.004, input_dims=[8], batch_size=64, n_actions=2, eps_dec=2e-6)
print("Anfan1g")

Alle_daten = env.reset_all()  
Anz_Auftraege_pro_Epoche = len(Alle_daten[1])

Vefuegbare_Kapa = 100
max_score = 0


score_Liste = []
#print("Alle DAten")
#print(Alle_daten)

scores, avg_scores_50, eps_history = [], [], []
bisheriger_Best_score = 0

agent.load_model("Neu_Trainingsdaten_A_Kapa100_tanh_Standart_10_Auftraege.zip")


Liste_score = []
Liste_action = []
Liste_best_action = []

#print(Alle_daten[15])
for i in range(100-1):
    
    print(i)
    reward=0
    verbleibende_Kapa = Vefuegbare_Kapa
    vorheriges_X = 0
    vorheriges_Y = 0
    aktuelles_X= 0
    aktuelles_Y = 0

    
    Auftraege_pro_Epoche = Alle_daten[i]
    
    
    Ursprung = (0,0)
    
    score = 0
    
    max_score = 0
  
    Auftrags_nummer = 0  # Nummer des jeweiligen auftrags
    
  
    observation = env.observe(Auftraege_pro_Epoche[0], Auftrags_nummer, verbleibende_Kapa, aktuelles_X, aktuelles_Y )
    #print("observation zu beginn")
    print(observation)
    zähler = 0
    Liste_action = []
    
    while Auftrags_nummer < Anz_Auftraege_pro_Epoche:
        
        zähler+=1 
        Auftrag = Auftraege_pro_Epoche[Auftrags_nummer]
        #Auftrag_next = Auftraege_pro_Epoche[Auftrags_nummer+1]
        #print(Auftrag)
        
        
        action = agent.choose_action(observation, verbleibende_Kapa )
       # print("Action")
        #print(action)
        Liste_action.append(action)
        
        if action == 1:
            
            aktuelles_X = observation[2]
            aktuelles_Y = observation[3]
            Aktueller_Punkt_ = (observation[6],observation[7])
        
            belohnung_new = observation[1]

            Kosten = math.dist(Aktueller_Punkt_,(observation[2],observation[3]))

            reward_new = belohnung_new - Kosten 

            score += reward_new
            
            if Auftrags_nummer == Anz_Auftraege_pro_Epoche-1:
                Auftrag = [0,0,0,0]
            else:
                Auftrag = Auftraege_pro_Epoche[Auftrags_nummer+1]


        observation_, done, info ,verbleibende_Kapa = env.step(action,observation,Auftrag,verbleibende_Kapa,\
                                                               Auftrags_nummer,aktuelles_X, aktuelles_Y)
        

        
        print(observation)

        
        if Auftrags_nummer == Anz_Auftraege_pro_Epoche-1 :
            
            
            #print("Score")
            #print(score)
            endpunkt = (aktuelles_X,aktuelles_Y)
            #print(endpunkt)
            score -= math.dist(Ursprung, endpunkt)
            #print(score)


            
        agent.store_transition(observation, action, \
                               score, observation_, done)        
        
        
        agent.learn()
            
        observation = observation_

        
        
        Auftrags_nummer +=1 
        
        
    if score > max_score:
            max_score = score
            
            
    if max_score > bisheriger_Best_score:
            bisheriger_Best_score = max_score   
        
        
    Liste_score.append(score)
        
    Liste_best_action.append(Liste_action)
    
    
print(Alle_daten[3])
print(Liste_score[3])
    
#print(Liste_action)
#print(Liste_best_action)


    ####
    #####
    ####   Entfernen, damit NN auch wirklich gespeichert wird
    #np.savetxt('scores_and_eps_Trainingsdaten_A_Kapa100_sigmoid_Standart_10_Auftraege.csv', history, delimiter=',', \
               #header="scores,avg_scores_50,epsilon", fmt='%f')

        
# Neue Excel-Datei erstellen
wb = openpyxl.Workbook()

# Arbeitsblatt auswählen
ws = wb.active

# Überschriften für die Spalten setzen
ws['A1'] = 'Score'
for col in range(2, 12):
    ws.cell(row=1, column=col).value = f'Action {col-1}'

# Daten in die Excel-Tabelle schreiben
for row in range(2, 32):
    # Score-Wert in die erste Spalte schreiben
    ws.cell(row=row, column=1).value = Liste_score[row-2]
    
    # Action-Werte in die restlichen Spalten schreiben
    for col in range(2, 12):
        ws.cell(row=row, column=col).value = Liste_best_action[row-2][col-2]

# Excel-Datei speichern
wb.save('Neu_Trainingsdaten_A_Kapa100_tanh_Standart_10_Auftraege.xlsx')    


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool)
  s = T.tensor([observation.astype(np.single)]).to(self.Q_eval.device)


Anfan1g
reset_all
0
[ 19  95   2  39 100  10   0   0]
[ 19  95   2  39 100  10   0   0]
Debug store_transition:  [ 19  95   2  39 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95   2  39 100   9   0   0]
Debug store_transition:  [ 19  95   2  39 100   9   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8 100   8   0   0]
Debug store_transition:  [ 21 105  49   8 100   8   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95  96  44 100   7   0   0]
Debug store_transition:  [ 19  95  96  44 100   7   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  7  35  84  25 100   6   0   0]
Debug store_transition:  [  7  35  84  25 100   6   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8 100   5   0   0]
Debug store_transition:  [ 21 105  49   8 100   5   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  6  30  13   7 100   4   0   0]
Debug store_transition: 

9
[  7  35  84  25 100  10   0   0]
[  7  35  84  25 100  10   0   0]
Debug store_transition:  [  7  35  84  25 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  7  35  84  25 100   9   0   0]
Debug store_transition:  [  7  35  84  25 100   9   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 12  60   5  10 100   8   0   0]
Debug store_transition:  [ 12  60   5  10 100   8   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95   2  39 100   7   0   0]
Debug store_transition:  [ 19  95   2  39 100   7   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 12  60   5  10 100   6   0   0]
Debug store_transition:  [ 12  60   5  10 100   6   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  6  30  14  24 100   5   0   0]
Debug store_transition:  [  6  30  14  24 100   5   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 12  60   5  10 100   4   0   0]
Debug store_transition:  [ 12  60   5  10 

[  6  30  14  24 100   1   0   0]
Debug store_transition:  [  6  30  14  24 100   1   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
15
[  6  30  14  24 100  10   0   0]
[  6  30  14  24 100  10   0   0]
Debug store_transition:  [  6  30  14  24 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  6  30  14  24 100   9   0   0]
Debug store_transition:  [  6  30  14  24 100   9   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  6  30  14  24 100   8   0   0]
Debug store_transition:  [  6  30  14  24 100   8   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8 100   7   0   0]
Debug store_transition:  [ 21 105  49   8 100   7   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95   2  39 100   6   0   0]
Debug store_transition:  [ 19  95   2  39 100   6   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95  96  44 100   5   0   0]
Debug store_transition:  [ 19  95  96  44

[ 21 105  49   8 100   8   0   0]
Debug store_transition:  [ 21 105  49   8 100   8   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  7  35  84  25 100   7   0   0]
Debug store_transition:  [  7  35  84  25 100   7   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 16  80  50   5 100   6   0   0]
Debug store_transition:  [ 16  80  50   5 100   6   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  7  35  84  25 100   5   0   0]
Debug store_transition:  [  7  35  84  25 100   5   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 12  60   5  10 100   4   0   0]
Debug store_transition:  [ 12  60   5  10 100   4   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95   2  39 100   3   0   0]
Debug store_transition:  [ 19  95   2  39 100   3   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 12  60   5  10 100   2   0   0]
Debug store_transition:  [ 12  60   5  10 100   2   0   0] 
 self.state_memory

[  6  30  13   7 100   1   0   0]
Debug store_transition:  [  6  30  13   7 100   1   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
28
[ 19  95  96  44 100  10   0   0]
[ 19  95  96  44 100  10   0   0]
Debug store_transition:  [ 19  95  96  44 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95  96  44 100   9   0   0]
Debug store_transition:  [ 19  95  96  44 100   9   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  7  35  84  25 100   8   0   0]
Debug store_transition:  [  7  35  84  25 100   8   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8 100   7   0   0]
Debug store_transition:  [ 21 105  49   8 100   7   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95   2  39 100   6   0   0]
Debug store_transition:  [ 19  95   2  39 100   6   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8 100   5   0   0]
Debug store_transition:  [ 21 105  49   8

[  6  30  14  24 100   2   0   0]
Debug store_transition:  [  6  30  14  24 100   2   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  6  30  13   7 100   1   0   0]
Debug store_transition:  [  6  30  13   7 100   1   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
34
[ 21 105  49   8 100  10   0   0]
[ 21 105  49   8 100  10   0   0]
Debug store_transition:  [ 21 105  49   8 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8 100   9   0   0]
Debug store_transition:  [ 21 105  49   8 100   9   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8 100   8   0   0]
Debug store_transition:  [ 21 105  49   8 100   8   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95   2  39 100   7   0   0]
Debug store_transition:  [ 19  95   2  39 100   7   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  7  35  84  25 100   6   0   0]
Debug store_transition:  [  7  35  84  25

[ 19  95   2  39 100   7   0   0]
Debug store_transition:  [ 19  95   2  39 100   7   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 13  7 81  6  2 39]
Debug store_transition:  [ 6 30 13  7 81  6  2 39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 13  7 81  5  2 39]
Debug store_transition:  [ 6 30 13  7 81  5  2 39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[12 60  5 10 81  4  2 39]
Debug store_transition:  [12 60  5 10 81  4  2 39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 13  7 69  3  5 10]
Debug store_transition:  [ 6 30 13  7 69  3  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 13  7 69  2  5 10]
Debug store_transition:  [ 6 30 13  7 69  2  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[19 95  2 39 69  1  5 10]
Debug store_transition:  [19 95  2 39 69  1  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
42
[ 19  95   2  39 100  10   0   0]
[ 19  95   2  39 100  1

[ 6 30 14 24 52  4 49  8]
Debug store_transition:  [ 6 30 14 24 52  4 49  8] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 14 24 52  3 49  8]
Debug store_transition:  [ 6 30 14 24 52  3 49  8] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[12 60  5 10 52  2 49  8]
Debug store_transition:  [12 60  5 10 52  2 49  8] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5 40  1  5 10]
Debug store_transition:  [16 80 50  5 40  1  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
48
[ 19  95  96  44 100  10   0   0]
[ 19  95  96  44 100  10   0   0]
Debug store_transition:  [ 19  95  96  44 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5 81  9 96 44]
Debug store_transition:  [16 80 50  5 81  9 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5 65  8 50  5]
Debug store_transition:  [16 80 50  5 65  8 50  5] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 14 24 49  7 50  

[19 95  2 39 74  4 84 25]
Debug store_transition:  [19 95  2 39 74  4 84 25] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 13  7 55  3  2 39]
Debug store_transition:  [ 6 30 13  7 55  3  2 39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[19 95 96 44 49  2 13  7]
Debug store_transition:  [19 95 96 44 49  2 13  7] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[19 95 96 44 49  1 13  7]
Debug store_transition:  [19 95 96 44 49  1 13  7] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
55
[ 19  95  96  44 100  10   0   0]
[ 19  95  96  44 100  10   0   0]
Debug store_transition:  [ 19  95  96  44 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[19 95  2 39 81  9 96 44]
Debug store_transition:  [19 95  2 39 81  9 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5 62  8  2 39]
Debug store_transition:  [16 80 50  5 62  8  2 39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 13  7 46  7 50  

[12 60  5 10 33  6 96 44]
Debug store_transition:  [12 60  5 10 33  6 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[12 60  5 10 33  5 96 44]
Debug store_transition:  [12 60  5 10 33  5 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 14 24 33  4 96 44]
Debug store_transition:  [ 6 30 14 24 33  4 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 14 24 33  3 96 44]
Debug store_transition:  [ 6 30 14 24 33  3 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[12 60  5 10 33  2 96 44]
Debug store_transition:  [12 60  5 10 33  2 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[12 60  5 10 33  1 96 44]
Debug store_transition:  [12 60  5 10 33  1 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
62
[ 12  60   5  10 100  10   0   0]
[ 12  60   5  10 100  10   0   0]
Debug store_transition:  [ 12  60   5  10 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[19 95 96 44 88  9  5 1

[ 6 30 14 24  0  3 96 44]
Debug store_transition:  [ 6 30 14 24  0  3 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 14 24  0  2 96 44]
Debug store_transition:  [ 6 30 14 24  0  2 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5  0  1 96 44]
Debug store_transition:  [16 80 50  5  0  1 96 44] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
68
[  7  35  84  25 100  10   0   0]
[  7  35  84  25 100  10   0   0]
Debug store_transition:  [  7  35  84  25 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  7  35  84  25 100   9   0   0]
Debug store_transition:  [  7  35  84  25 100   9   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8 100   8   0   0]
Debug store_transition:  [ 21 105  49   8 100   8   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5 79  7 49  8]
Debug store_transition:  [16 80 50  5 79  7 49  8] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0

[ 6 30 14 24  3  1  5 10]
Debug store_transition:  [ 6 30 14 24  3  1  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
75
[ 19  95  96  44 100  10   0   0]
[ 19  95  96  44 100  10   0   0]
Debug store_transition:  [ 19  95  96  44 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 19  95  96  44 100   9   0   0]
Debug store_transition:  [ 19  95  96  44 100   9   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  7  35  84  25 100   8   0   0]
Debug store_transition:  [  7  35  84  25 100   8   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[  6  30  13   7 100   7   0   0]
Debug store_transition:  [  6  30  13   7 100   7   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5 94  6 13  7]
Debug store_transition:  [16 80 50  5 94  6 13  7] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5 78  5 50  5]
Debug store_transition:  [16 80 50  5 78  5 50  5] 
 self.state_memory[index]:  [

[ 6 30 14 24 49  4 50  5]
Debug store_transition:  [ 6 30 14 24 49  4 50  5] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8  43   3  14  24]
Debug store_transition:  [ 21 105  49   8  43   3  14  24] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[19 95  2 39 22  2 49  8]
Debug store_transition:  [19 95  2 39 22  2 49  8] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8   3   1   2  39]
Debug store_transition:  [ 21 105  49   8   3   1   2  39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
83
[ 19  95   2  39 100  10   0   0]
[ 19  95   2  39 100  10   0   0]
Debug store_transition:  [ 19  95   2  39 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 7 35 84 25 81  9  2 39]
Debug store_transition:  [ 7 35 84 25 81  9  2 39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 7 35 84 25 81  8  2 39]
Debug store_transition:  [ 7 35 84 25 81  8  2 39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0

[16 80 50  5 10  3  5 10]
Debug store_transition:  [16 80 50  5 10  3  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[16 80 50  5 10  2  5 10]
Debug store_transition:  [16 80 50  5 10  2  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8  10   1   5  10]
Debug store_transition:  [ 21 105  49   8  10   1   5  10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
89
[ 16  80  50   5 100  10   0   0]
[ 16  80  50   5 100  10   0   0]
Debug store_transition:  [ 16  80  50   5 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[19 95  2 39 84  9 50  5]
Debug store_transition:  [19 95  2 39 84  9 50  5] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 13  7 65  8  2 39]
Debug store_transition:  [ 6 30 13  7 65  8  2 39] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 14 24 59  7 13  7]
Debug store_transition:  [ 6 30 14 24 59  7 13  7] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 

[19 95 96 44 61  4  5 10]
Debug store_transition:  [19 95 96 44 61  4  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8  61   3   5  10]
Debug store_transition:  [ 21 105  49   8  61   3   5  10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 14 24 40  2 49  8]
Debug store_transition:  [ 6 30 14 24 40  2 49  8] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8  34   1  14  24]
Debug store_transition:  [ 21 105  49   8  34   1  14  24] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
95
[ 12  60   5  10 100  10   0   0]
[ 12  60   5  10 100  10   0   0]
Debug store_transition:  [ 12  60   5  10 100  10   0   0] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 6 30 14 24 88  9  5 10]
Debug store_transition:  [ 6 30 14 24 88  9  5 10] 
 self.state_memory[index]:  [0. 0. 0. 0. 0. 0. 0. 0.]
[ 21 105  49   8  82   8  14  24]
Debug store_transition:  [ 21 105  49   8  82   8  14  24] 
 self.state_memory[index]:  [

SyntaxError: invalid syntax (4012691780.py, line 1)