# Librerías

In [136]:
import numpy as np
import pandas as pd
import random

# Funciones y variables globales

In [137]:
# Respawn points for the harvesters: every cell of row 0, row 1, column 11 and
# row 11 of the 12x12 grid, listed in that order.
# NOTE(review): (0,11), (1,11) and (11,11) appear twice, so a uniform draw from
# this list selects those cells twice as often as the others — confirm that
# this weighting is intended.
puntos_reaparicion = (
    [(0, columna) for columna in range(12)]
    + [(1, columna) for columna in range(12)]
    + [(fila, 11) for fila in range(12)]
    + [(11, columna) for columna in range(12)]
)

# Harvest points: the cells from which a harvester must start harvesting.
puntos_cosecha = [
    (2, 1),
    (9, 1),
    (2, 10),
    (9, 10),
]

# The same harvest points grouped by horizontal half (start row 2 vs. start
# row 9), which makes the simulation easier to visualise and to drive.
primer_cuadrante_hoz = [
    (2, 1),
    (2, 10),
]
segundo_cuadrante_hoz = [
    (9, 1),
    (9, 10),
]

In [138]:
def obtener_posicion_reaparicion(puntos_reaparicion):
    """Pick a harvester respawn position at random.

    Args:
        puntos_reaparicion: Non-empty sequence of candidate positions.

    Returns:
        The element of ``puntos_reaparicion`` found at a uniformly drawn index.

    Raises:
        ValueError: If ``puntos_reaparicion`` is empty.
    """
    return puntos_reaparicion[random.randrange(len(puntos_reaparicion))]

In [139]:
def calcular_distancia(posicion_random, puntos_cosecha):
    """Return the harvest point closest (Euclidean distance) to a position.

    Despite its name, this function returns the nearest point, not a distance.

    Args:
        posicion_random: ``(row, column)`` pair to measure from.
        puntos_cosecha: Iterable of candidate ``(row, column)`` pairs.

    Returns:
        The candidate nearest to ``posicion_random``. When several candidates
        are equally close, the one listed first wins. If ``puntos_cosecha`` is
        empty, the placeholder ``(100, 100)`` is returned (kept for backward
        compatibility with the previous implementation).
    """
    def distancia_cuadrada(punto):
        # The squared distance ranks candidates the same way the Euclidean
        # distance does, so the square root is unnecessary for picking the minimum.
        return (posicion_random[0] - punto[0]) ** 2 + (posicion_random[1] - punto[1]) ** 2

    # min() imposes no upper bound on the distance; the previous version used a
    # sentinel of 1000 and therefore returned (100, 100) for far-away candidates.
    return min(puntos_cosecha, key=distancia_cuadrada, default=(100, 100))

In [140]:
def crear_entorno_incial(punto_comenzar_cosecha):
    """Build the reward grid for the initial trip to a harvest point.

    This grid is used for the leg that goes from the respawn point to the
    chosen harvest point.

    Args:
        punto_comenzar_cosecha: ``(row, column)`` of the harvest point to reach.

    Returns:
        A 12x12 float array holding -1.0 on the outer two-cell border,
        -100.0 on the inner block (rows 2-9, columns 2-9) and 100.0 on the
        target cell. The target is written last, so it overrides whatever
        value the cell had.
    """
    recompensas = np.full((12, 12), -1.0)
    # Inner block: heavily penalised for this leg
    recompensas[2:10, 2:10] = -100.0
    fila_meta, columna_meta = punto_comenzar_cosecha[0], punto_comenzar_cosecha[1]
    recompensas[fila_meta, columna_meta] = 100.0
    return recompensas

In [141]:
def crear_entornos_cosechadora():
    """Build the reward grids for every horizontal harvesting pass.

    Returns:
        A list of eight independent 12x12 float arrays. The grid at index
        ``k`` is -100.0 everywhere except row ``k + 2``, columns 2-9, which
        hold -1.0 (the only walkable cells of that pass).
    """
    entornos = []
    for fila_cosecha in range(2, 10):
        # Everything is heavily penalised ...
        recompensas = np.full((12, 12), -100.0)
        # ... except the single crop row harvested in this pass
        recompensas[fila_cosecha, 2:10] = -1.0
        entornos.append(recompensas)
    return entornos

In [142]:
# Q-learning agent for the grid world: it defines the actions the agent can take
# and how the reward grid is interpreted. Code adapted from:
# https://www.analyticsvidhya.com/blog/2021/04/q-learning-algorithm-with-step-by-step-implementation-using-python/
class Agente():
    """Grid-world agent that moves over a 2-D reward grid.

    A cell whose reward is exactly -1.0 is walkable; a cell with any other
    value (goal or penalty) is terminal.
    """

    def __init__(self, rewards, q_values=None):
        """Create an agent for one reward grid.

        Args:
            rewards: 2-D array of rewards, indexed ``[row, column]``.
            q_values: Optional Q-table indexed ``[row, column, action]``. When
                omitted, ``get_next_action`` reads the module-level
                ``q_values`` variable instead, as the training cells expect.
        """
        self.rewards = rewards
        self.actions = ['up', 'right', 'down', 'left']
        # Grid size follows the reward grid instead of a hard-coded 12x12
        self.environment_rows, self.environment_columns = np.shape(rewards)
        self.q_values = q_values

    def is_terminal_state(self, current_row_index, current_column_index):
        """Return True when the cell's reward is anything other than -1.0."""
        return bool(self.rewards[current_row_index, current_column_index] != -1.)

    def get_starting_location(self):
        """Return a random walkable ``(row, column)``.

        Cells are drawn until a non-terminal one is found, so the grid must
        contain at least one walkable cell or this never returns.
        """
        current_row_index = np.random.randint(self.environment_rows)
        current_column_index = np.random.randint(self.environment_columns)
        while self.is_terminal_state(current_row_index, current_column_index):
            current_row_index = np.random.randint(self.environment_rows)
            current_column_index = np.random.randint(self.environment_columns)
        return current_row_index, current_column_index

    def get_next_action(self, current_row_index, current_column_index, epsilon):
        """Choose the index of the next action.

        Note that ``epsilon`` is the probability of taking the best-known
        action; with probability ``1 - epsilon`` a random action is taken.
        This is the reverse of the usual epsilon-greedy naming.
        """
        if np.random.random() < epsilon:
            # Prefer this agent's own table; otherwise use the module-level
            # ``q_values`` maintained by the training loop.
            table = self.q_values if self.q_values is not None else q_values
            return np.argmax(table[current_row_index, current_column_index])
        else:
            return np.random.randint(len(self.actions))

    def get_next_location(self, current_row_index, current_column_index, action_index):
        """Return the cell reached by applying an action.

        A move that would leave the grid leaves the position unchanged.
        """
        new_row_index = current_row_index
        new_column_index = current_column_index
        action = self.actions[action_index]
        if action == 'up' and current_row_index > 0:
            new_row_index -= 1
        elif action == 'right' and current_column_index < self.environment_columns - 1:
            new_column_index += 1
        elif action == 'down' and current_row_index < self.environment_rows - 1:
            new_row_index += 1
        elif action == 'left' and current_column_index > 0:
            new_column_index -= 1
        return new_row_index, new_column_index

    def get_shortest_path(self, start_row_index, start_column_index):
        """Follow the greedy policy from a start cell to a terminal cell.

        Returns:
            A list of ``[row, column]`` cells from the start to the terminal
            cell, both included, or ``[]`` when the start cell is itself terminal.

        Raises:
            RuntimeError: If the greedy policy revisits a cell. The policy is
                deterministic here, so a revisit means it would cycle forever,
                which usually indicates an insufficiently trained Q-table.
        """
        if self.is_terminal_state(start_row_index, start_column_index):
            return []

        current_row_index, current_column_index = start_row_index, start_column_index
        shortest_path = [[current_row_index, current_column_index]]
        visited = {(current_row_index, current_column_index)}
        while not self.is_terminal_state(current_row_index, current_column_index):
            # epsilon = 1. always selects the best-known action
            action_index = self.get_next_action(current_row_index, current_column_index, 1.)
            current_row_index, current_column_index = self.get_next_location(
                current_row_index, current_column_index, action_index)
            if (current_row_index, current_column_index) in visited:
                raise RuntimeError(
                    'Greedy policy revisits cell ({}, {}); the Q-table does not '
                    'lead to a terminal state from ({}, {})'.format(
                        current_row_index, current_column_index,
                        start_row_index, start_column_index))
            visited.add((current_row_index, current_column_index))
            shortest_path.append([current_row_index, current_column_index])
        return shortest_path


# Posicionar Cosechadora A

In [143]:
# Draw harvester A's respawn position at random from the respawn points and
# display it as the cell output.
posicion_random_cos = obtener_posicion_reaparicion(puntos_reaparicion)
posicion_random_cos

(4, 11)

In [144]:
# Harvest point closest to harvester A's respawn position. Note that
# calcular_distancia returns the nearest point, not a distance.
punto_comenzar_cosecha = calcular_distancia(posicion_random_cos, puntos_cosecha)
print(punto_comenzar_cosecha)

(2, 10)


In [145]:
# Flag which horizontal half harvester A works in. Both flags are computed
# directly so they are always (re)defined by this cell; the previous if/elif
# left them undefined, or stale from an earlier run, when no branch matched.
primer_hoz = punto_comenzar_cosecha in primer_cuadrante_hoz
segundo_hoz = (not primer_hoz) and punto_comenzar_cosecha in segundo_cuadrante_hoz
if not (primer_hoz or segundo_hoz):
    raise ValueError(
        'Harvest point {} belongs to neither horizontal half'.format(punto_comenzar_cosecha))
print(primer_hoz)
print(segundo_hoz)

True
False


In [146]:
# Reward grid for harvester A's positioning trip (respawn point -> harvest
# point), shown as a DataFrame for readability.
recorrido_posicionarse = crear_entorno_incial(punto_comenzar_cosecha=punto_comenzar_cosecha)
df = pd.DataFrame(recorrido_posicionarse)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,100.0,-1.0
3,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
4,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
5,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
6,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
7,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
8,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
9,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0


# Rutas de Cosechadora A

In [147]:
# One reward grid per harvesting pass of harvester A; index k covers crop row k + 2.
# (The former empty-list initialisation was overwritten immediately, so it is dropped.)
recorridos_cosechadora = crear_entornos_cosechadora()

In [148]:
# Plan harvester A's four harvesting passes as a serpentine over four crop rows.
# The same rule covers all four possible harvest points, replacing four
# hand-copied branches.
if punto_comenzar_cosecha not in puntos_cosecha:
    raise ValueError('Unknown harvest point: {}'.format(punto_comenzar_cosecha))

fila_inicio, columna_inicio = punto_comenzar_cosecha

# Upper half (start row 2) sweeps downwards through rows 2-5;
# lower half (start row 9) sweeps upwards through rows 9-6.
paso_fila = 1 if punto_comenzar_cosecha in primer_cuadrante_hoz else -1
filas_pasadas = [fila_inicio + paso_fila * pasada for pasada in range(4)]

# Each pass ends at the crop edge opposite to where it began, so the goal column
# alternates between 9 and 2. Starting at column 1 means the first pass runs
# towards column 9; starting at column 10 means it runs towards column 2.
columnas_meta = [9, 2, 9, 2] if columna_inicio == 1 else [2, 9, 2, 9]

# Crop row r is harvested in the environment stored at index r - 2.
primer_recorrido, segundo_recorrido, tercer_recorrido, cuarto_recorrido = [
    recorridos_cosechadora[fila - 2] for fila in filas_pasadas
]

# The harvest point lies outside the crop columns (column 1 or 10), so it has to
# be marked walkable for the first pass.
primer_recorrido[punto_comenzar_cosecha] = -1.0

# Mark the goal cell of every pass.
for entorno, fila, columna in zip(
        (primer_recorrido, segundo_recorrido, tercer_recorrido, cuarto_recorrido),
        filas_pasadas, columnas_meta):
    entorno[fila, columna] = 100.0

# Start cell of every leg: the respawn point, the harvest point, and for each
# later pass the cell in the next row directly next to the previous pass's goal.
puntos_iniciales = [posicion_random_cos, punto_comenzar_cosecha] + [
    (filas_pasadas[pasada], columnas_meta[pasada - 1]) for pasada in range(1, 4)
]

In [149]:
# Collect harvester A's five reward grids in travel order: the positioning trip
# followed by the four harvesting passes.
# NOTE(review): this rebinds ``recorridos_cosechadora`` from the list of pass
# environments to the list of legs, so the cell that indexes it by pass must be
# re-run from a fresh list before running this one again.
recorridos_cosechadora = [recorrido_posicionarse, primer_recorrido,
                          segundo_recorrido, tercer_recorrido, cuarto_recorrido]
print(puntos_iniciales)
print(recorridos_cosechadora[0])

[(4, 11), (2, 10), (3, 2), (4, 9), (5, 2)]
[[  -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.]
 [  -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.]
 [  -1.   -1. -100. -100. -100. -100. -100. -100. -100. -100.  100.   -1.]
 [  -1.   -1. -100. -100. -100. -100. -100. -100. -100. -100.   -1.   -1.]
 [  -1.   -1. -100. -100. -100. -100. -100. -100. -100. -100.   -1.   -1.]
 [  -1.   -1. -100. -100. -100. -100. -100. -100. -100. -100.   -1.   -1.]
 [  -1.   -1. -100. -100. -100. -100. -100. -100. -100. -100.   -1.   -1.]
 [  -1.   -1. -100. -100. -100. -100. -100. -100. -100. -100.   -1.   -1.]
 [  -1.   -1. -100. -100. -100. -100. -100. -100. -100. -100.   -1.   -1.]
 [  -1.   -1. -100. -100. -100. -100. -100. -100. -100. -100.   -1.   -1.]
 [  -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.]
 [  -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.   -1.]]


In [150]:
# Train a fresh Q-table for each leg of harvester A's route (positioning trip
# plus four harvesting passes) and record the greedy path learned for that leg.
paths_cosechadora = []

# Training hyper-parameters. They are identical for every leg, so they are set
# once here instead of being re-assigned on every loop iteration.
epsilon = 0.9  # probability of taking the best-known action (otherwise a random one)
discount_factor = 0.9  # discount applied to future rewards
learning_rate = 0.9  # step size of each Q-value update

for index, rewards in enumerate(recorridos_cosechadora):
    cosechadora = Agente(rewards)

    num_actions = len(cosechadora.actions)
    # Q-table for this leg, indexed [row, column, action]. It must stay a
    # module-level name because Agente.get_next_action reads ``q_values`` globally.
    q_values = np.zeros((cosechadora.environment_rows, cosechadora.environment_columns, num_actions))

    # Run 1000 training episodes
    for episode in range(1000):
        # Every episode starts from a random walkable cell
        row_index, column_index = cosechadora.get_starting_location()
        # Keep moving until a terminal cell (any reward other than -1.0) is reached
        while not cosechadora.is_terminal_state(row_index, column_index):
            # Choose the action, remember the current cell, then move
            action_index = cosechadora.get_next_action(row_index, column_index, epsilon)
            old_row_index, old_column_index = row_index, column_index
            row_index, column_index = cosechadora.get_next_location(row_index, column_index, action_index)
            # Reward of the cell just entered and the temporal-difference error
            reward = rewards[row_index, column_index]
            old_q_value = q_values[old_row_index, old_column_index, action_index]
            temporal_difference = reward + (discount_factor * np.max(q_values[row_index, column_index])) - old_q_value
            # Update the Q-value of the (previous cell, action) pair
            new_q_value = old_q_value + (learning_rate * temporal_difference)
            q_values[old_row_index, old_column_index, action_index] = new_q_value
    print('Training complete!')

    # Greedy path from this leg's start cell to its terminal cell
    path = cosechadora.get_shortest_path(puntos_iniciales[index][0], puntos_iniciales[index][1])
    print(path)
    paths_cosechadora.append(path)

Training complete!
[[4, 11], [3, 11], [3, 10], [2, 10]]
Training complete!
[[2, 10], [2, 9], [2, 8], [2, 7], [2, 6], [2, 5], [2, 4], [2, 3], [2, 2]]


Training complete!
[[3, 2], [3, 3], [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9]]
Training complete!
[[4, 9], [4, 8], [4, 7], [4, 6], [4, 5], [4, 4], [4, 3], [4, 2]]
Training complete!
[[5, 2], [5, 3], [5, 4], [5, 5], [5, 6], [5, 7], [5, 8], [5, 9]]


# Posicionar cosechadora B

In [151]:
# Draw harvester B's respawn position at random together with its nearest
# harvest point.
posicion_random_cos_b = obtener_posicion_reaparicion(puntos_reaparicion)
punto_comenzar_cosecha_b = calcular_distancia(posicion_random_cos_b, puntos_cosecha)

# Harvester B has to work the horizontal half that harvester A does not use.
# Each pair is (is A working this half?, harvest points of this half): for a
# half A is not working, B is re-drawn until its harvest point falls inside it.
for a_en_mitad, puntos_mitad in ((primer_hoz, primer_cuadrante_hoz),
                                 (segundo_hoz, segundo_cuadrante_hoz)):
    if a_en_mitad:
        continue
    while punto_comenzar_cosecha_b not in puntos_mitad:
        posicion_random_cos_b = obtener_posicion_reaparicion(puntos_reaparicion)
        punto_comenzar_cosecha_b = calcular_distancia(posicion_random_cos_b, puntos_cosecha)

# This keeps both harvesters from working the same horizontal half.
print(posicion_random_cos_b)
print(punto_comenzar_cosecha_b)

(11, 11)
(9, 10)


In [152]:
# Reward grid for harvester B's positioning trip: from its random respawn point
# to the nearest harvest point. Shown as a DataFrame for readability.
# NOTE: ``df`` is the same name used to display harvester A's grid earlier, so
# it now refers to B's grid.
recorrido_posicionarse_b = crear_entorno_incial(punto_comenzar_cosecha=punto_comenzar_cosecha_b)
df = pd.DataFrame(recorrido_posicionarse_b)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
3,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
4,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
5,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
6,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
7,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
8,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
9,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,100.0,-1.0


# Rutas de Cosechadora B

In [153]:
# One reward grid per harvesting pass of harvester B; index k covers crop row k + 2.
# (The former empty-list initialisation was overwritten immediately, so it is dropped.)
recorridos_cosechadora_b = crear_entornos_cosechadora()

In [154]:
# Plan harvester B's four harvesting passes as a serpentine over four crop rows.
# B must work the horizontal half that harvester A is not using; the same rule
# covers all four possible harvest points, replacing four hand-copied branches.
if punto_comenzar_cosecha_b not in puntos_cosecha:
    raise ValueError('Unknown harvest point: {}'.format(punto_comenzar_cosecha_b))

b_en_primera_mitad = punto_comenzar_cosecha_b in primer_cuadrante_hoz
# A half is free for B only when harvester A's flag for that half is not set.
if (primer_hoz if b_en_primera_mitad else segundo_hoz):
    raise ValueError(
        'Harvester B starts at {} in the half already used by harvester A'.format(
            punto_comenzar_cosecha_b))

fila_inicio_b, columna_inicio_b = punto_comenzar_cosecha_b

# Upper half (start row 2) sweeps downwards through rows 2-5;
# lower half (start row 9) sweeps upwards through rows 9-6.
paso_fila_b = 1 if b_en_primera_mitad else -1
filas_pasadas_b = [fila_inicio_b + paso_fila_b * pasada for pasada in range(4)]

# Each pass ends at the crop edge opposite to where it began, so the goal column
# alternates between 9 and 2. Starting at column 1 means the first pass runs
# towards column 9; starting at column 10 means it runs towards column 2.
columnas_meta_b = [9, 2, 9, 2] if columna_inicio_b == 1 else [2, 9, 2, 9]

# Crop row r is harvested in the environment stored at index r - 2.
primer_recorrido_b, segundo_recorrido_b, tercer_recorrido_b, cuarto_recorrido_b = [
    recorridos_cosechadora_b[fila - 2] for fila in filas_pasadas_b
]

# The harvest point lies outside the crop columns (column 1 or 10), so it has to
# be marked walkable for the first pass.
primer_recorrido_b[punto_comenzar_cosecha_b] = -1.0

# Mark the goal cell of every pass.
for entorno_b, fila_b, columna_b in zip(
        (primer_recorrido_b, segundo_recorrido_b, tercer_recorrido_b, cuarto_recorrido_b),
        filas_pasadas_b, columnas_meta_b):
    entorno_b[fila_b, columna_b] = 100.0

# Start cell of every leg: the respawn point, the harvest point, and for each
# later pass the cell in the next row directly next to the previous pass's goal.
puntos_iniciales_b = [posicion_random_cos_b, punto_comenzar_cosecha_b] + [
    (filas_pasadas_b[pasada], columnas_meta_b[pasada - 1]) for pasada in range(1, 4)
]
    

In [155]:
# Display the start cell of each of harvester B's five legs.
puntos_iniciales_b

[(11, 11), (9, 10), (8, 2), (7, 9), (6, 2)]

In [156]:
# Collect harvester B's five reward grids in travel order (positioning trip
# followed by the four harvesting passes); they are used to train the
# Q-learning model.
# NOTE(review): this rebinds ``recorridos_cosechadora_b`` from the list of pass
# environments to the list of legs.
recorridos_cosechadora_b = [recorrido_posicionarse_b, primer_recorrido_b,
                          segundo_recorrido_b, tercer_recorrido_b, cuarto_recorrido_b]
print(puntos_iniciales_b)
print(recorridos_cosechadora_b[2])

[(11, 11), (9, 10), (8, 2), (7, 9), (6, 2)]
[[-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100.   -1.   -1.   -1.   -1.   -1.   -1.   -1.  100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]
 [-100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100. -100.]]


In [157]:
# Train a fresh Q-table for each leg of harvester B's route (positioning trip
# plus four harvesting passes) and record the greedy path learned for that leg.
paths_cosechadora_b = []

# Training hyper-parameters. They are identical for every leg, so they are set
# once here instead of being re-assigned on every loop iteration.
epsilon = 0.9  # probability of taking the best-known action (otherwise a random one)
discount_factor = 0.9  # discount applied to future rewards
learning_rate = 0.9  # step size of each Q-value update

for index, rewards in enumerate(recorridos_cosechadora_b):
    cosechadora = Agente(rewards)

    num_actions = len(cosechadora.actions)
    # Q-table for this leg, indexed [row, column, action]. It must stay a
    # module-level name because Agente.get_next_action reads ``q_values`` globally.
    q_values = np.zeros((cosechadora.environment_rows, cosechadora.environment_columns, num_actions))

    # Run 1000 training episodes
    for episode in range(1000):
        # Every episode starts from a random walkable cell
        row_index, column_index = cosechadora.get_starting_location()
        # Keep moving until a terminal cell (any reward other than -1.0) is reached
        while not cosechadora.is_terminal_state(row_index, column_index):
            # Choose the action, remember the current cell, then move
            action_index = cosechadora.get_next_action(row_index, column_index, epsilon)
            old_row_index, old_column_index = row_index, column_index
            row_index, column_index = cosechadora.get_next_location(row_index, column_index, action_index)
            # Reward of the cell just entered and the temporal-difference error
            reward = rewards[row_index, column_index]
            old_q_value = q_values[old_row_index, old_column_index, action_index]
            temporal_difference = reward + (discount_factor * np.max(q_values[row_index, column_index])) - old_q_value
            # Update the Q-value of the (previous cell, action) pair
            new_q_value = old_q_value + (learning_rate * temporal_difference)
            q_values[old_row_index, old_column_index, action_index] = new_q_value
    print('Training complete!')

    # Greedy path from this leg's start cell to its terminal cell
    path = cosechadora.get_shortest_path(puntos_iniciales_b[index][0], puntos_iniciales_b[index][1])
    print(path)
    paths_cosechadora_b.append(path)

Training complete!
[[11, 11], [10, 11], [9, 11], [9, 10]]
Training complete!
[[9, 10], [9, 9], [9, 8], [9, 7], [9, 6], [9, 5], [9, 4], [9, 3], [9, 2]]
Training complete!
[[8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 7], [8, 8], [8, 9]]
Training complete!
[[7, 9], [7, 8], [7, 7], [7, 6], [7, 5], [7, 4], [7, 3], [7, 2]]
Training complete!
[[6, 2], [6, 3], [6, 4], [6, 5], [6, 6], [6, 7], [6, 8], [6, 9]]


# Rutas de Tractor

In [158]:
def crear_entorno_tractor_pr_cua(posicion_cosechadora, filas_entorno=12, columnas_entorno=12):
    """Build the reward grid for the tractor's trip to a harvester in the first quadrant.

    The grid starts at -100.0 everywhere. Every cell in the rows above the
    harvester's row (rows ``0`` to ``posicion_cosechadora[0] - 1``) and in
    columns 0-9 is then set to -1.0.

    NOTE(review): the column limit is the literal 10 rather than a value
    derived from ``columnas_entorno`` — confirm that this is intended.

    Args:
        posicion_cosechadora: ``(row, column)`` of the harvester; only the row is used.
        filas_entorno: Number of grid rows.
        columnas_entorno: Number of grid columns.

    Returns:
        A single-element list containing the reward grid.
    """
    recompensas = np.full((filas_entorno, columnas_entorno), -100.0)
    celdas_seguras = [(fila, columna)
                      for fila in range(posicion_cosechadora[0])
                      for columna in range(10)]
    for fila, columna in celdas_seguras:
        recompensas[fila, columna] = -1.0
    return [recompensas]

def crear_entorno_tractor_sg_cua(posicion_cosechadora, filas_entorno=12, columnas_entorno=12):
    """Build the reward grid for the tractor's trip to a harvester in the second quadrant.

    The grid starts at -100.0 everywhere. Every cell in the rows below the
    harvester's row, from row 11 up to ``posicion_cosechadora[0] + 1``, is
    then set to -1.0 across all columns.

    NOTE(review): the bottom row is the literal 11 rather than a value
    derived from ``filas_entorno`` — confirm that this is intended.

    Args:
        posicion_cosechadora: ``(row, column)`` of the harvester; only the row is used.
        filas_entorno: Number of grid rows.
        columnas_entorno: Number of grid columns.

    Returns:
        A single-element list containing the reward grid.
    """
    recompensas = np.full((filas_entorno, columnas_entorno), -100.0)
    celdas_seguras = [(fila, columna)
                      for fila in range(11, posicion_cosechadora[0], -1)
                      for columna in range(columnas_entorno)]
    for fila, columna in celdas_seguras:
        recompensas[fila, columna] = -1.0
    return [recompensas]

def crear_entorno_trailer_pr_cua(posicion_cosechadora, posicion_cosechadora_b):
    """Build the reward grid for the trip to the trailer when the tractor is in the first quadrant.

    Args:
        posicion_cosechadora: ``(row, column)`` of harvester A.
        posicion_cosechadora_b: ``(row, column)`` of harvester B.

    Returns:
        A 12x12 float array that is -1.0 everywhere except for the -100.0
        cells: rows 6-11 of columns 2-9, both harvester positions, and the
        cells (2, 11) and (3, 11).
    """
    recompensas = np.full((12, 12), -1.0)

    # Heavily penalised block: rows 6-11 of the central columns
    recompensas[6:12, 2:10] = -100.0

    # Individually penalised cells: the two harvesters (to avoid collisions)
    # plus two fixed cells in column 11
    celdas_bloqueadas = [
        (posicion_cosechadora[0], posicion_cosechadora[1]),
        (posicion_cosechadora_b[0], posicion_cosechadora_b[1]),
        (2, 11),
        (3, 11),
    ]
    for fila, columna in celdas_bloqueadas:
        recompensas[fila, columna] = -100.0

    return recompensas

def crear_entorno_trailer_sg_cua(posicion_cosechadora, posicion_cosechadora_b):
    """Build the reward grid for the trip to the trailer when the tractor is in the second quadrant.

    Args:
        posicion_cosechadora: ``(row, column)`` of harvester A.
        posicion_cosechadora_b: ``(row, column)`` of harvester B.

    Returns:
        A 12x12 float array that is -1.0 everywhere except for the -100.0
        cells: rows 2-5 of columns 2-9, both harvester positions, and the
        cells (2, 11) and (3, 11).
    """
    recompensas = np.full((12, 12), -1.0)

    # Heavily penalised block: rows 2-5 of the central columns
    recompensas[2:6, 2:10] = -100.0

    # Individually penalised cells: the two harvesters (to avoid collisions)
    # plus two fixed cells in column 11
    celdas_bloqueadas = [
        (posicion_cosechadora[0], posicion_cosechadora[1]),
        (posicion_cosechadora_b[0], posicion_cosechadora_b[1]),
        (2, 11),
        (3, 11),
    ]
    for fila, columna in celdas_bloqueadas:
        recompensas[fila, columna] = -100.0

    return recompensas


## Posiciones Cosechadoras para que los tractores eviten chocar con ellas al dirigirse al trailer

In [159]:
# Each harvester's stop position is the last cell of its segment at index 4.
posicion_cosechadora, posicion_cosechadora_b = (
    tuple(ruta[4][-1]) for ruta in (paths_cosechadora, paths_cosechadora_b)
)

# Cells where a harvester may stop, grouped by quadrant
primer_cuadrante_cos = [(5,2), (5,9)]
segundo_cuadrante_cos = [(6,2), (6,9)]

print(posicion_cosechadora, posicion_cosechadora_b, sep="\n")

(5, 9)
(6, 9)


# Recorrido Tractor A

In [160]:
# Find which harvester stopped in the first quadrant and build tractor A's two
# reward grids: spawn point -> pick-up cell next to that harvester, then
# pick-up cell -> trailer.
primer_cuadrante_cos = [(5,2), (5,9)]
segundo_cuadrante_cos = [(6,2), (6,9)]

# Tractor A spawn point
posicion_inicial = (0,0)

# The pick-up cell is always one row above the harvester; its column offset
# depends on where the harvester stopped.
desplazamiento_columna = {(5, 2): -1, (5, 9): 0}

# Harvester B takes precedence when both are in the first quadrant.
cosechadora_objetivo = next(
    (pos for pos in (posicion_cosechadora_b, posicion_cosechadora) if pos in primer_cuadrante_cos),
    None,
)
if cosechadora_objetivo is None:
    # Fail loudly instead of leaving recorrido_tractor / posicion undefined
    # (or silently reusing values from an earlier kernel run).
    raise ValueError(
        "Ninguna cosechadora se encuentra en el primer cuadrante: "
        f"{posicion_cosechadora}, {posicion_cosechadora_b}"
    )

# First leg: towards the harvester
recorrido_tractor = crear_entorno_tractor_pr_cua(posicion_cosechadora=cosechadora_objetivo)

# Both harvester cells are obstacles
recorrido_tractor[0][posicion_cosechadora_b[0], posicion_cosechadora_b[1]] = -100.0
recorrido_tractor[0][posicion_cosechadora[0], posicion_cosechadora[1]] = -100.0

# Goal of the first leg, and start of the leg towards the trailer
posicion = (
    cosechadora_objetivo[0] - 1,
    cosechadora_objetivo[1] + desplazamiento_columna[cosechadora_objetivo],
)
recorrido_tractor[0][posicion[0], posicion[1]] = 100.0

# Second leg: towards the trailer
recorido_tractor_trailer = crear_entorno_trailer_pr_cua(posicion_cosechadora=posicion_cosechadora, posicion_cosechadora_b=posicion_cosechadora_b)
# Final tractor cell (next to the trailer).
# NOTE(review): the saved output of this cell shows the goal at (3, 11), not (1, 11) — confirm which is intended.
recorido_tractor_trailer[(1, 11)] = 100.0

primer_recorrido_tractor = recorrido_tractor[0]
segundo_recorrido_tractor = recorido_tractor_trailer
df = pd.DataFrame(segundo_recorrido_tractor)
df
    

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-100.0
3,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,100.0
4,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
5,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-100.0,-1.0,-1.0
6,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
7,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
8,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
9,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0


In [161]:
# Tractor A's reward grids in traversal order: first leg, then second leg.
recorridos_tractor = [primer_recorrido_tractor, segundo_recorrido_tractor]
# Show the second grid for visual inspection.
df = pd.DataFrame(segundo_recorrido_tractor)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-100.0
3,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,100.0
4,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
5,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-100.0,-1.0,-1.0
6,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
7,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
8,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
9,-1.0,-1.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0


In [162]:
# Routes found for tractor A, one per reward grid.
paths_tractor = []
# Start cell used when extracting each route: spawn point for the first grid,
# pick-up cell for the second.
puntos_iniciales = [posicion_inicial, posicion]

# Train one agent per reward grid with the Q-learning update below, then record its route.
for index, rewards in enumerate(recorridos_tractor):
  tractor = Agente(rewards)

  # Training hyper-parameters
  epsilon = 0.9 # passed to get_next_action; presumably the probability of taking the greedy action — confirm in Agente
  discount_factor = 0.9 # weight of the best next-state Q-value in the update
  learning_rate = 0.9 # fraction of the temporal difference applied on each update

  num_actions = len(tractor.actions)
  # NOTE(review): q_values is never passed to the agent; Agente presumably reads this
  # module-level name when picking actions and extracting the path — confirm before renaming.
  q_values = np.zeros((tractor.environment_rows, tractor.environment_columns, num_actions))


  # Run 1000 training episodes
  for episode in range(1000):
    # Ask the agent for this episode's start cell
    row_index, column_index = tractor.get_starting_location()
    # Keep moving until the agent reports a terminal cell
    while not tractor.is_terminal_state(row_index, column_index):
      # Choose the next action
      action_index = tractor.get_next_action(row_index, column_index, epsilon)
      # Apply the action and move to the resulting cell
      old_row_index, old_column_index = row_index, column_index # remember the cell we are leaving
      row_index, column_index = tractor.get_next_location(row_index, column_index, action_index)
      # Reward of the new cell and temporal difference against the old estimate
      reward = rewards[row_index, column_index]
      old_q_value = q_values[old_row_index, old_column_index, action_index]
      temporal_difference = reward + (discount_factor * np.max(q_values[row_index, column_index])) - old_q_value
      # Update the Q-value of the (previous cell, action) pair
      new_q_value = old_q_value + (learning_rate * temporal_difference)
      q_values[old_row_index, old_column_index, action_index] = new_q_value
  print('Training complete!')

  # Extract the route from this grid's start cell and store it
  path = tractor.get_shortest_path(puntos_iniciales[index][0], puntos_iniciales[index][1]) 
  print(path)
  paths_tractor.append(path)

Training complete!
[[0, 0], [1, 0], [1, 1], [2, 1], [2, 2], [2, 3], [2, 4], [2, 5], [2, 6], [2, 7], [2, 8], [3, 8], [3, 9], [4, 9]]
Training complete!
[[4, 9], [3, 9], [3, 10], [3, 11]]


In [163]:
# Print both harvester stop positions (B first), one per line.
print(posicion_cosechadora_b, posicion_cosechadora, sep="\n")

(6, 9)
(5, 9)


# Rutas de Tractor B

In [164]:
# Find which harvester stopped in the second quadrant and build tractor B's two
# reward grids: spawn point -> pick-up cell next to that harvester, then
# pick-up cell -> trailer.

# Tractor B spawn point
posicion_inicial_b = (11, 11)

# The pick-up cell is always one row below the harvester; its column offset
# depends on where the harvester stopped.
desplazamiento_columna_b = {(6, 2): -1, (6, 9): 0}

# Harvester B takes precedence when both are in the second quadrant.
cosechadora_objetivo_b = next(
    (pos for pos in (posicion_cosechadora_b, posicion_cosechadora) if pos in segundo_cuadrante_cos),
    None,
)
if cosechadora_objetivo_b is None:
    # Fail loudly instead of leaving recorrido_tractor_b / posicion_b undefined
    # (or silently reusing values from an earlier kernel run).
    raise ValueError(
        "Ninguna cosechadora se encuentra en el segundo cuadrante: "
        f"{posicion_cosechadora}, {posicion_cosechadora_b}"
    )

# First leg: towards the harvester
recorrido_tractor_b = crear_entorno_tractor_sg_cua(posicion_cosechadora=cosechadora_objetivo_b)

# Both harvester cells are obstacles
recorrido_tractor_b[0][posicion_cosechadora[0], posicion_cosechadora[1]] = -100.0
recorrido_tractor_b[0][posicion_cosechadora_b[0], posicion_cosechadora_b[1]] = -100.0

# Goal of the first leg, and start of the leg towards the trailer. It is always
# derived from the harvester that was selected, so the start point of the
# second leg matches the cell rewarded in the first grid.
posicion_b = (
    cosechadora_objetivo_b[0] + 1,
    cosechadora_objetivo_b[1] + desplazamiento_columna_b[cosechadora_objetivo_b],
)
recorrido_tractor_b[0][posicion_b[0], posicion_b[1]] = 100.0

# Second leg of tractor B: towards the trailer
recorido_tractor_trailer_b = crear_entorno_trailer_sg_cua(posicion_cosechadora=posicion_cosechadora, posicion_cosechadora_b=posicion_cosechadora_b)
# Final cell of tractor B (next to the trailer)
recorido_tractor_trailer_b[(5, 11)] = 100.0

primer_recorrido_tractor_b = recorrido_tractor_b[0]
segundo_recorrido_tractor_b = recorido_tractor_trailer_b
df = pd.DataFrame(recorrido_tractor_b[0])
df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0
1,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0
2,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0
3,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0
4,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0
5,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0
6,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0
7,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,100.0,-1.0,-1.0
8,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
9,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [165]:
# Tractor B's reward grids in traversal order: first leg, then second leg.
recorridos_tractor_b = [primer_recorrido_tractor_b, segundo_recorrido_tractor_b]

In [166]:
# Routes found for tractor B, one per reward grid.
paths_tractor_b = []
# Start cell used when extracting each route: spawn point for the first grid,
# pick-up cell for the second. Note this rebinds the name used by the tractor A cell.
puntos_iniciales = [posicion_inicial_b, posicion_b]

# Train one agent per reward grid with the Q-learning update below, then record its route.
for index, rewards in enumerate(recorridos_tractor_b):
  tractor = Agente(rewards)

  # Training hyper-parameters
  epsilon = 0.9 # passed to get_next_action; presumably the probability of taking the greedy action — confirm in Agente
  discount_factor = 0.9 # weight of the best next-state Q-value in the update
  learning_rate = 0.9 # fraction of the temporal difference applied on each update

  num_actions = len(tractor.actions)
  # NOTE(review): q_values is never passed to the agent; Agente presumably reads this
  # module-level name when picking actions and extracting the path — confirm before renaming.
  q_values = np.zeros((tractor.environment_rows, tractor.environment_columns, num_actions))


  # Run 1000 training episodes
  for episode in range(1000):
    # Ask the agent for this episode's start cell
    row_index, column_index = tractor.get_starting_location()
    # Keep moving until the agent reports a terminal cell
    while not tractor.is_terminal_state(row_index, column_index):
      # Choose the next action
      action_index = tractor.get_next_action(row_index, column_index, epsilon)
      # Apply the action and move to the resulting cell
      old_row_index, old_column_index = row_index, column_index # remember the cell we are leaving
      row_index, column_index = tractor.get_next_location(row_index, column_index, action_index)
      # Reward of the new cell and temporal difference against the old estimate
      reward = rewards[row_index, column_index]
      old_q_value = q_values[old_row_index, old_column_index, action_index]
      temporal_difference = reward + (discount_factor * np.max(q_values[row_index, column_index])) - old_q_value
      # Update the Q-value of the (previous cell, action) pair
      new_q_value = old_q_value + (learning_rate * temporal_difference)
      q_values[old_row_index, old_column_index, action_index] = new_q_value
  print('Training complete!')

  # Extract the route from this grid's start cell and store it
  path = tractor.get_shortest_path(puntos_iniciales[index][0], puntos_iniciales[index][1]) 
  print(path)
  paths_tractor_b.append(path)

Training complete!
[[11, 11], [10, 11], [9, 11], [9, 10], [9, 9], [8, 9], [7, 9]]
Training complete!
[[7, 9], [7, 10], [6, 10], [6, 11]]


# Ruta del Trailer

In [167]:
def crear_entorno_trailer():
    """Build the 12x12 reward grid used for the trailer's route.

    Cells default to -1.0 (drivable). Rows 1-11 x columns 0-9 are set to
    -100.0, leaving row 0 and columns 10-11 open. Cells (2, 11) and (5, 11)
    are also set to -100.0.

    Returns:
        A one-element list containing the 12x12 float array of rewards.
    """
    filas_entorno, columnas_entorno = 12, 12

    # Drivable by default
    recompensas = np.full((filas_entorno, columnas_entorno), -1.0)

    # Forbidden block: everything except the top row and the two right-most columns
    recompensas[1:filas_entorno, 0:columnas_entorno - 2] = -100.0

    # Cells the trailer must not drive through (described in the notebook as
    # the tractors' final positions).
    # NOTE(review): confirm these match the goal cells set in the tractor cells.
    for celda in ((2, 11), (5, 11)):
        recompensas[celda] = -100.0

    return [recompensas]

In [168]:
# Build the trailer's reward grid (a one-element list) and keep a handle to the array.
recorrido_trailer = crear_entorno_trailer()
primer_recorrido_trailer = recorrido_trailer[0]

# Goal cell of the trailer is (0, 0). The start cell is supplied by the training cell.
# NOTE(review): an earlier comment here gave the start as (2, 11) — confirm against puntos_iniciales.
primer_recorrido_trailer[0, 0] = 100.0

df = pd.DataFrame(primer_recorrido_trailer)
df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,100.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
2,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
3,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-100.0
4,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
5,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
6,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-100.0
7,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
8,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0
9,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-100.0,-1.0,-1.0


In [169]:
# Routes found for the trailer, one per reward grid (there is a single grid).
paths_trailer = []
# Start cell used when extracting the route.
# NOTE(review): the saved output below starts at [5, 11], not (3, 11) — output looks stale; confirm.
puntos_iniciales = [(3,11)]

# Train one agent per reward grid with the Q-learning update below, then record its route.
for index, rewards in enumerate(recorrido_trailer):
  trailer = Agente(rewards)

  # Training hyper-parameters
  epsilon = 0.9 # passed to get_next_action; presumably the probability of taking the greedy action — confirm in Agente
  discount_factor = 0.9 # weight of the best next-state Q-value in the update
  learning_rate = 0.9 # fraction of the temporal difference applied on each update

  num_actions = len(trailer.actions)
  # NOTE(review): q_values is never passed to the agent; Agente presumably reads this
  # module-level name when picking actions and extracting the path — confirm before renaming.
  q_values = np.zeros((trailer.environment_rows, trailer.environment_columns, num_actions))


  # Run 1000 training episodes
  for episode in range(1000):
    # Ask the agent for this episode's start cell
    row_index, column_index = trailer.get_starting_location()
    # Keep moving until the agent reports a terminal cell
    while not trailer.is_terminal_state(row_index, column_index):
      # Choose the next action
      action_index = trailer.get_next_action(row_index, column_index, epsilon)
      # Apply the action and move to the resulting cell
      old_row_index, old_column_index = row_index, column_index # remember the cell we are leaving
      row_index, column_index = trailer.get_next_location(row_index, column_index, action_index)
      # Reward of the new cell and temporal difference against the old estimate
      reward = rewards[row_index, column_index]
      old_q_value = q_values[old_row_index, old_column_index, action_index]
      temporal_difference = reward + (discount_factor * np.max(q_values[row_index, column_index])) - old_q_value
      # Update the Q-value of the (previous cell, action) pair
      new_q_value = old_q_value + (learning_rate * temporal_difference)
      q_values[old_row_index, old_column_index, action_index] = new_q_value
  print('Training complete!')

  # Extract the route from this grid's start cell and store it
  path = trailer.get_shortest_path(puntos_iniciales[index][0], puntos_iniciales[index][1]) 
  print(path)
  paths_trailer.append(path)

Training complete!
[[5, 11], [5, 10], [4, 10], [3, 10], [2, 10], [1, 10], [0, 10], [0, 9], [0, 8], [0, 7], [0, 6], [0, 5], [0, 4], [0, 3], [0, 2], [0, 1], [0, 0]]


In [170]:
# Inspect harvester A's route segments.
paths_cosechadora

[[[4, 11], [3, 11], [3, 10], [2, 10]],
 [[2, 10], [2, 9], [2, 8], [2, 7], [2, 6], [2, 5], [2, 4], [2, 3], [2, 2]],
 [[3, 2], [3, 3], [3, 4], [3, 5], [3, 6], [3, 7], [3, 8], [3, 9]],
 [[4, 9], [4, 8], [4, 7], [4, 6], [4, 5], [4, 4], [4, 3], [4, 2]],
 [[5, 2], [5, 3], [5, 4], [5, 5], [5, 6], [5, 7], [5, 8], [5, 9]]]

In [171]:
# Inspect harvester B's route segments.
paths_cosechadora_b

[[[11, 11], [10, 11], [9, 11], [9, 10]],
 [[9, 10], [9, 9], [9, 8], [9, 7], [9, 6], [9, 5], [9, 4], [9, 3], [9, 2]],
 [[8, 2], [8, 3], [8, 4], [8, 5], [8, 6], [8, 7], [8, 8], [8, 9]],
 [[7, 9], [7, 8], [7, 7], [7, 6], [7, 5], [7, 4], [7, 3], [7, 2]],
 [[6, 2], [6, 3], [6, 4], [6, 5], [6, 6], [6, 7], [6, 8], [6, 9]]]

In [172]:
# Inspect tractor A's two route segments.
paths_tractor

[[[0, 0],
  [1, 0],
  [1, 1],
  [2, 1],
  [2, 2],
  [2, 3],
  [2, 4],
  [2, 5],
  [2, 6],
  [2, 7],
  [2, 8],
  [3, 8],
  [3, 9],
  [4, 9]],
 [[4, 9], [3, 9], [3, 10], [3, 11]]]

In [173]:
# Inspect tractor B's two route segments.
paths_tractor_b

[[[11, 11], [10, 11], [9, 11], [9, 10], [9, 9], [8, 9], [7, 9]],
 [[7, 9], [7, 10], [6, 10], [6, 11]]]

In [174]:
# Inspect the trailer's route.
paths_trailer

[[[5, 11],
  [5, 10],
  [4, 10],
  [3, 10],
  [2, 10],
  [1, 10],
  [0, 10],
  [0, 9],
  [0, 8],
  [0, 7],
  [0, 6],
  [0, 5],
  [0, 4],
  [0, 3],
  [0, 2],
  [0, 1],
  [0, 0]]]

In [175]:
import json

In [176]:
def _a_puntos_json(recorridos):
    """Flatten a list of route segments into a list of waypoint dicts.

    Each grid cell [row, col] becomes {"x": row*10, "y": 1.76, "z": col*10};
    segments are concatenated in order, keeping every cell (including cells
    repeated at segment boundaries).
    """
    puntos = []
    for recorrido in recorridos:
        for celda in recorrido:
            puntos.append({"x": celda[0]*10, "y": 1.76, "z": celda[1]*10})
    return puntos


# One flat waypoint list per vehicle
format_json_cosechadora = _a_puntos_json(paths_cosechadora)
format_json_cosechadora_b = _a_puntos_json(paths_cosechadora_b)
format_json_tractor = _a_puntos_json(paths_tractor)
format_json_tractor_b = _a_puntos_json(paths_tractor_b)
format_json_trailer = _a_puntos_json(paths_trailer)

# Variables de Simulacion

In [177]:
# Ask whether to simulate rain; only "y" (any case) enables it.
respuesta = input("Deseas simular un entorno con lluvia? (y/n): ")
entorno = respuesta.lower() == "y"

# Lower speed and rotation values are used when rain is selected.
velocidad, rotacion = (3, 2.7) if entorno else (5, 5)

In [178]:
# Payload written to data.json: simulation settings plus one waypoint list per vehicle.
data = dict(
    velocidad=velocidad,
    rotacion=rotacion,
    nublado=entorno,
    cosechadora_a=format_json_cosechadora,
    cosechadora_b=format_json_cosechadora_b,
    tractor=format_json_tractor,
    tractor_b=format_json_tractor_b,
    trailer=format_json_trailer,
)

In [179]:
# Save the payload dictionary to a JSON file.
json_filename = "data.json"

# Serialise before opening the file: if a value is not JSON-serialisable the
# error is raised here and any existing data.json is left untouched, instead of
# being truncated to a partial document.
contenido_json = json.dumps(data, indent=4)

# Explicit encoding so the bytes on disk do not depend on the platform default.
with open(json_filename, "w", encoding="utf-8") as json_file:
    json_file.write(contenido_json)

print(f"Archivo JSON '{json_filename}' creado exitosamente.")

Archivo JSON 'data.json' creado exitosamente.


# Socket TCP

In [180]:
import socket

# Server configuration
HOST = '127.0.0.1'  # Local loopback address
PORT = 12345        # TCP port the Unity client connects to

# Read the payload first so a missing file fails before any socket is opened
with open('data.json', 'rb') as file:
    file_data = file.read()

# Context managers guarantee both sockets are closed even if an error occurs;
# a listening socket left open in the kernel is what makes a re-run of this
# cell fail with "Address already in use".
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
    # Allow rebinding the port right after a previous run
    server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server_socket.bind((HOST, PORT))
    server_socket.listen(1)

    print("Esperando la conexión desde Unity...")

    # Block until the client connects
    conn, addr = server_socket.accept()
    with conn:
        print('Conectado a', addr)

        # Send the whole file to the client (Unity)
        conn.sendall(file_data)
        print("Archivo enviado exitosamente.")


OSError: [Errno 98] Address already in use