In [None]:
!pip install torch

In [2]:
import numpy as np
# import pandas as pd
import scipy as sp
import math, copy, random, multiprocessing
import matplotlib.pyplot as plt
import torch
from sklearn.decomposition import PCA
import torch.nn as nn
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 

In [3]:
class Rover_architecture(nn.Module):
    def __init__(self):
        super(Rover_architecture, self).__init__()
        self.fc1 = nn.Linear(20, 32)  # Input layer (16 units to 32 units)
        self.fc2 = nn.Linear(32, 32)  # Hidden layer (32 units to 32 units)
        self.fc3 = nn.Linear(32, 2)   # Output layer (32 units to 2 units)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        return x

class Drone_Excavator_architecture(nn.Module):
    def __init__(self):
        super(Drone_Excavator_architecture, self).__init__()
        self.fc1 = nn.Linear(16, 32)  # Input layer (16 units to 32 units)
        self.fc2 = nn.Linear(32, 32)  # Hidden layer (32 units to 32 units)
        self.fc3 = nn.Linear(32, 2)   # Output layer (32 units to 2 units)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        return x


In [4]:
class Rover:
    def __init__(self, Game_x, Game_y, model=Rover_architecture()):
        self.type = 'Rover'
        self.position = np.array([np.random.uniform(0,Game_x), np.random.uniform(0,Game_y)])
        self.model = model
        self.Game_x = Game_x
        self.Game_y = Game_y

    def compute_quadrant(self, position):
      quadrant = 0
      if(position[0] <= self.Game_x//2):
        if(position[1] <= self.Game_y //2):
          quadrant = 1
        else:
          qaudrant = 3
      else:
        if(position[1] <= self.Game_y //2):
          quadrant = 2
        else:
          quadrant = 4
      return quadrant

    def get_next_move(self,sites, agents):
      input_data = torch.Tensor(self.compute_site_density(sites)+ self.compute_agent_density(agents))
      dx,dy = self.model.forward(input_data)
      self.position[0] += dx
      self.position[1] += dy
      if self.position[0] > self.Game_x: self.position[0] = self.Game_x
      if self.position[1] > self.Game_y: self.position[1] = self.Game_y
      return [dx,dy]


    def rollout_island(self, sites, agents, num_steps):
      trajectory = []

      for _ in range(num_steps):
        # Get the current state observation (site and agent data)
        site_density = self.compute_site_density(sites)
        agent_density = self.compute_agent_density(agents)
        state = torch.Tensor(site_density + agent_density)

        # Store the current state
        current_state = state.clone()

        # Choose an action using the policy (assuming a discrete action space)
        action = self.get_next_move(sites,agents)


        # Calculate the reward based on the new state
        closest_dig_site = self.find_closest_dig_site(sites)
        reward = self.reward(closest_dig_site)

        # Store the data for this time step
        trajectory.append([current_state, action, reward])

      return trajectory

    def compute_site_density(self, sites):
        # Implement equation 3
        site_density = [0]*8
        for site in sites:
            quadrant = site.quadrant-1
            distance = np.linalg.norm(self.position - site.position)
            if site.type == 'Marked':
              site_density[quadrant] += site.value/distance
            if site.type == 'Unmarked':
              site_density[quadrant+4] += site.value / distance
        return site_density

    def compute_agent_density(self, agents):
        # Implement equation 2 first 4 rover , then excavator, then drone
        agent_density = [0]*12
        for agent in agents:
          distance = np.linalg.norm(self.position - agent.position)
          quadrant = self.compute_quadrant(agent.position)-1
          if agent.type == "Rover": agent_id = 0
          elif agent.type == "Excavator": agent_id = 1
          else: agent_id = 2

          if(distance == 0):
            continue
          agent_density[4*agent_id + quadrant] += 1/distance

        return agent_density

    def reward(self, closest_dig_site):
        return 1/ torch.norm(torch.tensor(self.position, requires_grad=True) - torch.tensor(closest_dig_site.position), p=2).requires_grad_()

    def find_closest_dig_site(self, sites):
      min = 100000000
      best = sites[0]
      for i in sites:
        dist = np.linalg.norm(self.position-i.position)
        if(dist < min):
          min = dist
          best = i

      return best

########################################################################################################################

class Excavator:
    def __init__(self, Game_x, Game_y, model=Drone_Excavator_architecture()):
        self.type = 'Excavator'
        self.position = np.array([np.random.uniform(0,Game_x), np.random.uniform(0,Game_y)])
        self.model = model
        self.Game_x = Game_x
        self.Game_y = Game_y

    def compute_quadrant(self, position):
      quadrant = 0

      if(position[0] <= self.Game_x//2):
        if(position[1] <= self.Game_y //2):
          quadrant = 1
        else:
          qaudrant = 3
      else:
        if(position[1] <= self.Game_y //2):
          quadrant = 2
        else:
          quadrant = 4
      return quadrant

    def get_next_move(self,sites, agents):
      input_data = torch.Tensor(self.compute_site_density(sites)+ self.compute_agent_density(agents))
      dx,dy = self.model.forward(input_data)
      self.position[0] += dx
      self.position[1] += dy
      if self.position[0] > self.Game_x: self.position[0] = self.Game_x
      if self.position[1] > self.Game_y: self.position[1] = self.Game_y
      return [dx,dy]


    def rollout_island(self, sites, agents, num_steps):
      trajectory = []

      for _ in range(num_steps):
        # Get the current state observation (site and agent data)
        site_density = self.compute_site_density(sites)
        agent_density = self.compute_agent_density(agents)
        state = torch.Tensor(site_density + agent_density)

        # Store the current state
        current_state = state.clone()

        # Choose an action using the policy (assuming a discrete action space)
        action = self.get_next_move(sites,agents)


        # Calculate the reward based on the new state
        closest_dig_site = self.find_closest_dig_site(sites)
        reward = self.reward(closest_dig_site)

        # Store the data for this time step
        trajectory.append([current_state, action, reward])

      return trajectory

    def compute_site_density(self, sites):
        # Implement equation 3
        site_density = [0]*4
        for site in sites:
            quadrant = site.quadrant-1
            distance = np.linalg.norm(self.position - site.position)
            if site.type == 'Marked':
              site_density[quadrant] += site.value/distance
        return site_density

    def compute_agent_density(self, agents):
        # Implement equation 2 first 4 rover , then excavator, then drone
        agent_density = [0]*12
        for agent in agents:
          distance = np.linalg.norm(self.position - agent.position)
          quadrant = self.compute_quadrant(agent.position)-1
          if agent.type == "Rover": agent_id = 0
          elif agent.type == "Excavator": agent_id = 1
          else: agent_id = 2

          if(distance == 0):
            continue
          agent_density[4*agent_id + quadrant] += 1/distance

        return agent_density

    def reward(self, closest_dig_site):
        return 1/ torch.norm(torch.tensor(self.position, requires_grad=True) - torch.tensor(closest_dig_site.position), p=2).requires_grad_()

    def find_closest_dig_site(self, sites):
      min = 100000000
      best = sites[0]
      for i in sites:
        dist = np.linalg.norm(self.position-i.position)
        if(dist < min):
          min = dist
          best = i

      return best

########################################################################################################################

class Drone:
    def __init__(self, Game_x, Game_y, model=Drone_Excavator_architecture()):
        self.type = 'Drone'
        self.position = np.array([np.random.uniform(0,Game_x), np.random.uniform(0,Game_y)])
        self.model = model
        self.Game_x = Game_x
        self.Game_y = Game_y

    def compute_quadrant(self, position):
      quadrant = 0
      if(position[0] <= self.Game_x//2):
        if(position[1] <= self.Game_y //2):
          quadrant = 1
        else:
          qaudrant = 3
      else:
        if(position[1] <= self.Game_y //2):
          quadrant = 2
        else:
          quadrant = 4
      return quadrant

    def get_next_move(self,sites, agents):
      input_data = torch.Tensor(self.compute_site_density(sites)+ self.compute_agent_density(agents))
      dx,dy = self.model.forward(input_data)
      self.position[0] += dx
      self.position[1] += dy
      if self.position[0] > self.Game_x: self.position[0] = self.Game_x
      if self.position[1] > self.Game_y: self.position[1] = self.Game_y
      return [dx,dy]


    def rollout_island(self, sites, agents, num_steps):
      trajectory = []

      for _ in range(num_steps):
        # Get the current state observation (site and agent data)
        site_density = self.compute_site_density(sites)
        agent_density = self.compute_agent_density(agents)
        state = torch.Tensor(site_density + agent_density)

        # Store the current state
        current_state = state.clone()

        # Choose an action using the policy (assuming a discrete action space)
        action = self.get_next_move(sites,agents)

        # Calculate the reward based on the new state
        closest_dig_site = self.find_closest_dig_site(sites)
        reward = self.reward(closest_dig_site)

        # Store the data for this time step
        trajectory.append([current_state, action, reward])

      return trajectory

    def compute_site_density(self, sites):
        # Implement equation 3
        site_density = [0]*4
        for site in sites:
            quadrant = site.quadrant-1
            distance = np.linalg.norm(self.position - site.position)
            if site.type == 'Marked':
              site_density[quadrant] += site.value/distance
        return site_density

    def compute_agent_density(self, agents):
        # Implement equation 2 first 4 rover , then excavator, then drone
        agent_density = [0]*12
        for agent in agents:
          distance = np.linalg.norm(self.position - agent.position)
          quadrant = self.compute_quadrant(agent.position)-1
          if agent.type == "Rover": agent_id = 0
          elif agent.type == "Excavator": agent_id = 1
          else: agent_id = 2

          if(distance == 0):
            continue
          agent_density[4*agent_id + quadrant] += 1/distance

        return agent_density

    def reward(self, closest_dig_site):
        return 1/ torch.norm(torch.tensor(self.position, requires_grad=True) - torch.tensor(closest_dig_site.position), p=2).requires_grad_()

    def find_closest_dig_site(self, sites):
      min = 100000000
      best = sites[0]
      for i in sites:
        dist = np.linalg.norm(self.position-i.position)
        if(dist < min):
          min = dist
          best = i

      return best

In [5]:
class Island:
    def __init__(self, a_type, numAgents):
        self.agent_type = a_type
        self.numAgents = numAgents
        self.agents = []
        self.dataset = []


    def initialize_agents(self,dim):
        self.dim = dim
        if(self.agent_type == 'Rover'):
            for i in range(self.numAgents):
                self.agents.append(Rover(dim[0],dim[1]))

        if(self.agent_type == 'Excavator'):
            for i in range(self.numAgents):
                self.agents.append(Excavator(dim[0],dim[1]))

        if(self.agent_type == 'Drone'):
            for i in range(self.numAgents):
                self.agents.append(Drone(dim[0],dim[1]))


    def update_policies(self, sites, agents, N):
      # Algorithm 1
        for i in range(N):
            policy = random.choice(self.agents)

            # perturb weights
            parameters_list = list(policy.model.parameters())
            ## Generate Gaussian noise with the same shape as the tensor
            noise = torch.randn(parameters_list[2].size()) * 1 + 0.1
            for i,param in enumerate(policy.model.parameters()):
              if(i ==2):
                param.data += noise


            # Perform a rollout using the policy
            rollout_data = policy.rollout_island(sites,agents,20)  # Implement the 'rollout' method for each agent

            # Apply PPO
            policy.model = self.train_ppo(policy.model, rollout_data)

            # add to population
            agents.append(policy)


    def train_ppo(self, model, rollout_data):
        model.train()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
        for state, action, reward in rollout_data:
          loss = -1 * reward
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()
        model.eval()
        return model

    def add_agents(self, dim, list_agents):
      for i in list_agents:
        if self.agent_type=="Rover":
          self.agents += [ Rover(dim[0], dim[1], i.model) ]
        elif self.agent_type=="Excavator":
          self.agents += [ Excavator(dim[0], dim[1], i.model) ]
        else:
          self.agents += [ Drone(dim[0], dim[1], i.model) ]


    def update_latent_space(self):
        # Get all models
        models = [i.model for i in self.agents]
        # get all weights
        weight_vectors = []
        for model in models:
            weights = []
            for param in model.parameters():
                weights.append(param.data.view(-1).numpy())
            weight_vectors.append(np.concatenate(weights))
        # Perform PCA to reduce dimensionality
        pca = PCA(n_components=20)
        reduced_vectors = pca.fit_transform(weight_vectors)
        # get furthest vectors
        m = self.numAgents
        distances = np.linalg.norm(weight_vectors, axis=1)
        furthest_indices = np.argpartition(distances, -m)[-m:]
        # update models
        for i in range(m):
          self.agents[i].model = models[furthest_indices[i]]
        self.agents = self.agents[:m]

In [6]:
# re-write
class Site:

  def __init__(self, position, value,quadrant):
    self.position = position
    self.value = value
    self.type = "Unmarked"
    self.quadrant = quadrant
    self.excavated = False
    self.time_marked = None
###########################################################################################################################
class Team:
  def __init__(self,list_agents, game_mode):
    self.fitness = None
    self.R, self.D, self.E = [], [], []
    for i in list_agents:
      if i.type=="Rover": self.R.append(i)
      elif i.type=="Excavator": self.E.append(i)
      else: self.D.append(i)
    self.game_mode = game_mode
    # coupling values
    self.excavator_coupling_requirement = 3
    self.rover_coupling_requirement = 3
    self.excavator_observation_radius = 15
    self.rover_observation_radius = 10
    self.drone_observation_radius = 20
    self.num_marked = 0
    self.num_excavated = 0
    self.num_communicated = 0


  def mark_site(self, site, curr_ts):
      rovs = 0
      for rover in self.R:
          if np.linalg.norm(rover.position - site.position) <= self.rover_observation_radius:
              rovs += 1
      if rovs >= self.rover_coupling_requirement:
          site.type = 'Marked'
          site.time_marked = curr_ts
          self.num_marked += 1

  def excavate_site(self, site):
      excs = 0
      for excavator in self.E:
          if np.linalg.norm(excavator.position - site.position) <= self.excavator_observation_radius:
              excs += 1
      if excs >= self.excavator_coupling_requirement:
          site.excavated = True
          self.num_excavated += 1


  def rollout(self, digsites, num_iterations):
    agents = self.R + self.E + self.D
    for t in range(num_iterations):
      for agent in agents:
        agent.get_next_move(digsites,agents)
      for site in digsites:
        self.mark_site(site, t)
        if(site.type == "Marked"):
          self.excavate_site(site)
      if self.game_mode is None: pass
      elif self.game_mode=="D": Decay(digsites)
      elif self.game_mode=="V": Volatile(digsites, t)
      else:
        Decay(digsites)
        Volatile(digsites,t)

  def calc_fitness(self,digsites_updated):
    fitness = 0
    for dig_site in digsites_updated:
        # Implement equation 4

      if(dig_site.excavated == False):
        continue
      drones_covered = 0  # Number of drones covering the dig site
      for drone in self.D:
          distance = np.linalg.norm(drone.position - dig_site.position)
          if distance <= self.drone_observation_radius:
              drones_covered += 1

      if drones_covered > 0:
          fitness += dig_site.value
          self.num_communicated += 1


    self.fitness = fitness

  def get_fitness(self, digsites):
    digsites_updated = copy.deepcopy(digsites)
    if self.fitness is not None:
      return
    else:
      self.rollout(digsites_updated,20)
      self.calc_fitness(digsites_updated)

#############################################################################################################################################

class Mainland:

  def __init__(self, pop_size, team_size, elite_size, X_max, Y_max, num_digsites,game_mode):
    self.ts = team_size
    self.ps = pop_size
    self.dim = [X_max, Y_max]
    self.dss = num_digsites
    self.es = elite_size
    self.sites = []
    self.population = []
    self.rover_score = 0
    self.excavator_score = 0
    self.drone_score = 0
    self.game_mode = game_mode # D for decay, V for volatile, M

  def place_digsites(self):
    for _ in range(self.dss):
      position = [random.randint(0,self.dim[0]), random.randint(0,self.dim[1])]
      value = np.random.randint(1,10)
      # find quad
      x, y = self.dim
      if(position[0] <= x//2):
        if(position[1] <= y //2):
            quadrant = 1
        else:
          quadrant = 3
      else:
          if(position[1] <= y //2):
            quadrant = 2
          else:
            quadrant = 4
      self.sites.append(Site(position, value, quadrant))

  def update_teams(self, All_r, All_e, All_d):
    # input is list of policies of rovers, excavators, drones
    All_r = [Rover(self.dim[0], self.dim[1], i) for i in All_r]
    All_e = [Excavator(self.dim[0], self.dim[1], i) for i in All_e]
    All_d = [Drone(self.dim[0], self.dim[1], i) for i in All_d]
    All_agents = copy.deepcopy(All_r + All_e + All_d)
    teams = [Team(random.choices(All_agents, k=self.ts), self.game_mode) for _ in range(self.ps)]
    self.population = teams

  def initialize(self):
    # input is list of policies of rovers, excavators, drones
    self.place_digsites()

  def crossover_possible(self,T1, T2):
    # atleast one agent class in common
    ret = ((len(T1.R)>0) and (len(T2.R)>0)) or ((len(T1.E)>0) and (len(T2.E)>0)) or ((len(T1.D)>0) and (len(T2.D)>0))
    return ret

  def crossover(self, T_elite, T_nonelite):
    # cross over T_e and T_ne
    ## from T_ne, and 2 children return the one with the highest fitness [tournament selection]
    while True:
      rng = random.randint(1,3)
      if rng==1:
        # exchange rover
        if not ((len(T_elite.R)>0) and (len(T_nonelite.R)>0)):
          continue
        C1, C2 = copy.deepcopy(T_elite), copy.deepcopy(T_nonelite)
        idx1, idx2 = random.choice(range(len(C1.R))), random.choice(range(len(C2.R)))
        C1.R[idx1], C2.R[idx2] = C2.R[idx2], C1.R[idx1]
      if rng==2:
        # exchange excavator
        if not ((len(T_elite.E)>0) and (len(T_nonelite.E)>0)):
          continue
        C1, C2 = copy.deepcopy(T_elite), copy.deepcopy(T_nonelite)
        idx1, idx2 = random.choice(range(len(C1.E))), random.choice(range(len(C2.E)))
        C1.E[idx1], C2.E[idx2] = C2.E[idx2], C1.E[idx1]
      if rng==3:
        # exchange drone
        if not ((len(T_elite.D)>0) and (len(T_nonelite.D)>0)):
          continue
        C1, C2 = copy.deepcopy(T_elite), copy.deepcopy(T_nonelite)
        idx1, idx2 = random.choice(range(len(C1.D))), random.choice(range(len(C2.D)))
        C1.D[idx1], C2.D[idx2] = C2.D[idx2], C1.D[idx1]
      break
    C1.fitness, C2.fitness = None, None
    C1.get_fitness(self.sites)
    C2.get_fitness(self.sites)
    # parallel for children
    with multiprocessing.Pool() as pool:
        pop = pool.map(self.parallel, [C1,C2])
    l = [T_nonelite, *pop]
    l.sort(key=lambda team: team.fitness, reverse=True)
    return l[0]

  def parallel(self,team_in):
    team_in.get_fitness(self.sites)
    return team_in

  def run_algo(self, N):
    # algorithm 2
    for gen in tqdm(range(N)):

      # get fitness
      # for team in self.population:
      #   # team.get_fitness(self.sites)
      #   self.rover_score = team.num_marked
      #   self.excavator_score = team.num_excavated
      #   self.drone_score = team.num_communicated
      with multiprocessing.Pool() as pool:
        pop = pool.map(self.parallel, self.population)
      self.population = pop
    
      # get the set E
      self.population.sort(key=lambda team: team.fitness, reverse=True)
      E = self.population[:self.es]
      T_e = self.population[self.es+1:]

      # cross over
      S = []
      for pi_y in T_e:
        pi_x = random.choice(E)
        while not self.crossover_possible(pi_x, pi_y):
          pi_x = random.choice(E)
        S.append(self.crossover(pi_x, pi_y))

      # T <- S ∪ E
      self.population = E + S
        
    for team in self.population:
      self.rover_score += team.num_marked
      self.excavator_score += team.num_excavated
      self.drone_score += team.num_communicated

  def get_elite(self):
    for team in self.population: team.get_fitness(self.sites)
    self.population.sort(key=lambda team: team.fitness, reverse=True)
    E = self.population[:self.es]
    return E

In [7]:
def init_weights(num_mainlands, num_islands):
  global weights
  weights = np.zeros((num_mainlands,num_islands))
  prob = np.zeros((num_mainlands, num_islands))
  for i in range(num_mainlands):
    for j in range(num_islands):
      weights[i][j] = 1/num_islands ## initially start with every island having equal weightage

#############################################################################################################################################
def distribution_from_weights(num_mainlands,num_islands):
  global prob
  prob = np.zeros((num_mainlands, num_islands))
  for i in range(num_mainlands):
    for j in range(num_islands):
      prob[i][j] = math.exp(weights[i][j])/np.sum(np.exp(weights[:, j]))
#############################################################################################################################################
def get_gradient_matrix(i,j):
  global weights
  weight_tensor = torch.tensor(weights.copy(), requires_grad=True)
  prob_tensor = torch.exp(weight_tensor) / torch.sum(torch.exp(weight_tensor), dim=0)
  target_probability = prob_tensor[i][j]

  # Compute the gradient using automatic differentiation
  target_probability.backward()
  # The gradient is now stored in weights.grad
  gradient = weight_tensor.grad
  gradient_matrix = gradient.numpy()
  gradient_vector = gradient_matrix[:,j]
  return gradient_vector
#############################################################################################################################################
def update_weights(Mainlands, num_islands, alpha=0.1, nu=0.01):
    global weights
    num_mainlands = len(Mainlands)
    for j in range(num_islands):
        drone_score = []
        update = np.zeros(num_mainlands)
        for m in range(1, num_mainlands + 1):
            temp = []
            gradient = get_gradient_matrix(m-1,j)  # ∇𝑤𝜇(𝑚,𝑖)
            if(j == 0 ):
              performance = Mainlands[m-1].rover_score  # 𝑓𝑚,𝑖
            elif(j == 1):
              performance = Mainlands[m-1].excavator_score  # 𝑓𝑚,𝑖
            else:
              temp.append(Mainlands[m-1].drone_score)
              performance = Mainlands[m-1].drone_score  # 𝑓𝑚,𝑖
            log_term = math.log(prob[m - 1][j])  # 𝜈𝑙𝑜𝑔𝜇(𝑚,𝑖)
            update += gradient * (performance - nu * log_term)
            drone_score.append(temp)
        # print(f"performance {performance}")
        # print(f"log_term {log_term}")
        # print(f"update {update}")
        

        weights[:,j] += alpha * update  # Apply the update
    global store_weights, sites_interacted
    store_weights.append(copy.deepcopy(weights))
    sites_interacted.append(drone_score)
#############################################################################################################################################
def select_agents(num_mainlands, Islands):
  net_distribution = []
  num_islands = len(Islands)
  for m in range(num_mainlands):
    for i in range(num_islands):
      if Islands[i].agent_type == "Rover":
          rovers = random.choices(Islands[i].agents, k=int(len(Islands[i].agents)*prob[m][i]))
      elif Islands[i].agent_type == "Excavator":
          excavators = random.choices(Islands[i].agents, k=int(len(Islands[i].agents)*prob[m][i]))
      else:
          drones = random.choices(Islands[i].agents, k=int(len(Islands[i].agents)*prob[m][i]))

    net_distribution.append([[k.model for k in rovers],[k.model for k in excavators],[k.model for k in drones]])

  return net_distribution
#############################################################################################################################################
# Decay and Volatile
def Decay(site_list):
  for site in site_list:
    if site.type=="Unmarked":
      site.value *= 0.5
def Volatile(site_list, curr_ts):
  for site in site_list:
    if (site.type=="Marked") and (site.excavated==False):
      if curr_ts - site.time_marked > 7:
        site.type = "Unmarked"
        site.time_marked = None

In [8]:
# rollout and rollout_island has numIter hyperparameter [hardcoded]
num_iterations = 12
num_agents_per_island = 150
num_mainlands = 2
team_size = 10
pop_size = 50
elite_size = 10
mainland_mode = None
Island_m = Mainland(0,0,0,60, 60,15, None)
Island_m.initialize()
init_weights(num_mainlands,3)
distribution_from_weights(num_mainlands,3)
print(prob)

[[0.5 0.5 0.5]
 [0.5 0.5 0.5]]


In [9]:
store_weights = []
sites_interacted = []
highest_team_fitness = []

In [None]:
## This cell is for the policy migration code, which has 4 parts,
## 1) defining a softmax function to get the distribution of agents
## 2) Invoking N iterations of islands and mainlands
## 3) Sending elite teams back to the islands
## 4) Updating the softmax with a gradient

# Algorithm 3
## Initialize islands
Island_list = [Island("Rover", num_agents_per_island), Island("Excavator", num_agents_per_island), Island("Drone", num_agents_per_island)]
for Is in Island_list:
  Is.initialize_agents(Island_m.dim)

## Initialize Mainlands
# Mainland_list = [Mainland(pop_size,team_size,elite_size, 60,60, np.random.randint(1,20), mainland_mode) for _ in range(num_mainlands)]
Mainland_list = [Mainland(pop_size,team_size,elite_size, 60,60,20,"D"),Mainland(pop_size,team_size,elite_size, 60,60,20,None)]
for M in Mainland_list:
  M.initialize()

for k in range(num_iterations):

  # 𝑃𝑜𝑝_𝐼 = islands(𝑃𝑜𝑝_𝐼)
  for Is in Island_list:
    Is.update_policies(Island_m.sites,Is.agents, 50)
    Is.update_latent_space()
  # 𝑇_𝑀 = mainlands(𝑇_𝑀)
  for m, M in enumerate(Mainland_list):
    print(m,end="")
    agent_distribution = select_agents(num_mainlands,Island_list)
    rovers = agent_distribution[m][0]
    excavators = agent_distribution[m][1]
    drones = agent_distribution[m][2]
    M.update_teams(rovers, excavators,drones)
    M.run_algo(7)
  print()

  # 𝑃𝑜𝑝_𝑖 ← 𝑃𝑜𝑝_𝑖 ∪ 𝑇_(𝑚,𝑖)[0:𝑒] ∀𝑚 ∈ M
  highest_team_fitness.append([-1*np.inf for i in range(num_mainlands)])
  for m, M in enumerate(Mainland_list):
    E = M.get_elite()
    for elite in E:
      Island_list[0].add_agents(Island_m.dim,elite.R)
      Island_list[1].add_agents(Island_m.dim, elite.E)
      Island_list[2].add_agents(Island_m.dim, elite.D)
      if elite.fitness > highest_team_fitness[-1][m]:
            highest_team_fitness[-1][m] = elite.fitness
  for Is in Island_list:
    Is.update_latent_space()
  # 𝑤_[𝑘+1,𝑖] ← update(𝑤_[𝑘,𝑖])
  update_weights(Mainland_list,3)

  for m, M in enumerate(Mainland_list):
    # /* Replace ( |𝑇 | − 𝑒 ) teams by sampling islands */
    # 13 𝑇_𝑚 ← 𝑇_𝑚[0:𝑒]∪(|𝑇|−𝑒) ∼ 𝑤_(𝑘+1,𝑖), ∀𝑖 ∈ I
    Tm_0_e = M.get_elite()
    # add new teams from islands
    agent_distribution = select_agents(num_mainlands,Island_list)
    rovers = agent_distribution[m][0]
    excavators = agent_distribution[m][1]
    drones = agent_distribution[m][2]
    M.update_teams(rovers, excavators,drones)
    M.population = M.population[:-1*len(E)]
    # add elite teams back
    M.population += Tm_0_e

In [12]:
store_weights

[array([[-0.44166667,  0.33333333,  0.33333333],
        [ 1.10833333,  0.33333333,  0.33333333]]),
 array([[ 2.49028399,  0.33333333,  0.33333333],
        [-1.82361732,  0.33333333,  0.33333333]]),
 array([[ 2.85122025,  0.33333333,  0.33333333],
        [-2.18455358,  0.33333333,  0.33333333]]),
 array([[ 3.06363792,  0.33333333,  0.33333333],
        [-2.39697125,  0.33333333,  0.33333333]]),
 array([[ 3.29546584,  0.33333333,  0.33333333],
        [-2.62879917,  0.33333333,  0.33333333]]),
 array([[ 3.51434526,  0.33333333,  0.33333333],
        [-2.84767859,  0.33333333,  0.33333333]]),
 array([[ 3.69751763,  0.33333333,  0.33333333],
        [-3.03085097,  0.33333333,  0.33333333]]),
 array([[ 3.88002294,  0.33333333,  0.33333333],
        [-3.21335627,  0.33333333,  0.33333333]]),
 array([[ 4.0318509 ,  0.33333333,  0.33333333],
        [-3.36518423,  0.33333333,  0.33333333]]),
 array([[ 4.15676351,  0.33333333,  0.33333333],
        [-3.49009684,  0.33333333,  0.33333333]]),
