In [1]:
from mesa import Agent, Model
from mesa.time import SimultaneousActivation
from mesa.space import MultiGrid
from mesa.datacollection import DataCollector
from mesa.visualization.modules import CanvasGrid, ChartModule
from mesa.visualization.ModularVisualization import ModularServer
from mesa.visualization.UserParam import Choice
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import random

In [7]:
class LearningAgent(Agent):
    """Agent that walks along one grid row, eats from patches and learns.

    On every step the agent moves one cell to the right (wrapping around at
    the grid edge), determines which food it consumed from the patch on its
    new cell, and updates its internal estimate with the rule selected by
    ``learning_model`` ('RW' or 'TD').
    """

    def __init__(self, unique_id, model, row, learning_model):
        """Set up the agent's learning parameters and initial estimates.

        Args:
            unique_id: Identifier forwarded to the ``Agent`` base class.
            model: Owning model; ``step`` reads ``model.grid`` from it.
            row: Row index the agent was created for (stored only).
            learning_model: 'RW' or 'TD'. Any other value means ``step``
                performs no learning update.
        """
        super().__init__(unique_id, model)
        self.row = row
        self.learning_model = learning_model  # 'RW' or 'TD'
        self.learning_rate = 0.1  # Rate of learning
        self.extinction_rate = 1.0  # Standard for RW extinction = 1, typically <1
        self.delta = 0.0  # Standard for delta = 0. Standard logistic for delta = 1, S-curve 0 < delta < 1
        self.beta = 1.0  # Responsivity to food in TD learning
        self.affect = 0.01  # Initial value outcome RW learning
        self.value_low = 0.0  # Initial value outcome TD learning
        self.value_high = 0.0  # Initial value outcome TD learning
        self.pcolor = None  # The current patch type where the agent is located
        self.food_consumed = None  # Food consumed status ('L' or 'H')
        self.p_low = 0.2  # True reward value of food type L
        self.p_high = 0.8  # True reward value of food type H
        self.lambda_val = 0.8  # Max reward for RW model

    def step(self):
        """Move one cell to the right, eat from the patch there, then learn."""
        x, y = self.pos
        new_x = (x + 1) % self.model.grid.width
        self.model.grid.move_agent(self, (new_x, y))

        # Find the patch by type instead of by list position: a grid cell
        # holds agents as well as patches, and their order in the cell list
        # depends on who was placed there first.
        cell_contents = self.model.grid.get_cell_list_contents([self.pos])
        patch = next((obj for obj in cell_contents if isinstance(obj, Patch)), None)
        if patch is None:
            # No patch on this cell: nothing was eaten, so nothing to learn.
            self.pcolor = None
            self.food_consumed = None
            return
        self.pcolor = patch.type

        # Determine food consumed based on patch type
        if self.pcolor == 'HH':
            self.food_consumed = 'H'
        elif self.pcolor == 'LL':
            self.food_consumed = 'L'
        else:
            # FOR NOW RANDOM. WILL DEPEND ON LEARNING MODELS.
            self.food_consumed = random.choice(['H', 'L'])

        # Update learning
        if self.learning_model == 'RW':
            self.update_affect_rw()
        elif self.learning_model == 'TD':
            self.update_reward_td()

    def update_affect_rw(self):
        """Apply one 'RW' update to ``affect`` for the food just consumed.

        Food 'H' moves ``affect`` toward ``lambda_val``; anything else moves
        it toward 0, scaled by ``extinction_rate``. The step size is
        ``learning_rate * affect ** delta``.
        """
        gain = self.learning_rate * (self.affect ** self.delta)
        # Add only the increment: the estimate moves by a fraction of the
        # prediction error rather than being doubled first.
        if self.food_consumed == 'H':
            self.affect += gain * (self.lambda_val - self.affect)
        else:
            self.affect += gain * self.extinction_rate * (0 - self.affect)

    def update_reward_td(self):
        """Apply one 'TD' update to the value of the food just consumed.

        Food 'L' moves ``value_low`` toward ``beta * p_low``; anything else
        moves ``value_high`` toward ``beta * p_high``.
        """
        if self.food_consumed == 'L':
            self.value_low += self.learning_rate * (self.beta * self.p_low - self.value_low)
        else:
            self.value_high += self.learning_rate * (self.beta * self.p_high - self.value_high)


class Patch(Agent):
    """Agent subclass that labels a grid cell with a patch type.

    The types given a color here are "HH", "LL" and "HL".
    """

    # Display color for each known patch type; anything else renders white.
    _COLOR_BY_TYPE = {"HH": "red", "LL": "blue", "HL": "purple"}

    def __init__(self, unique_id, model, patch_type):
        """Store ``patch_type`` on the instance as ``self.type``."""
        super().__init__(unique_id, model)
        self.type = patch_type

    def get_color(self):
        """Return the color name for this patch's type ("white" if unknown)."""
        return self._COLOR_BY_TYPE.get(self.type, "white")
    
class LearningModel(Model):
    """Grid of typed patches with one ``LearningAgent`` per row.

    Agents start in column 0 (agent ``i`` on row ``i``) and are stepped through
    a ``SimultaneousActivation`` schedule. Every cell receives one ``Patch``
    whose type is drawn according to ``distribute_patches``.
    """

    def __init__(self, N, width, height, learning_model='RW', distribute_patches = 'random'):
        """Build the grid, the agents, the patches and the data collector.

        Args:
            N: Number of learning agents; must not exceed ``height`` because
                agent ``i`` is placed at ``(0, i)``.
            width: Grid width in cells.
            height: Grid height in cells.
            learning_model: Passed to every ``LearningAgent`` ('RW' or 'TD').
            distribute_patches: 'random', 'gradient_h' or 'gradient_l'.

        Raises:
            ValueError: If ``distribute_patches`` is not one of the supported
                names, or if ``N`` is larger than ``height``.
        """
        super().__init__()
        self.num_agents = N
        self.grid = MultiGrid(width, height, True)
        self.schedule = SimultaneousActivation(self)
        self.learning_model = learning_model

        distributors = {
            'random': self.distribute_randomly,
            'gradient_h': self.distribute_gradient_h,
            'gradient_l': self.distribute_gradient_l,
        }
        # Fail early: without patches the agents have nothing to eat and the
        # first step would break with an unrelated-looking error.
        if distribute_patches not in distributors:
            raise ValueError(
                f"Unknown distribute_patches {distribute_patches!r}; "
                f"expected one of {sorted(distributors)}"
            )
        if N > height:
            raise ValueError(
                f"N ({N}) must not exceed height ({height}): each agent gets its own row"
            )

        # Create agents
        for i in range(self.num_agents):
            agent = LearningAgent(i, self, row=i, learning_model=learning_model)
            self.grid.place_agent(agent, (0, i))
            self.schedule.add(agent)

        # Add patches with types based on the chosen distribution
        distributors[distribute_patches]()

        # Model-level means share the agent reporters' labels so that
        # model-level consumers (e.g. a chart keyed on these labels) find data.
        self.datacollector = DataCollector(
            model_reporters={
                "Affect": lambda m: m._mean_agent_value("affect"),
                "Value_Low": lambda m: m._mean_agent_value("value_low"),
                "Value_High": lambda m: m._mean_agent_value("value_high"),
            },
            agent_reporters={"Affect": "affect", "Value_Low": "value_low", "Value_High": "value_high"},
        )

    def _mean_agent_value(self, attr):
        """Return the mean of ``attr`` over scheduled agents (0.0 if none)."""
        agents = self.schedule.agents
        if not agents:
            return 0.0
        return sum(getattr(agent, attr) for agent in agents) / len(agents)

    def _add_patch(self, x, y, patch_type):
        """Create a ``Patch`` of ``patch_type`` and place it at ``(x, y)``."""
        patch = Patch(f'patch_{x}_{y}', self, patch_type)
        self.grid.place_agent(patch, (x, y))

    def distribute_randomly(self):
        """Give every cell a patch type drawn uniformly from HH, LL and HL."""
        patch_types = ["HH", "LL", "HL"]
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                self._add_patch(x, y, random.choice(patch_types))

    def distribute_gradient_h(self):
        """Fill the grid with HH patches that give way to HL toward the right.

        The chance of a cell being "HL" is ``x / width``, so column 0 is
        always "HH".
        """
        for y in range(self.grid.height):
            for x in range(self.grid.width):
                prob_hl = x / self.grid.width
                patch_type = "HL" if random.random() < prob_hl else "HH"
                self._add_patch(x, y, patch_type)

    def distribute_gradient_l(self):
        """Fill the grid with LL patches that give way to HL toward the right.

        The chance of a cell being "HL" is ``x / width``, so column 0 is
        always "LL".
        """
        for y in range(self.grid.height):
            for x in range(self.grid.width):
                prob_hl = x / self.grid.width
                patch_type = "HL" if random.random() < prob_hl else "LL"
                self._add_patch(x, y, patch_type)

    def step(self):
        """Record the current state, then advance every scheduled agent."""
        self.datacollector.collect(self)
        self.schedule.step()

    def visualize(self):
        """Show the patch layout as an image (one pixel per cell)."""
        palette = ['red', 'blue', 'purple', 'white']

        # Row-major matrix of palette indices; cells without a patch are white.
        matrix = []
        for y in range(self.grid.height):
            row = []
            for x in range(self.grid.width):
                cell_content = self.grid.get_cell_list_contents([(x, y)])
                patch = next((obj for obj in cell_content if isinstance(obj, Patch)), None)
                color = patch.get_color() if patch else "white"
                row.append(palette.index(color))
            matrix.append(row)

        fig, ax = plt.subplots(figsize=(10, 10))
        cmap = mcolors.ListedColormap(palette)
        bounds = [0, 1, 2, 3, 4]
        norm = mcolors.BoundaryNorm(bounds, cmap.N)
        ax.imshow(matrix, cmap = cmap, norm = norm)

        plt.show()

In [None]:
# Running the model
# 100 agents on a 100x100 grid, using the 'TD' learning rule and randomly typed patches.
model = LearningModel(
    N=100,
    width=100,
    height=100,
    learning_model='TD',
    distribute_patches='random',
)

# Advance the simulation 100 steps; the model records agent variables on each one.
for tick in range(100):
    model.step()

# Collected agent variables as a DataFrame.
df1 = model.datacollector.get_agent_vars_dataframe()

model.visualize()  # In case you want to check distribution of food on map.

In [None]:
import seaborn as sns
import pandas as pd

# Flatten the index into ordinary columns so 'Step' can be used for grouping and plotting.
df2 = df1.reset_index()
# Per-step mean of Value_High across agents, broadcast back onto every row so
# df2 keeps its original shape.
df2['avg_value'] = df2.groupby('Step')['Value_High'].transform('mean')

print(df2.head())

td = sns.lineplot(data=df2, x='Step', y='avg_value')
td.set(title='Mean Value_High across agents', xlabel='Step', ylabel='Mean Value_High')
plt.show()

In [None]:
def agent_portrayal(agent):
    """Return the drawing spec for ``agent``.

    Patches are drawn as full-cell rectangles on layer 0, colored by
    ``agent.get_color()``; every other agent is a white circle on layer 1.
    """
    if not isinstance(agent, Patch):
        return {
            "Shape": "circle",
            "Filled": "true",
            "Color": "white",
            "Layer": 1,
            "r": 1,
        }
    return {
        "Shape": "rect",
        "Filled": "true",
        "Color": agent.get_color(),
        "Layer": 0,
        "w": 1,
        "h": 1,
    }

# Create a grid visualization
# Arguments after the portrayal function: 100, 100, 500, 500
# (presumably grid width/height in cells, then canvas width/height in pixels — confirm).
grid = CanvasGrid(agent_portrayal, 100, 100, 500, 500)

# Create a chart for agent affects
# NOTE(review): ChartModule appears to look each label up in the model's
# DataCollector model-level variables — confirm LearningModel reports these
# labels at model level, otherwise the lines will stay at 0.
chart = ChartModule([
    {"Label": "Affect", "Color": "Black"},
    {"Label": "Value_Low", "Color": "Blue"},
    {"Label": "Value_High", "Color": "Red"},
])

# Fixed sizes plus two user-selectable choices passed to LearningModel.
model_params = dict(
    N=100,
    width=100,
    height=100,
    learning_model=Choice("Learning Model", value="RW", choices=["RW", "TD"]),
    distribute_patches=Choice(
        "Patch Distribution",
        value="random",
        choices=["random", "gradient_h", "gradient_l"],
    ),
)

server = ModularServer(LearningModel, [grid, chart], "Learning Model", model_params)
server.port = 8521
server.launch()