In [1]:
# Generic imports
import numpy as np 
import matplotlib.pyplot as plt

In [2]:
# How can players with different memory capacities play the game and reproduce?
# Are memory and reasoning decoupled?
# If we add a history of the opponents a player has played against, should there be a separate memory capacity for that?

In [21]:
class Player:
    """A game participant whose moves are produced by an MLP "brain".

    The player keeps three running logs: the actions it has taken
    (``history``), the payoffs it has received (``rewards``) and a list of
    ``opponents`` (nothing in this class adds to that last list).
    """

    def __init__(self, identifier, memory_capacity):
        """Create a player.

        identifier      -- value handed back by ``get_id``.
        memory_capacity -- forwarded to the MLP as its number of inputs.
        """
        self.identifier = identifier
        self.brain = MLP(n_input=memory_capacity, n_hidden=4)
        self.history, self.rewards, self.opponents = [], [], []

    def act(self, opponent_history):
        """Return whatever the brain outputs for ``opponent_history``."""
        return self.brain.forward(opponent_history)

    def get_history(self):
        """Return the live list of recorded actions (not a copy)."""
        return self.history

    def append_history(self, action):
        """Record ``action`` at the end of the action log."""
        self.history.append(action)

    def append_rewards(self, reward):
        """Record ``reward`` at the end of the reward log."""
        self.rewards.append(reward)

    def reset_history(self):
        """Start a fresh, empty action log; the previous list object is left untouched."""
        self.history = list()

    def get_id(self):
        """Return the identifier given at construction."""
        return self.identifier

    def get_opponents(self):
        """Return the live list of opponents."""
        return self.opponents

In [22]:
class MLP:
    """Bias-free perceptron with one hidden layer that emits a 0/1 decision per input row."""

    def __init__(self, n_input, n_hidden, activation=lambda x: np.maximum(0,x)):
        """Draw both weight matrices from a normal distribution (mean 0, std 2).

        n_input    -- number of input features (rows of W1).
        n_hidden   -- number of hidden units.
        activation -- applied element-wise to the hidden layer; defaults to ReLU.
        """
        gaussian = dict(loc=0, scale=2)
        self.W1 = np.random.normal(size=(n_input, n_hidden), **gaussian)
        self.W2 = np.random.normal(size=(n_hidden, 1), **gaussian)
        self.f1 = activation

    def forward(self, X):
        """Map each row of ``X`` to 1 (output score >= 0) or 0.

        A 1-D input is treated as a single row. Rows narrower than the input
        layer are left-padded with random 0/1 values; wider rows keep only
        their last ``n_input`` columns. Returns a 1-D integer array with one
        entry per row.
        """
        batch = np.array(X)
        if batch.ndim == 1:
            batch = batch.reshape(1, batch.shape[0])

        width = self.W1.shape[0]
        missing = width - batch.shape[1]

        if missing > 0:
            # Not enough columns yet: fill the oldest slots with coin flips.
            filler = np.random.randint(2, size=(batch.shape[0], missing))
            batch = np.hstack((filler, batch))
        else:
            # Keep only the most recent ``width`` columns.
            batch = batch[:, -width:]

        hidden = self.f1(batch @ self.W1)
        scores = hidden @ self.W2
        fired = np.array(scores >= 0, dtype=bool).reshape(scores.shape[0],)
        # Multiplying by 1 turns the boolean mask into integers.
        return fired * 1

    def get_weights(self):
        """Return the pair ``(W1, W2)``."""
        return self.W1, self.W2

    def set_W1(self, W1):
        """Replace the input-to-hidden weights."""
        self.W1 = W1

    def set_W2(self, W2):
        """Replace the hidden-to-output weights."""
        self.W2 = W2

In [23]:
class PrisonersDilemma:
    """Iterated two-player game driven by a payoff matrix.

    Actions are integers that index ``self.actions`` (0 -> 'C', 1 -> 'D').
    ``payoff_matrix[a1][a2]`` is the pair
    ``(reward for the first player, reward for the second player)``.
    """

    def __init__(self, payoff_matrix):
        """Store the payoff matrix and the action labels."""
        self.payoff_matrix = payoff_matrix
        self.actions = ['C', 'D']

    def simulate2(self, p1, p2, n_games):
        """Play ``n_games`` rounds between ``p1`` and ``p2``.

        Each round every player picks an action from the other player's
        history so far; the actions and the resulting payoffs are then
        appended to both players' logs. Returns nothing.
        """
        for _ in range(n_games):
            # Both moves are computed before either history is updated, so
            # neither player sees the other's move for the current round.
            # ``act`` returns a sequence; its first element is the move.
            # int() keeps plain Python ints in the logs.
            a1 = int(p1.act(p2.get_history())[0])
            a2 = int(p2.act(p1.get_history())[0])
            p1.append_history(a1)
            p2.append_history(a2)
            r1, r2 = self.payoff_matrix[a1][a2]
            p1.append_rewards(r1)
            p2.append_rewards(r2)

        # Open question kept from the original: should histories be reset
        # here? They are currently left in place.

    def simulate(self, player, opponents, n_games):
        """Play ``player`` against every opponent for ``n_games`` rounds each.

        ``opponents`` may be a single player object or an iterable of them.
        Matchups run one after another through ``simulate2``, so histories
        and rewards accumulate on the player objects across matchups.

        NOTE(review): a batched version (all opponents evaluated in one
        forward pass) would need the player to keep one history per
        opponent; that is not available here.
        """
        # A lone opponent is recognised by duck typing, so callers may write
        # ``simulate(p1, p2, n)`` as well as ``simulate(p1, [p2, p3], n)``.
        if hasattr(opponents, 'act'):
            opponents = [opponents]
        for opponent in opponents:
            self.simulate2(player, opponent, n_games)

    def to_action(self, p_history):
        """Return the labels ('C'/'D') for a sequence of integer actions.

        The input sequence is not modified; a new list is returned.
        """
        return [self.actions[a] for a in p_history]

In [20]:
class Environment:
    """A population of players together with the game they play."""

    def __init__(self, players, game, fitness=lambda x: np.sum(x)):
        """Store the population, the game and the fitness function.

        players -- list of player objects.
        game    -- object exposing ``simulate(player, opponents, n_games)``.
        fitness -- callable kept on the instance; defaults to the sum of its
                   argument. No method of this class calls it yet.
        """
        self.players = players
        self.game = game
        self.fitness = fitness

    def evolve(self, n_games, n_matchups, n_generations):
        """Let every player play its sampled opponents once.

        n_games       -- rounds per matchup, forwarded to ``game.simulate``.
        n_matchups    -- target number of matchups per player.
        n_generations -- accepted but currently unused: only a single pass
                         over the population is performed and no selection
                         or reproduction step exists yet.
        """
        for p in self.players:
            # Must go through ``self``: the helper lives in the class
            # namespace and is not visible as a bare name from here.
            opponents = self.sample_opponents(p, self.players, n_matchups)
            self.game.simulate(p, opponents, n_games)

    @staticmethod
    def sample_opponents(player, players, n_matchups):
        """Draw opponents for ``player`` uniformly, with replacement.

        The number drawn is ``n_matchups`` minus the number of opponents the
        player already reports via ``get_opponents()``; when that is zero or
        negative an empty list is returned. Candidates are all entries of
        ``players`` whose identifier differs from the player's own.

        Raises ValueError when opponents are required but nobody is eligible
        (the rejection loop this replaces would never terminate in that case).
        """
        n_remaining = n_matchups - len(player.get_opponents())
        if n_remaining <= 0:
            return []

        # Exclude by identifier rather than by list position, so the result
        # stays correct when identifiers do not coincide with indices.
        own_id = player.get_id()
        candidates = [c for c in players if c.get_id() != own_id]
        if not candidates:
            raise ValueError("no eligible opponents: the population contains "
                             "no player other than the one being matched")

        picks = np.random.randint(len(candidates), size=n_remaining)
        return [candidates[i] for i in picks]

In [10]:
# Experiment configuration, named so the values are easy to find and tune.
SEED = 0
N_PLAYERS = 10
MEMORY_CAPACITY = 3
# PAYOFF_MATRIX[a1][a2] -> (reward of first player, reward of second player);
# action index 0 is 'C' and 1 is 'D'.
PAYOFF_MATRIX = [[(3, 3), (1, 5)], [(5, 1), (2, 2)]]

# Seed NumPy's global RNG before building the population: each player's
# network weights are drawn from it, so an unseeded run gives a different
# population after every kernel restart.
np.random.seed(SEED)

players = [Player(identifier=i, memory_capacity=MEMORY_CAPACITY) for i in range(N_PLAYERS)]
game = PrisonersDilemma(payoff_matrix=PAYOFF_MATRIX)
env = Environment(players=players, game=game)

9
9
7
8
1
4
9
9
3
3


In [14]:
# Two stand-alone players (identifiers 1 and 2, memory capacity 3) for a head-to-head match.
p1, p2 = (Player(identifier=ident, memory_capacity=3) for ident in (1, 2))

In [None]:
# Payoffs are looked up as pm[action_1][action_2] -> (reward_1, reward_2).
pm = [
    [(3, 3), (1, 5)],
    [(5, 1), (2, 2)],
]
game = PrisonersDilemma(pm)

In [None]:
# Head-to-head match of 10 rounds. `simulate2` is the pairwise entry point;
# `simulate` iterates over its second argument and would raise a TypeError
# when handed a single Player.
game.simulate2(p1, p2, 10)

In [None]:
# Show the recorded action sequence of each player, one per line.
print(p1.history, p2.history, sep="\n")

In [None]:
# Show the list of rewards recorded on p2 so far.
print(p2.rewards)