In [1]:
# Generic imports
import numpy as np 
import matplotlib.pyplot as plt

In [2]:
# How can players with different memory capacities play the game and reproduce?
# Are memory and reasoning decoupled?
# If we add a history of the opponents a player has played against, should that history have its own separate memory capacity?

In [3]:
class Player:
    """A game participant whose moves are chosen by a small neural network.

    Attributes are plain lists/objects so that a game can read and extend them:
    ``history`` holds the actions recorded for this player and ``rewards`` the
    payoffs recorded for this player.
    """

    def __init__(self, identifier, memory_capacity):
        """Create a player.

        Parameters
        ----------
        identifier
            Label stored on the player as ``self.identifier``.
        memory_capacity
            Number of inputs of the player's ``MLP`` brain (4 hidden units).
        """
        self.identifier = identifier
        self.brain = MLP(n_input=memory_capacity, n_hidden=4)
        self.history, self.rewards = [], []

    def act(self, opponent_history):
        """Return whatever the brain's forward pass yields for ``opponent_history``."""
        return self.brain.forward(opponent_history)

    def append_history(self, action):
        """Record ``action`` at the end of this player's history."""
        self.history.append(action)

    def append_rewards(self, reward):
        """Record ``reward`` at the end of this player's rewards."""
        self.rewards.append(reward)

    def reset_history(self):
        """Replace the history with a fresh empty list (rewards are left untouched)."""
        self.history = list()

In [4]:
class Environment:
    """Simple holder that pairs a player with a game."""

    def __init__(self, player, game):
        """Store ``player`` and ``game`` unchanged as attributes of the same name."""
        self.player, self.game = player, game

In [5]:
class PrisonersDilemma:
    """Repeated two-player game driven by a payoff table.

    Actions are integers used as indices: into ``payoff_matrix`` during
    simulation, and into ``self.actions`` (``0 -> 'C'``, ``1 -> 'D'``) when
    converting a history to labels.
    """

    def __init__(self, payoff_matrix):
        """Store the payoff table.

        Parameters
        ----------
        payoff_matrix : nested sequence
            ``payoff_matrix[a1][a2]`` must yield a pair
            ``(reward_for_p1, reward_for_p2)`` for player 1 choosing ``a1``
            and player 2 choosing ``a2``.
        """
        self.payoff_matrix = payoff_matrix
        self.actions = ['C', 'D']

    def simulate(self, p1, p2, n_rounds):
        """Play ``n_rounds`` rounds between ``p1`` and ``p2``.

        Each round, both players act on the opponent's history as it stood
        before the round; the first element of each returned action is then
        appended to the acting player's history and the matching payoffs are
        appended to the players' rewards. Nothing is returned: results live on
        the player objects.
        """
        for _ in range(n_rounds):

            # Both decisions are taken before either history is extended.
            a1 = p1.act(p2.history)
            a2 = p2.act(p1.history)
            p1.append_history(a1[0])
            p2.append_history(a2[0])
            r = self.payoff_matrix[a1[0]][a2[0]]
            p1.append_rewards(r[0])
            p2.append_rewards(r[1])

        # reset player history?

    def to_action(self, p_history):
        """Translate a history of integer actions into their labels.

        Parameters
        ----------
        p_history : iterable of int
            Action indices into ``self.actions``.

        Returns
        -------
        list
            A new list with one label per entry; ``p_history`` is not modified.
        """
        return [self.actions[a] for a in p_history]

In [6]:
class MLP:
    """Two-layer perceptron without biases that outputs a binary decision.

    The hidden layer uses a configurable activation (ReLU by default); the
    single output unit uses a logistic sigmoid that is thresholded at 0.5.
    """

    def __init__(self, n_input, n_hidden, activation=lambda x: np.maximum(0,x)):
        """Draw random weights from normal distributions.

        Parameters
        ----------
        n_input : int
            Number of input features (rows of ``W1``).
        n_hidden : int
            Number of hidden units.
        activation : callable, optional
            Element-wise hidden-layer activation; defaults to ``max(0, x)``.
        """
        self.W1 = np.random.normal(loc=-1, scale=2, size=(n_input, n_hidden))
        self.W2 = np.random.normal(loc=1, scale=2, size=(n_hidden, 1))
        self.f1 = activation
        self.f2 = lambda x: 1/(1 + np.exp(-x))

    def forward(self, X):
        """Compute a 0/1 decision for each input row.

        Parameters
        ----------
        X : array-like
            Either a single 1-D sequence (treated as one row) or a 2-D array
            with one sample per row.

        Returns
        -------
        numpy.ndarray
            1-D integer array with one entry (0 or 1) per row of ``X``.

        Notes
        -----
        Rows shorter than the network's input size are left-padded with random
        0/1 values drawn from NumPy's global RNG; longer rows keep only their
        last ``n_input`` columns.
        """
        X = np.array(X)

        if len(X.shape) == 1:
            X = np.reshape(X, (1, X.shape[0]))

        n = X.shape[1]
        m = self.W1.shape[0]

        if n < m:
            X = np.hstack((np.random.randint(2, size=(X.shape[0], m-n)), X))

        else:
            # Slice from n - m instead of -m: when m == 0, ``X[:, -0:]`` is
            # ``X[:, 0:]`` and would keep every column rather than none.
            X = X[:, n - m:]

        output = self.f2(self.f1(X @ self.W1) @ self.W2)
        output = np.array(output >= 0.5, dtype=bool).reshape(output.shape[0],) * 1
        return output

    def get_weights(self):
        """Return the weight matrices as the tuple ``(W1, W2)``."""
        return self.W1, self.W2

    def set_W1(self, W1):
        """Replace the input-to-hidden weight matrix."""
        self.W1 = W1

    def set_W2(self, W2):
        """Replace the hidden-to-output weight matrix."""
        self.W2 = W2

In [7]:
# Two players, identifiers 1 and 2, each with a brain that takes three inputs
p1, p2 = (Player(identifier=i, memory_capacity=3) for i in (1, 2))

In [8]:
# pm[a1][a2] -> (reward for player 1, reward for player 2)
pm = [
    [(3, 3), (1, 5)],
    [(5, 1), (2, 2)],
]
game = PrisonersDilemma(payoff_matrix=pm)

In [9]:
# Play ten rounds; results accumulate on the player objects
game.simulate(p1, p2, n_rounds=10)

In [10]:
# One line per player: the actions recorded for p1, then for p2
print(p1.history, p2.history, sep="\n")

[1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [12]:
# Payoffs recorded for player 2, one per simulated round
print(p2.rewards)

[2, 5, 5, 5, 5, 5, 5, 5, 5, 5]
