In [None]:
import gym, ray
import numpy as np
import copy
import requests
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
# Start Ray with local_mode=True. Per Ray's documentation, local mode runs
# work serially inside the driver process, which is meant for debugging
# (e.g. stepping through the environment from a notebook).
ray.init(local_mode=True)

In [None]:
class IteratedPDEnv(ray.rllib.env.multi_agent_env.MultiAgentEnv):
    """Iterated Prisoner's Dilemma whose payoffs come from an HTTP server.

    Each call to ``step`` plays exactly one round: player 1 plays the given
    action, player 2 plays tit-for-tat (it repeats player 1's previous
    action), and both moves are POSTed to the payoff server. The episode is
    marked done after that single round.

    Optional ``env_config`` keys (both default to the class attributes):
      - ``"server_url"``: endpoint the moves are POSTed to.
      - ``"timeout"``: seconds to wait for the server before giving up.

    NOTE(review): ``reset`` returns a per-agent dict, while ``step`` still
    takes a single integer action and returns single-agent values. The TODO
    in ``step`` tracks turning it into a true multi-agent step.
    """

    # Payoff server settings; overridable per instance through ``env_config``.
    server_url = "http://localhost:3000/play"
    timeout = 10.0  # seconds; without a timeout a dead server blocks forever

    done = False
    lastPlayer1Action = 0  # Cooperate
    agents = 2

    def __init__(self, env_config):
        """Set up the spaces, read optional server settings, and reset.

        Args:
            env_config: Environment configuration. If it is a dict (or a
                dict subclass), the optional keys ``"server_url"`` and
                ``"timeout"`` are read from it; anything else is ignored.
        """
        super().__init__()

        # Only dict-like configs are consulted, so any value that was
        # accepted (and ignored) before is still accepted.
        config = env_config if isinstance(env_config, dict) else {}
        self.server_url = config.get("server_url", self.server_url)
        self.timeout = config.get("timeout", self.timeout)

        # Note: Our action space is for ONE player; namely, exactly two
        # choices:
        #  - Defect
        #  - Cooperate
        self.action_space = gym.spaces.Discrete(2)
        # An observation is the pair (player 1 action, player 2 action).
        self.observation_space = gym.spaces.Tuple(
            (gym.spaces.Discrete(2), gym.spaces.Discrete(2))
        )

        self.reset()

    def reset(self):
        """Start a new episode and return the initial observation per agent.

        Only ``done`` is cleared; ``lastPlayer1Action`` is kept, so the
        opponent's tit-for-tat memory carries over from the previous episode.

        Returns:
            A dict mapping ``"agent_<i>"`` (for ``i`` in ``range(agents)``)
            to the initial observation ``(0, 0)``.
        """
        self.done = False
        return {f"agent_{i}": (0, 0) for i in range(self.agents)}

    def action_to_int(self, action):
        """Map ``"Cooperate"`` to 0 and ``"Defect"`` to 1; otherwise None."""
        if action == "Cooperate":
            return 0
        elif action == "Defect":
            return 1
        return None

    def int_to_action(self, i):
        """Map 0 to ``"Cooperate"`` and 1 to ``"Defect"``; otherwise None."""
        if i == 0:
            return "Cooperate"
        elif i == 1:
            return "Defect"
        return None

    def step(self, action):
        """Play one round against the tit-for-tat opponent.

        Args:
            action: Player 1's move, 0 (cooperate) or 1 (defect).

        Returns:
            The list ``[obs, reward, done, info]`` where ``obs`` is the pair
            of integer actions ``(player 1, player 2)``, ``reward`` is the
            server's ``"player1Payoff"``, ``done`` is always True, and
            ``info`` holds the decoded server response and the stored
            ``lastPlayer1Action``.

        Raises:
            AssertionError: If ``action`` is neither 0 nor 1.
            requests.RequestException: If the request times out, cannot be
                sent, or the server answers with an error status.
        """
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type and message are the
        # same as before.
        if action not in (0, 1):
            raise AssertionError("Unknown action!")

        # TODO: Build this up to run one step per agent! I.e. just a for loop; then return the appropriate
        # dictionaries
        #
        # step :: .. -> [ Map Agent Observation
        #               , Map Agent Reward
        #               , Map Agent Done
        #               , Map Agent Info
        #               ]

        player1Action = self.int_to_action(action)

        # Opponent strategy: copycat (tit-for-tat), i.e. repeat player 1's
        # previous move. Alternatives tried earlier were always "Cooperate"
        # (goody-two-shoes) and always "Defect" (betrayal).
        player2Action = self.int_to_action(self.lastPlayer1Action)

        # We are done once "step" is called; a round of the game is a single episode.
        self.done = True

        # Register the current move of the player so that the opponent can copy it in the next round
        self.lastPlayer1Action = action

        data = {
            "player1Action": player1Action,
            "player2Action": player2Action,
        }

        # Do a post to the server; get the payoffs. The timeout keeps a
        # stalled server from hanging training, and raise_for_status turns an
        # HTTP error into an exception here instead of a confusing decode or
        # key error further down.
        reply = requests.post(self.server_url, json=data, timeout=self.timeout)
        reply.raise_for_status()
        response = reply.json()

        reward = response["player1Payoff"]

        obs = (self.action_to_int(player1Action), self.action_to_int(player2Action))

        info = {
            "response": response,
            "lastPlayer1Action": self.lastPlayer1Action,
        }

        return [obs, reward, self.done, info]