# Dream Teams

We will run tournaments pitting teams composed of specialist agents against random agents and against trained teams of different cultures.


In [1]:
import os
import random
import time
import platform
import torch
import gym
import numpy as np
import pickle

# This is the Gathering Game Environment based on Tribal Organization of agents
from tribes_env import GatheringEnv
from tribes_model import *

import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

print("Python version: ", platform.python_version())
print("Pytorch version: {}".format(torch.__version__))
print("OpenAI Gym version: {}".format(gym.__version__))

Python version:  3.6.4
Pytorch version: 0.4.1.post2
OpenAI Gym version: 0.9.2


## Play 1 Game with Rendering

We will rewrite the code for playing one rendered game so that:

(1) each agent can be loaded with any specific trained model

In [None]:
import pickle
import numpy as np

import torch
from torch.autograd import Variable

# Saved model file for each of the 9 AI agents; entry i is loaded into agent i.
# NOTE: every entry must end with a comma. Adjacent string literals are silently
# concatenated by Python, which would collapse this list into a single bogus path
# and make len(agent_models) == 1.
agent_models = [
    'MA_models/3T-9L/pacifist/p-100.0/MA0_Gather_ep5000.p',
    'MA_models/3T-9L/pacifist/p-100.0/MA1_Gather_ep5000.p',
    'MA_models/3T-9L/pacifist/p-100.0/MA2_Gather_ep5000.p',
    'MA_models/3T-9L/pacifist/p-100.0/MA3_Gather_ep5000.p',
    'MA_models/3T-9L/pacifist/p-100.0/MA4_Gather_ep5000.p',
    'MA_models/3T-9L/pacifist/p-100.0/MA5_Gather_ep5000.p',
    'MA_models/3T-9L/pacifist/p-100.0/MA6_Gather_ep5000.p',
    'MA_models/3T-9L/pacifist/p-100.0/MA7_Gather_ep5000.p',
    'MA_models/3T-9L/pacifist/p-100.0/MA8_Gather_ep5000.p',
]

# Agent counts: one AI agent per entry in agent_models, plus optional random agents.
# The tribe setup further down indexes agents[0]..agents[8], so it expects 9 AI agents.
num_ai_agents = len(agent_models)
num_rdn_agents = 0
num_agents = num_ai_agents+num_rdn_agents  # just the sum of the two

# Data structure for AI agents (agents will form their own Class later on)
# These empty lists are placeholders; all three names are re-bound further down
# in this cell before they are used.
agents = []
actions = []
tags = []

# Initialize environment
# NOTE(review): `render` and `num_actions` are not referenced by the live code of
# this cell (rendering is unconditional here) - confirm before relying on them.
render = True
num_actions = 8                       # There are 8 actions defined in Gathering

# Initialize constants
# num_frames only appears in the disabled frame-stacking/LSTM snippets of this cell.
num_frames = 4
max_episodes = 1    # number of games to play
max_frames = 300    # upper bound on frames per game (the loop also stops when any(done))

def unpack_env_obs(env_obs):
    """
    Convert the raw per-agent observations into tensors shaped for the Policy.

    env_obs holds one flat observation per agent. Each observation is viewed as
    (1, 10, 20, C) - for an 800-element observation C is 4 - and then permuted
    into (1, C, 10, 20), i.e. (batch_idx, in_channel, width, height).

    Returns a list with one such tensor per agent, in the same order as env_obs.
    """

    def to_policy_frames(flat_obs):
        # environment layout (channels last) -> Policy accepted stacked frames
        frames = torch.Tensor(flat_obs).view(1, 10, 20, -1)
        return frames.permute(0, 3, 1, 2)

    return [to_policy_frames(env_obs[k]) for k in range(len(env_obs))]


"""
For now, we do not implement LSTM            
# LSTM Change: Need to cycle hx and cx thru function
def select_action(model, state, lstm_hc, cuda):
    hx , cx = lstm_hc 
    num_frames, height, width = state.shape
    state = torch.FloatTensor(state.reshape(-1, num_frames, height, width))

    if cuda:
        state = state.cuda()

    probs, value, (hx, cx) = model((Variable(state), (hx, cx)))

    m = torch.distributions.Categorical(probs)
    action = m.sample()
    log_prob = m.log_prob(action)
    # LSTM Change: Need to cycle hx and cx thru function
    return action.data[0], log_prob, value, (hx, cx)
"""

def select_action(model, obs, cuda):
    """
    Sample one action from the agent's policy for the given observation.

    obs is expected to be stacked frames of dimension
    (batch_idx, in_channel, width, height). It is moved to the GPU when `cuda`
    is truthy and then passed to `model`, whose output is used as the
    probabilities of a torch.distributions.Categorical.

    Returns a tuple (action, log_prob): the sampled action as a Python number
    (via .item()) and the log-probability tensor of that sample.
    """
    policy_input = obs.cuda() if cuda else obs

    dist = torch.distributions.Categorical(model(policy_input))
    sampled = dist.sample()
    sampled_log_prob = dist.log_prob(sampled)

    return sampled.item(), sampled_log_prob


def load_info(agents, narrate=False):
    """
    Feed each agent its entry of the latest environment `info`.

    Relies on notebook-level names rather than parameters: `num_agents` (how many
    agents to update), `info` (indexed per agent) and, when narrating, `frame`.

    With narrate=True, prints a line whenever an agent is tagged or fires its
    laser, including its US/THEM hit counts. Returns None.
    """
    for idx in range(num_agents):
        agent = agents[idx]
        agent.load_info(info[idx])

        if not narrate:
            continue

        if agent.tagged:
            print('frame {}, agent{} is tagged'.format(frame,idx))
        if agent.laser_fired:
            print('frame {}, agent{} fires its laser'.format(frame,idx))
            print('and hit {} US and {} THEM'.format(agent.US_hit, agent.THEM_hit))

# Load models for AI agents
# Pre-size the list with placeholders; slot i is overwritten with the loaded model.
agents= [[] for i in range(num_ai_agents)]

for i, model_file in enumerate(agent_models):
    try:
        with open(model_file, 'rb') as f:
            # Model File include both model and optim parameters
            # NOTE(review): pickle.load executes arbitrary code from the file -
            # only load model files from a trusted source.
            saved_model = pickle.load(f)
            agents[i], _ = saved_model   # keep the model, discard the second (optimizer) element
            print("Load saved model for agent {}".format(i))
    except OSError:
        # Report the missing/unreadable file, then re-raise so the cell stops here.
        print('Model file not found.')
        raise

# Load random agents    
# With num_rdn_agents == 0 this range is empty and no random agent is added.
for i in range(num_ai_agents,num_agents):
    print("Load random agent {}".format(i))
    agents.append(Rdn_Policy())

# Initialize AI and random agent data
actions = [0 for i in range(num_agents)]   # action chosen by each agent for the current frame
tags = [0 for i in range(num_agents)]      # per-agent count of action 6 selections

# Establish tribal association
# Three tribes of three agents each; this indexing requires at least 9 agents.

tribes = []
tribes.append(Tribe(name='Vikings',color='blue', agents=[agents[0], agents[1], agents[2]]))
tribes.append(Tribe(name='Saxons', color='red', agents=[agents[3], agents[4], agents[5]]))
tribes.append(Tribe(name='Franks', color='purple', agents=[agents[6], agents[7], agents[8]]))
# tribes.append(Tribe(name='Crazies', color='yellow', agents=[agents[9]]))   # random agents are crazy!!!

# 3 tribes of 9 agents, used map defined in default.txt
# presumably Tribe(...) assigns .color and .tribe on its member agents - TODO confirm in tribes_model
agent_colors = [agent.color for agent in agents]
agent_tribes = [agent.tribe for agent in agents]
    
env = GatheringEnv(n_agents=num_agents,agent_colors=agent_colors, agent_tribes=agent_tribes, map_name='default')    
    
# Play max_episodes rendered games of at most max_frames frames each.
for ep in range(max_episodes):

    # Per-episode hit counters, one slot per agent
    US_hits = [0 for i in range(num_agents)]
    THEM_hits = [0 for i in range(num_agents)]

    env_obs = env.reset()  # Environment return observations

    # Unpack observations into data structure compatible with agent Policy
    agents_obs = unpack_env_obs(env_obs)

    for i in range(num_ai_agents):    # Reset agent info - laser tag statistics
        agents[i].reset_info()

    env.render()
    time.sleep(1/15)  # Change speed of video rendering

    # NOTE: for now observations are not stacked across frames and no LSTM
    # hidden state is carried between frames.

    for frame in range(max_frames):

        for i in range(num_ai_agents):    # For AI agents
            actions[i], _ = select_action(agents[i], agents_obs[i], cuda=False)
            # select_action() already returns a plain number (action.item()), so
            # compare by value; `is 6` only worked through CPython's small-int cache.
            if actions[i] == 6:
                tags[i] += 1   # record a tag for assessing aggressiveness

        for i in range(num_ai_agents, num_agents):   # For random agents
            actions[i] = agents[i].select_action(agents_obs[i])
            if actions[i] == 6:
                tags[i] += 1   # record a tag for assessing aggressiveness

        # Perform step
        env_obs, reward, done, info = env.step(actions)

        for i in range(num_ai_agents):
            agents[i].rewards.append(reward[i])  # Stack rewards

        # Unpack observations into data structure compatible with agent Policy
        agents_obs = unpack_env_obs(env_obs)
        # load_info() reads the notebook-level `info` just returned by env.step()
        load_info(agents, narrate=False)   # Load agent info for AI agents

        for i in range(num_agents):
            US_hits[i] += agents[i].US_hit
            THEM_hits[i] += agents[i].THEM_hit

        env.render()
        time.sleep(1/30)  # Change speed of video rendering

        if any(done):
            print("Done after {} frames".format(frame))
            break

env.close()  # Close the rendering window


# Print out statistics of AI agents

total_rewards = 0
total_tags = 0
total_US_hits = 0
total_THEM_hits = 0

# `frame` is the zero-based index of the last frame played, so the number of
# frames actually played is frame + 1. Dividing by `frame` itself overstated the
# rate and raised ZeroDivisionError when the game ended on its first frame.
frames_played = frame + 1

print ('\nStatistics by Agent')
print ('===================')
for i in range(num_ai_agents):
    # Aggressiveness = laser tags per frame played
    agent_tags = sum(agents[i].tag_hist)
    total_tags += agent_tags
    print ("Agent{} aggressiveness is {:.2f}".format(i, agent_tags/frames_played))

    agent_reward = sum(agents[i].rewards)
    total_rewards += agent_reward
    print ("Agent{} reward is {:d}".format(i, agent_reward))

    agent_US_hits = sum(agents[i].US_hits)
    agent_THEM_hits = sum(agents[i].THEM_hits)
    total_US_hits += agent_US_hits
    total_THEM_hits += agent_THEM_hits

    print('US agents hit = {}'.format(agent_US_hits))
    print('THEM agents hit = {}'.format(agent_THEM_hits ))

print ('\nStatistics in Aggregate')
print ('=======================')
print ('Total rewards gathered = {}'.format(total_rewards))
print ('Av. rewards per agent = {0:.2f}'.format(total_rewards/num_ai_agents))
print ('Num laser fired = {}'.format(total_tags))
print ('Total US Hit (friendly fire) = {}'.format(total_US_hits))
print ('Total THEM Hit = {}'.format(total_THEM_hits))
# The 1e-7 keeps the ratio defined when no agent was hit at all
print ('friendly fire (%) = {0:.3f}'.format(total_US_hits/(total_US_hits+total_THEM_hits+1e-7)))

print ('\nStatistics by Team')
print ('===================')
top_tribe = None
top_tribe_reward = 0

for i, tribe in enumerate(tribes):
    # Compare names by value; `is not` on a string literal tests object identity
    if tribe.name != 'Crazies':
        tribe_reward = sum(tribe.sum_rewards())
        print ('Tribe {} has total reward of {}'.format(tribe.name, tribe_reward))

        if tribe_reward > top_tribe_reward:   # Keep track of dominating team
            top_tribe_reward = tribe_reward
            top_tribe = tribe.name

# Team dominance calculation
# dominance = top team's reward / average reward of the other teams
print ('Dominating Team: {}'.format(top_tribe))
dominance = top_tribe_reward/((total_rewards-top_tribe_reward+1.1e-7)/(len(tribes)-1))
print ('Team dominance: {0:.2f}x'.format(dominance))


## Performance Stats - Individualist

Our research requires the gathering of these agent and team statistics averaged over 30 episodes of game play:

* Average agent reward - average number of apples gathered per agent per episode  
* The dominating team per game episode  
* Team dominance of the dominating team, defined by the following ratio:  
    Apples gathered by dominating team / average( Apples gathered by other competing teams)


In [18]:
import pickle
import numpy as np

import torch
from torch.autograd import Variable

# Experiment grid: every directory in dir_names is evaluated at every checkpoint
# in `episodes` (model files are named MA<i>_Gather_ep<eps>.p inside each directory).
dir_names = ['MA_models/3T-9L/individualist/']
episodes = [1000,2000,3000,4000,5000]  
culture = "individualist"   # passed to every Tribe(...) created below

# Performance Statistics - for Research Report
# Each table is indexed [dir_num][eps_num], one row per directory and one column per checkpoint.
av_agent_reward = [[0 for i in episodes] for j in dir_names]
dominating_tribe = [[None for i in episodes] for j in dir_names]
dom_tribe_reward = [[0 for i in episodes] for j in dir_names]
dominance = [[0 for i in episodes] for j in dir_names]

# There will be 9 agents - 3 teams of 3 AI agents each and 0 random agent
num_ai_agents = 9
num_rdn_agents = 0
num_agents = num_ai_agents+num_rdn_agents  # just the sum of the two

# Data structure for AI agents (agents will form their own Class later on)
# Placeholders only; these names are re-bound inside the experiment loop.
agents = []
actions = []
tags = []

# Initialize environment
render = False                        # when True the loop renders the game and sleeps between frames
num_actions = 8                       # There are 8 actions defined in Gathering

# Initialize constants
# num_frames only appears in the disabled frame-stacking/LSTM snippets of this cell.
num_frames = 4
max_episodes = 30    # games played per (directory, checkpoint); statistics are averaged over them
max_frames = 1000    # upper bound on frames per game
verbose = False      # when True, per-episode statistics are printed as well

def unpack_env_obs(env_obs):
    """
    Reshape the environment's per-agent observations for the Policy network.

    Every entry of env_obs is a flat observation. It is turned into a tensor,
    viewed as (1, 10, 20, C) - C is 4 for an 800-element observation - and
    permuted to (1, C, 10, 20), i.e. (batch_idx, in_channel, width, height).

    Returns a list of these tensors, one per agent, in env_obs order.
    """

    def to_policy_frames(flat_obs):
        # environment layout (channels last) -> Policy accepted stacked frames
        frames = torch.Tensor(flat_obs).view(1, 10, 20, -1)
        return frames.permute(0, 3, 1, 2)

    return [to_policy_frames(env_obs[k]) for k in range(len(env_obs))]


"""
For now, we do not implement LSTM            
# LSTM Change: Need to cycle hx and cx thru function
def select_action(model, state, lstm_hc, cuda):
    hx , cx = lstm_hc 
    num_frames, height, width = state.shape
    state = torch.FloatTensor(state.reshape(-1, num_frames, height, width))

    if cuda:
        state = state.cuda()

    probs, value, (hx, cx) = model((Variable(state), (hx, cx)))

    m = torch.distributions.Categorical(probs)
    action = m.sample()
    log_prob = m.log_prob(action)
    # LSTM Change: Need to cycle hx and cx thru function
    return action.data[0], log_prob, value, (hx, cx)
"""

def select_action(model, obs, cuda):
    """
    Draw an action for one agent from its policy.

    obs is expected to be stacked frames of dimension
    (batch_idx, in_channel, width, height); it is moved to the GPU first when
    `cuda` is truthy. The output of model(obs) parameterises a
    torch.distributions.Categorical from which a single action is sampled.

    Returns (action, log_prob): the sampled action as a Python number and the
    log-probability tensor of that sample.
    """
    policy_input = obs.cuda() if cuda else obs

    dist = torch.distributions.Categorical(model(policy_input))
    sampled = dist.sample()
    sampled_log_prob = dist.log_prob(sampled)

    return sampled.item(), sampled_log_prob


def load_info(agents, narrate=False):
    """
    Pass each agent its slice of the most recent environment `info`.

    Uses notebook-level names instead of arguments: `num_agents` bounds the loop,
    `info` is indexed per agent, and `frame` is read only when narrating.

    With narrate=True a message is printed for every agent that is tagged or has
    fired its laser (with its US/THEM hit counts). Returns None.
    """
    for idx in range(num_agents):
        agent = agents[idx]
        agent.load_info(info[idx])

        if not narrate:
            continue

        if agent.tagged:
            print('frame {}, agent{} is tagged'.format(frame,idx))
        if agent.laser_fired:
            print('frame {}, agent{} fires its laser'.format(frame,idx))
            print('and hit {} US and {} THEM'.format(agent.US_hit, agent.THEM_hit))

for dir_num, dir_name in enumerate(dir_names):
    print ("###### Dir = {} #######".format(dir_name))
    
    for eps_num, eps in enumerate(episodes):
        print ("###### Trained episodes = {} #######".format(eps))
    
        # Load models for AI agents
        # Pre-size the list with placeholders; slot i is overwritten with the loaded model.
        agents= [[] for i in range(num_ai_agents)]
        # Load the checkpoint saved after `eps` training episodes for each AI agent
        for i in range(num_ai_agents):
            model_file = dir_name+'MA{}_Gather_ep{}.p'.format(i,eps)
            try:
                with open(model_file, 'rb') as f:
                    # Model File include both model and optim parameters
                    # NOTE(review): pickle.load executes arbitrary code from the file -
                    # only load model files from a trusted source.
                    saved_model = pickle.load(f)
                    agents[i], _ = saved_model   # keep the model, discard the second (optimizer) element
                    print("Load saved model for agent {}".format(i))
            except OSError:
                # Report the missing/unreadable file, then re-raise so the run stops here.
                print('Model file not found.')
                raise

        # Load random agents    
        # With num_rdn_agents == 0 this range is empty and no random agent is added.
        for i in range(num_ai_agents,num_agents):
            print("Load random agent {}".format(i))
            agents.append(Rdn_Policy())
        
        # Establish tribal association
        # Three tribes of three agents each, all sharing the configured culture.
        tribes = []
        tribes.append(Tribe(name='Vikings',color='blue', culture=culture, \
                    agents=[agents[0], agents[1], agents[2]]))
        tribes.append(Tribe(name='Saxons', color='red', culture=culture, \
                    agents=[agents[3], agents[4], agents[5]]))
        tribes.append(Tribe(name='Franks', color='purple', culture=culture, \
                    agents=[agents[6], agents[7], agents[8]]))
        # tribes.append(Tribe(name='Crazies', color='yellow', agents=[agents[9]]))   # random agents are crazy!!!


        # 9 agents in 3 tribes, used map defined in default.txt
        # presumably Tribe(...) assigns .color and .tribe on its member agents - TODO confirm in tribes_model
        agent_colors = [agent.color for agent in agents]
        agent_tribes = [agent.tribe for agent in agents]

        env = GatheringEnv(n_agents=num_agents,agent_colors=agent_colors, agent_tribes=agent_tribes, \
                       map_name='default')    

        # Used to accumulate episode stats for averaging
        # Scalars are totals over all agents; the lists hold one running total per agent.
        cum_rewards = 0
        cum_tags = 0
        cum_US_hits = 0
        cum_THEM_hits = 0
        cum_agent_rewards = [0 for agent in agents]
        cum_agent_tags = [0 for agent in agents]
        cum_agent_US_hits = [0 for agent in agents]
        cum_agent_THEM_hits = [0 for agent in agents]
        # One slot per tribe: the loops below index this list with the position
        # from enumerate(tribes), so its length must match len(tribes). The old
        # `is not 'Crazies'` filter tested string identity and could leave the
        # list shorter than the indices used on it; an excluded tribe simply
        # keeps a zero that is never read.
        cum_tribe_rewards = [0 for _ in tribes]

        cuda = False           # run the policies on CPU
        start = time.time()    # wall-clock start, used for the per-episode timing report

        for ep in range(max_episodes):
    
            print('.', end='')  # To show progress
    
            # Initialize AI and random agent data
            actions = [0 for i in range(num_agents)]
            tags = [0 for i in range(num_agents)]
            US_hits = [0 for i in range(num_agents)]
            THEM_hits = [0 for i in range(num_agents)]

            env_obs = env.reset()  # Environment return observations
            """
            # For Debug only
            print (len(agents_obs))
            print (agents_obs[0].shape)
            """
    
            # Unpack observations into data structure compatible with agent Policy
            agents_obs = unpack_env_obs(env_obs)
    
            for i in range(num_ai_agents):    # Reset agent info - laser tag statistics
                agents[i].reset_info()    
    
            if render:
                env.render()
                time.sleep(1/15)  # Change speed of video rendering
    
            """
            # For Debug only
            print (len(agents_obs))
            print (agents_obs[0].shape)
            """
    
            """
            For now, we do not stack observations, and we do not implement LSTM
    
            state = np.stack([state]*num_frames)

            # Reset LSTM hidden units when episode begins
            cx = Variable(torch.zeros(1, 256))
            hx = Variable(torch.zeros(1, 256))
            """

            # Play one game: every agent picks an action, the environment steps,
            # and per-agent rewards / hit statistics are recorded for the frame.
            for frame in range(max_frames):

                for i in range(num_ai_agents):    # For AI agents
                    actions[i], _ = select_action(agents[i], agents_obs[i], cuda=cuda)
                    # select_action() already returns a plain number (action.item()), so
                    # compare by value; `is 6` only worked through CPython's small-int cache.
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for assessing aggressiveness

                for i in range(num_ai_agents, num_agents):   # For random agents
                    actions[i] = agents[i].select_action(agents_obs[i])
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for assessing aggressiveness

                # NOTE: no LSTM state is threaded through action selection for now.

                # Perform step
                env_obs, reward, done, info = env.step(actions)

                for i in range(num_ai_agents):
                    agents[i].rewards.append(reward[i])  # Stack rewards

                # Unpack observations into data structure compatible with agent Policy
                agents_obs = unpack_env_obs(env_obs)
                # load_info() reads the notebook-level `info` just returned by env.step()
                load_info(agents, narrate=False)   # Load agent info for AI agents

                for i in range(num_agents):
                    US_hits[i] += agents[i].US_hit
                    THEM_hits[i] += agents[i].THEM_hit

                # NOTE: observations are not stacked across frames for now.

                if render:
                    env.render()
                    time.sleep(1/15)  # Change speed of video rendering

                if any(done):
                    print("Done after {} frames".format(frame))
                    break
            
            # Collect this episode's statistics and fold them into the running totals
            ep_rewards = 0
            ep_tags = 0
            ep_US_hits = 0
            ep_THEM_hits = 0

            # `frame` is the zero-based index of the last frame played, so the
            # episode lasted frame + 1 frames. Dividing by `frame` overstated the
            # rate and raised ZeroDivisionError for a one-frame episode.
            frames_played = frame + 1

            if verbose:
                print ('\nStatistics by Agent')
                print ('===================')
            for i in range(num_ai_agents):
                agent_tags = sum(agents[i].tag_hist)
                ep_tags += agent_tags
                cum_agent_tags[i] += agent_tags

                agent_reward = sum(agents[i].rewards)
                ep_rewards += agent_reward
                cum_agent_rewards[i] += agent_reward

                agent_US_hits = sum(agents[i].US_hits)
                agent_THEM_hits = sum(agents[i].THEM_hits)
                ep_US_hits += agent_US_hits
                ep_THEM_hits += agent_THEM_hits
                cum_agent_US_hits[i] += agent_US_hits
                cum_agent_THEM_hits[i] += agent_THEM_hits

                if verbose:
                    print ("Agent{} aggressiveness is {:.2f}".format(i, agent_tags/frames_played))
                    print ("Agent{} reward is {:d}".format(i, agent_reward))
                    print('US agents hit = {}'.format(agent_US_hits))
                    print('THEM agents hit = {}'.format(agent_THEM_hits ))

            cum_rewards += ep_rewards
            cum_tags += ep_tags
            cum_US_hits += ep_US_hits
            cum_THEM_hits += ep_THEM_hits

            if verbose:
                print ('\nStatistics in Aggregate')
                print ('=======================')
                print ('Total rewards gathered = {}'.format(ep_rewards))
                print ('Num laser fired = {}'.format(ep_tags))
                print ('Total US Hit (friendly fire) = {}'.format(ep_US_hits))
                print ('Total THEM Hit = {}'.format(ep_THEM_hits))
                # The 1e-7 keeps the ratio defined when no agent was hit at all
                print ('friendly fire (%) = {0:.3f}'.format(ep_US_hits/(ep_US_hits+ep_THEM_hits+1e-7)))

            if verbose:
                print ('\nStatistics by Tribe')
                print ('===================')
            for i, t in enumerate(tribes):
                # Compare names by value; `is not` on a string literal tests object identity
                if t.name != 'Crazies':
                    ep_tribe_reward = sum(t.sum_rewards())
                    cum_tribe_rewards[i] += ep_tribe_reward
                    if verbose:
                        print ('Tribe {} has total reward of {}'.format(t.name, ep_tribe_reward))

            # Wipe per-episode histories so the next episode starts from a clean slate
            for i in range(num_ai_agents):
                agents[i].clear_history()

        env.close()  # Close the rendering window
        end = time.time()

        print ('\nAverage Statistics in Aggregate')
        print ('=================================')
        total_rewards = cum_rewards/max_episodes
        print ('Total rewards gathered = {:.1f}'.format(total_rewards))
        av_agent_reward[dir_num][eps_num] = cum_rewards/max_episodes/num_ai_agents
        print ('Av. agent reward = {:.2f}'.format(av_agent_reward[dir_num][eps_num]))
        print ('Num laser fired = {:.1f}'.format(cum_tags/max_episodes))
        print ('Total US Hit (friendly fire) = {:.1f}'.format(cum_US_hits/max_episodes))
        print ('Total THEM Hit = {:.1f}'.format(cum_THEM_hits/max_episodes))
        print ('friendly fire (%) = {:.3f}'.format(cum_US_hits/(cum_US_hits+cum_THEM_hits+1e-7)))

        print ('\nAverage Statistics by Tribe')
        print ('=============================')

        for i, tribe in enumerate(tribes):
            # Compare names by value; `is not` on a string literal tests object identity
            if tribe.name != 'Crazies':
                tribe_reward = cum_tribe_rewards[i]/max_episodes   # average per episode
                print ('Tribe {} has total reward of {:.1f}'.format(tribe.name, tribe_reward))

                # Keep track of dominating team and the rewards gathered
                if tribe_reward > dom_tribe_reward[dir_num][eps_num]:
                    dom_tribe_reward[dir_num][eps_num] = tribe_reward
                    dominating_tribe[dir_num][eps_num]  = tribe.name

        # Team dominance calculation
        # dominance = dominating tribe's average reward / average reward of the other
        # tribes; the 1.1e-7 keeps the ratio defined when the others gathered nothing.
        print ('Dominating Tribe: {}'.format(dominating_tribe[dir_num][eps_num]))
        dominance[dir_num][eps_num] = dom_tribe_reward[dir_num][eps_num]/((total_rewards - \
                                                dom_tribe_reward[dir_num][eps_num]+1.1e-7)/(len(tribes)-1))
        print ('Team dominance: {0:.2f}x'.format(dominance[dir_num][eps_num]))

        print ('\nAverage Statistics by Agent')
        print ('=============================')
        for i in range(num_ai_agents):
            print ("Agent{} of {} aggressiveness is {:.2f}".format(i, agents[i].tribe, \
                                                           cum_agent_tags[i]/(max_episodes*max_frames)))
            print ("Agent{} reward is {:.1f}".format(i, cum_agent_rewards[i]/max_episodes))
            print('US agents hit = {:.1f}'.format(cum_agent_US_hits[i]/max_episodes))
            print('THEM agents hit = {:.1f}'.format(cum_agent_THEM_hits[i]/max_episodes))

        print('Training time per epochs: {:.2f} sec'.format((end-start)/max_episodes))

# Note: Statistics for Research Report        
for reward in av_agent_reward:   # Average agent reward
    print(reward)
    
for tribe in dominating_tribe:   # Dominating team
    print(tribe)

for value in dominance:      # Team dominance
    print(value)


###### Dir = MA_models/3T-9L/individualist/ #######
###### Trained episodes = 1000 #######
Load saved model for agent 0
Load saved model for agent 1
Load saved model for agent 2
Load saved model for agent 3
Load saved model for agent 4
Load saved model for agent 5
Load saved model for agent 6
Load saved model for agent 7
Load saved model for agent 8
..............................
Average Statistics in Aggregate
Total rewards gathered = 371.1
Av. agent reward = 41.23
Num laser fired = 404.5
Total US Hit (friendly fire) = 56.9
Total THEM Hit = 137.3
friendly fire (%) = 0.293

Average Statistics by Tribe
Tribe Vikings has total reward of 18.2
Tribe Saxons has total reward of 168.8
Tribe Franks has total reward of 184.0
Dominating Tribe: Franks
Team dominance: 1.97x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.03
Agent0 reward is 0.6
US agents hit = 6.1
THEM agents hit = 6.4
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 17.7
US agents hit = 0.2
THEM agents

## Performance Stats - Pacifists

In [6]:
import pickle
import numpy as np

import torch
from torch.autograd import Variable

# Experiment grid: every directory in dir_names is evaluated at every checkpoint
# in `episodes`.
# presumably the p-<value> suffix is the pacifist penalty used in training - TODO confirm
dir_names = ["MA_models/3T-9L/pacifist/p-0.01/",
             "MA_models/3T-9L/pacifist/p-0.1/", 
             "MA_models/3T-9L/pacifist/p-1.0/",
             "MA_models/3T-9L/pacifist/p-10.0/",
             "MA_models/3T-9L/pacifist/p-100.0/"]
episodes = [1000,2000,3000,4000,5000]  
culture = "pacifist"

# Performance Statistics - for Research Report
# Each table is indexed [dir_num][eps_num], one row per directory and one column per checkpoint.
av_agent_reward = [[0 for i in episodes] for j in dir_names]
dominating_tribe = [[None for i in episodes] for j in dir_names]
dom_tribe_reward = [[0 for i in episodes] for j in dir_names]
dominance = [[0 for i in episodes] for j in dir_names]

# There will be 9 agents - 3 teams of 3 AI agents each and 0 random agent
num_ai_agents = 9
num_rdn_agents = 0
num_agents = num_ai_agents+num_rdn_agents  # just the sum of the two

# Data structure for AI agents (agents will form their own Class later on)
# Placeholders only; `agents` is re-bound inside the experiment loop.
agents = []
actions = []
tags = []

# Initialize environment
render = False
num_actions = 8                       # There are 8 actions defined in Gathering

# Initialize constants
num_frames = 4
max_episodes = 30
max_frames = 1000
verbose = False

def unpack_env_obs(env_obs):
    """
    Turn the environment's per-agent observations into Policy-ready tensors.

    Each entry of env_obs is a flat observation that is converted to a tensor,
    viewed as (1, 10, 20, C) - C is 4 for an 800-element observation - and
    permuted to (1, C, 10, 20), i.e. (batch_idx, in_channel, width, height).

    Returns a list holding one tensor per agent, in env_obs order.
    """

    def to_policy_frames(flat_obs):
        # environment layout (channels last) -> Policy accepted stacked frames
        frames = torch.Tensor(flat_obs).view(1, 10, 20, -1)
        return frames.permute(0, 3, 1, 2)

    return [to_policy_frames(env_obs[k]) for k in range(len(env_obs))]


"""
For now, we do not implement LSTM            
# LSTM Change: Need to cycle hx and cx thru function
def select_action(model, state, lstm_hc, cuda):
    hx , cx = lstm_hc 
    num_frames, height, width = state.shape
    state = torch.FloatTensor(state.reshape(-1, num_frames, height, width))

    if cuda:
        state = state.cuda()

    probs, value, (hx, cx) = model((Variable(state), (hx, cx)))

    m = torch.distributions.Categorical(probs)
    action = m.sample()
    log_prob = m.log_prob(action)
    # LSTM Change: Need to cycle hx and cx thru function
    return action.data[0], log_prob, value, (hx, cx)
"""

def select_action(model, obs, cuda):
    """
    Sample one action from the agent's policy for a single observation.

    obs is expected to be a tensor of stacked frames with dimensions
    (batch_idx, in_channel, width, height); it is moved to the GPU first when
    cuda is true. model - the agent's Policy - maps it to a probability
    distribution over the available actions, which is wrapped in
    torch.distributions.Categorical so that an action can be sampled and its
    log-probability computed (as needed for policy gradient).

    Returns the tuple (action as a Python number, log-probability tensor).
    """
    policy_input = obs.cuda() if cuda else obs

    action_dist = torch.distributions.Categorical(model(policy_input))
    sampled = action_dist.sample()

    return sampled.item(), action_dist.log_prob(sampled)


def load_info(agents, narrate=False, step_info=None, frame_num=None):
    """
    Hand each agent its entry of the info returned by env.step().

    Args:
        agents: sequence of agents; every agent must provide load_info(entry) and
            the tagged, laser_fired, US_hit and THEM_hit attributes used for narration.
        narrate: when True, print a line for every tagged agent and every laser shot.
        step_info: per-agent info, indexed by the agent's position in `agents`.
            When omitted, the notebook-level `info` from the latest env.step() is used.
        frame_num: frame number shown in the narration. When omitted, the
            notebook-level `frame` loop variable is used.

    Returns:
        None.
    """
    # Fall back to the notebook globals so existing load_info(agents, narrate) calls keep working
    if step_info is None:
        step_info = info
    if narrate and frame_num is None:
        frame_num = frame

    # Walk the agents actually passed in rather than the global num_agents counter
    for i, agent in enumerate(agents):
        agent.load_info(step_info[i])
        if narrate:
            if agent.tagged:
                print('frame {}, agent{} is tagged'.format(frame_num, i))
            if agent.laser_fired:
                print('frame {}, agent{} fires its laser'.format(frame_num, i))
                print('and hit {} US and {} THEM'.format(agent.US_hit, agent.THEM_hit))
    return

# Tournament evaluation. For every model directory in dir_names and every
# checkpoint in episodes: load the saved policies, group them into three tribes,
# play max_episodes games of at most max_frames frames, then print averaged
# statistics and record them in av_agent_reward / dominating_tribe /
# dom_tribe_reward / dominance for the research report.
for dir_num, dir_name in enumerate(dir_names):
    print ("###### Dir = {} #######".format(dir_name))

    for eps_num, eps in enumerate(episodes):
        print ("###### Trained episodes = {} #######".format(eps))

        # Load models for AI agents (one checkpoint file per agent)
        agents = [[] for i in range(num_ai_agents)]
        for i in range(num_ai_agents):
            model_file = dir_name+'MA{}_Gather_ep{}.p'.format(i,eps)
            try:
                with open(model_file, 'rb') as f:
                    # Model file includes both model and optim parameters; only the model is kept.
                    # NOTE(review): pickle.load can run arbitrary code - only open trusted checkpoints.
                    saved_model = pickle.load(f)
                    agents[i], _ = saved_model
                    print("Load saved model for agent {}".format(i))
            except OSError:
                print('Model file not found.')
                raise

        # Load random agents (none when num_agents == num_ai_agents)
        for i in range(num_ai_agents,num_agents):
            print("Load random agent {}".format(i))
            agents.append(Rdn_Policy())

        # Establish tribal association: three tribes of three AI agents each
        tribes = []
        tribes.append(Tribe(name='Vikings',color='blue', culture=culture,
                    agents=[agents[0], agents[1], agents[2]]))
        tribes.append(Tribe(name='Saxons', color='red', culture=culture,
                    agents=[agents[3], agents[4], agents[5]]))
        tribes.append(Tribe(name='Franks', color='purple', culture=culture,
                    agents=[agents[6], agents[7], agents[8]]))
        # tribes.append(Tribe(name='Crazies', color='yellow', agents=[agents[9]]))   # random agents are crazy!!!

        # 9 agents in 3 tribes, using the map defined in default.txt
        # NOTE(review): color/tribe are presumably set on each agent by Tribe() - confirm in tribes_model
        agent_colors = [agent.color for agent in agents]
        agent_tribes = [agent.tribe for agent in agents]

        env = GatheringEnv(n_agents=num_agents,agent_colors=agent_colors, agent_tribes=agent_tribes,
                       map_name='default')

        # Used to accumulate episode stats for averaging
        cum_rewards = 0
        cum_tags = 0
        cum_US_hits = 0
        cum_THEM_hits = 0
        cum_agent_rewards = [0 for agent in agents]
        cum_agent_tags = [0 for agent in agents]
        cum_agent_US_hits = [0 for agent in agents]
        cum_agent_THEM_hits = [0 for agent in agents]
        # One slot per tribe so that the enumerate() index used below always lines up,
        # even if a 'Crazies' tribe is added anywhere in the list
        cum_tribe_rewards = [0 for t in tribes]

        cuda = False
        start = time.time()

        for ep in range(max_episodes):

            print('.', end='')  # To show progress

            # Initialize AI and random agent data
            actions = [0 for i in range(num_agents)]
            tags = [0 for i in range(num_agents)]
            US_hits = [0 for i in range(num_agents)]
            THEM_hits = [0 for i in range(num_agents)]

            env_obs = env.reset()  # Environment return observations

            # Unpack observations into data structure compatible with agent Policy
            agents_obs = unpack_env_obs(env_obs)
            # Debug aid: print(len(agents_obs)); print(agents_obs[0].shape)

            for i in range(num_ai_agents):    # Reset agent info - laser tag statistics
                agents[i].reset_info()

            if render:
                env.render()
                time.sleep(1/15)  # Change speed of video rendering

            # For now, we do not stack observations, and we do not implement LSTM:
            #   state = np.stack([state]*num_frames)
            #   # Reset LSTM hidden units when episode begins
            #   cx = Variable(torch.zeros(1, 256))
            #   hx = Variable(torch.zeros(1, 256))

            for frame in range(max_frames):

                for i in range(num_ai_agents):    # For AI agents
                    actions[i], _ = select_action(agents[i], agents_obs[i], cuda=cuda)
                    # select_action already returns a plain number; compare by value, not identity
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for assessing aggressiveness

                for i in range(num_ai_agents, num_agents):   # For random agents
                    actions[i] = agents[i].select_action(agents_obs[i])
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for assessing aggressiveness

                # For now, we do not implement LSTM:
                #   action, log_prob, state_value, (hx,cx)  = select_action(model, state, (hx,cx))

                # if frame % 10 == 0:
                #     print (actions)

                # Perform step
                env_obs, reward, done, info = env.step(actions)
                # Debug aid: print(env_obs); print(reward); print(done)

                for i in range(num_ai_agents):
                    agents[i].rewards.append(reward[i])  # Stack rewards

                # Unpack observations into data structure compatible with agent Policy
                agents_obs = unpack_env_obs(env_obs)
                # Pass this step's info to every agent (load_info reads the notebook-level info / frame)
                load_info(agents, narrate=False)

                for i in range(num_agents):
                    US_hits[i] += agents[i].US_hit
                    THEM_hits[i] += agents[i].THEM_hit

                # For now, we do not stack observation, may come in handy later on:
                #   # Evict oldest diff add new diff to state
                #   next_state = np.stack([next_state]*num_frames)
                #   next_state[1:, :, :] = state[:-1, :, :]
                #   state = next_state

                if render:
                    env.render()
                    time.sleep(1/15)  # Change speed of video rendering

                if any(done):
                    print("Done after {} frames".format(frame))
                    break

            # Print out statistics of AI agents
            ep_rewards = 0
            ep_tags = 0
            ep_US_hits = 0
            ep_THEM_hits = 0

            if verbose:
                print ('\nStatistics by Agent')
                print ('===================')
            for i in range(num_ai_agents):
                agent_tags = sum(agents[i].tag_hist)
                ep_tags += agent_tags
                cum_agent_tags[i] += agent_tags

                agent_reward = sum(agents[i].rewards)
                ep_rewards += agent_reward
                cum_agent_rewards[i] += agent_reward

                agent_US_hits = sum(agents[i].US_hits)
                agent_THEM_hits = sum(agents[i].THEM_hits)
                ep_US_hits += agent_US_hits
                ep_THEM_hits += agent_THEM_hits
                cum_agent_US_hits[i] += agent_US_hits
                cum_agent_THEM_hits[i] += agent_THEM_hits

                if verbose:
                    # frame is the index of the last frame played, so frame + 1 frames were played
                    print ("Agent{} aggressiveness is {:.2f}".format(i, agent_tags/(frame + 1)))
                    print ("Agent{} reward is {:d}".format(i, agent_reward))
                    print('US agents hit = {}'.format(agent_US_hits))
                    print('THEM agents hit = {}'.format(agent_THEM_hits ))

            cum_rewards += ep_rewards
            cum_tags += ep_tags
            cum_US_hits += ep_US_hits
            cum_THEM_hits += ep_THEM_hits

            if verbose:
                print ('\nStatistics in Aggregate')
                print ('=======================')
                print ('Total rewards gathered = {}'.format(ep_rewards))
                print ('Num laser fired = {}'.format(ep_tags))
                print ('Total US Hit (friendly fire) = {}'.format(ep_US_hits))
                print ('Total THEM Hit = {}'.format(ep_THEM_hits))
                # 1e-7 keeps the ratio defined when no agent was hit at all
                print ('friendly fire (%) = {0:.3f}'.format(ep_US_hits/(ep_US_hits+ep_THEM_hits+1e-7)))

            if verbose:
                print ('\nStatistics by Tribe')
                print ('===================')
            for i, t in enumerate(tribes):
                if t.name != 'Crazies':   # compare names by value, not identity
                    ep_tribe_reward = sum(t.sum_rewards())
                    cum_tribe_rewards[i] += ep_tribe_reward
                    if verbose:
                        print ('Tribe {} has total reward of {}'.format(t.name, ep_tribe_reward))

            # Start the next episode with empty per-agent histories
            for i in range(num_ai_agents):
                agents[i].clear_history()

        env.close()  # Close the rendering window
        end = time.time()

        print ('\nAverage Statistics in Aggregate')
        print ('=================================')
        total_rewards = cum_rewards/max_episodes
        print ('Total rewards gathered = {:.1f}'.format(total_rewards))
        av_agent_reward[dir_num][eps_num] = cum_rewards/max_episodes/num_ai_agents
        print ('Av. agent reward = {:.2f}'.format(av_agent_reward[dir_num][eps_num]))
        print ('Num laser fired = {:.1f}'.format(cum_tags/max_episodes))
        print ('Total US Hit (friendly fire) = {:.1f}'.format(cum_US_hits/max_episodes))
        print ('Total THEM Hit = {:.1f}'.format(cum_THEM_hits/max_episodes))
        print ('friendly fire (%) = {:.3f}'.format(cum_US_hits/(cum_US_hits+cum_THEM_hits+1e-7)))

        print ('\nAverage Statistics by Tribe')
        print ('=============================')

        for i, tribe in enumerate(tribes):
            if tribe.name != 'Crazies':
                tribe_reward = cum_tribe_rewards[i]/max_episodes
                print ('Tribe {} has total reward of {:.1f}'.format(tribe.name, tribe_reward))

                # Keep track of dominating team and the rewards gathered
                if tribe_reward > dom_tribe_reward[dir_num][eps_num]:
                    dom_tribe_reward[dir_num][eps_num] = tribe_reward
                    dominating_tribe[dir_num][eps_num]  = tribe.name

        # Team dominance: the top tribe's average reward divided by the mean average
        # reward of the remaining tribes (1.1e-7 guards against a zero denominator)
        print ('Dominating Tribe: {}'.format(dominating_tribe[dir_num][eps_num]))
        dominance[dir_num][eps_num] = dom_tribe_reward[dir_num][eps_num]/((total_rewards -
                                                dom_tribe_reward[dir_num][eps_num]+1.1e-7)/(len(tribes)-1))
        print ('Team dominance: {0:.2f}x'.format(dominance[dir_num][eps_num]))

        print ('\nAverage Statistics by Agent')
        print ('=============================')
        for i in range(num_ai_agents):
            # Aggressiveness = lasers fired per frame, assuming every episode ran max_frames frames
            print ("Agent{} of {} aggressiveness is {:.2f}".format(i, agents[i].tribe,
                                                           cum_agent_tags[i]/(max_episodes*max_frames)))
            print ("Agent{} reward is {:.1f}".format(i, cum_agent_rewards[i]/max_episodes))
            print('US agents hit = {:.1f}'.format(cum_agent_US_hits[i]/max_episodes))
            print('THEM agents hit = {:.1f}'.format(cum_agent_THEM_hits[i]/max_episodes))

        # Wall-clock seconds per evaluated episode for this checkpoint
        print('Training time per epochs: {:.2f} sec'.format((end-start)/max_episodes))

# Summary tables for the research report: each printed row belongs to one model
# directory (entry of dir_names), each column to one checkpoint (entry of episodes).
for reward in av_agent_reward:   # average reward per AI agent
    print(reward)

for tribe in dominating_tribe:   # name of the tribe with the highest average reward
    print(tribe)

for value in dominance:      # dominance ratio of that tribe over the other tribes
    print(value)

###### Dir = MA_models/3T-9L/pacifist/p-0.01/ #######
###### Trained episodes = 1000 #######
Load saved model for agent 0
Load saved model for agent 1
Load saved model for agent 2
Load saved model for agent 3
Load saved model for agent 4
Load saved model for agent 5
Load saved model for agent 6
Load saved model for agent 7
Load saved model for agent 8
..............................
Average Statistics in Aggregate
Total rewards gathered = 386.5
Av. agent reward = 42.95
Num laser fired = 281.6
Total US Hit (friendly fire) = 40.8
Total THEM Hit = 108.7
friendly fire (%) = 0.273

Average Statistics by Tribe
Tribe Vikings has total reward of 140.9
Tribe Saxons has total reward of 50.8
Tribe Franks has total reward of 194.8
Dominating Tribe: Franks
Team dominance: 2.03x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.17
Agent0 reward is 106.9
US agents hit = 16.8
THEM agents hit = 61.3
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 12.4
US agents hit = 0.0
THEM 

..............................
Average Statistics in Aggregate
Total rewards gathered = 511.4
Av. agent reward = 56.82
Num laser fired = 130.3
Total US Hit (friendly fire) = 36.0
Total THEM Hit = 80.2
friendly fire (%) = 0.310

Average Statistics by Tribe
Tribe Vikings has total reward of 75.3
Tribe Saxons has total reward of 116.4
Tribe Franks has total reward of 319.7
Dominating Tribe: Franks
Team dominance: 3.33x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 0.0
US agents hit = 0.0
THEM agents hit = 0.0
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 30.7
US agents hit = 0.0
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 44.6
US agents hit = 0.0
THEM agents hit = 0.0
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 34.6
US agents hit = 0.0
THEM agents hit = 0.0
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 54.5
US agents hit = 0.0
THEM agents hit = 0.1
Agent5 of Saxons aggressivene

..............................
Average Statistics in Aggregate
Total rewards gathered = 691.8
Av. agent reward = 76.87
Num laser fired = 0.7
Total US Hit (friendly fire) = 0.7
Total THEM Hit = 1.3
friendly fire (%) = 0.333

Average Statistics by Tribe
Tribe Vikings has total reward of 232.6
Tribe Saxons has total reward of 243.9
Tribe Franks has total reward of 215.2
Dominating Tribe: Saxons
Team dominance: 1.09x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 72.0
US agents hit = 0.1
THEM agents hit = 0.3
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 67.9
US agents hit = 0.0
THEM agents hit = 0.1
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 92.7
US agents hit = 0.0
THEM agents hit = 0.2
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 80.5
US agents hit = 0.1
THEM agents hit = 0.0
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 89.2
US agents hit = 0.1
THEM agents hit = 0.2
Agent5 of Saxons aggressiveness

..............................
Average Statistics in Aggregate
Total rewards gathered = 655.3
Av. agent reward = 72.81
Num laser fired = 0.0
Total US Hit (friendly fire) = 0.0
Total THEM Hit = 0.0
friendly fire (%) = 0.000

Average Statistics by Tribe
Tribe Vikings has total reward of 230.9
Tribe Saxons has total reward of 206.1
Tribe Franks has total reward of 218.2
Dominating Tribe: Vikings
Team dominance: 1.09x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 79.8
US agents hit = 0.0
THEM agents hit = 0.0
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 84.8
US agents hit = 0.0
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 66.3
US agents hit = 0.0
THEM agents hit = 0.0
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 57.9
US agents hit = 0.0
THEM agents hit = 0.0
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 66.8
US agents hit = 0.0
THEM agents hit = 0.0
Agent5 of Saxons aggressivenes

..............................
Average Statistics in Aggregate
Total rewards gathered = 697.5
Av. agent reward = 77.50
Num laser fired = 0.1
Total US Hit (friendly fire) = 0.0
Total THEM Hit = 0.1
friendly fire (%) = 0.000

Average Statistics by Tribe
Tribe Vikings has total reward of 225.9
Tribe Saxons has total reward of 236.1
Tribe Franks has total reward of 235.5
Dominating Tribe: Saxons
Team dominance: 1.02x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 63.6
US agents hit = 0.0
THEM agents hit = 0.0
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 78.5
US agents hit = 0.0
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 83.8
US agents hit = 0.0
THEM agents hit = 0.1
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 86.9
US agents hit = 0.0
THEM agents hit = 0.0
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 82.8
US agents hit = 0.0
THEM agents hit = 0.0
Agent5 of Saxons aggressiveness

## Performance Stats - No_Fragging

In [8]:
import pickle
import numpy as np

import torch
from torch.autograd import Variable

# Model directories to evaluate, one per penalty setting of the no_fragging culture
dir_names = [
    "MA_models/3T-9L/no_fragging/p-0.01/",
    "MA_models/3T-9L/no_fragging/p-0.1/",
    "MA_models/3T-9L/no_fragging/p-1.0/",
    "MA_models/3T-9L/no_fragging/p-10.0/",
    "MA_models/3T-9L/no_fragging/p-100.0/",
]
episodes = list(range(1000, 6000, 1000))   # checkpoints saved every 1000 training episodes
culture = "no_fragging"

# Performance Statistics - for Research Report
# Each table has one row per model directory and one column per checkpoint
av_agent_reward = [[0] * len(episodes) for _ in dir_names]
dominating_tribe = [[None] * len(episodes) for _ in dir_names]
dom_tribe_reward = [[0] * len(episodes) for _ in dir_names]
dominance = [[0] * len(episodes) for _ in dir_names]

# There will be 9 agents - 3 teams of 3 AI agents each and 0 random agent
num_ai_agents, num_rdn_agents = 9, 0
num_agents = num_ai_agents + num_rdn_agents  # just the sum of the two

# Placeholder containers for the AI agents, their chosen actions and their tag
# counts; the evaluation loop further down rebinds each of them.
agents, actions, tags = [], [], []

# Environment settings
render = False        # keep the game window closed while evaluating
num_actions = 8       # There are 8 actions defined in Gathering

# Run-length constants
num_frames, max_episodes, max_frames = 4, 30, 1000
verbose = False       # per-episode statistics are printed only when True

def unpack_env_obs(env_obs):
    """
    Convert raw GatheringEnv observations into per-agent inputs for the Policy.

    env_obs holds one flat observation per agent. According to the environment
    each observation has 800 values: a 10x20 view box with 4 layers, laid out as
    (viewbox_width, viewbox_depth, 4).

    Every observation is turned into a tensor, viewed as (1, 10, 20, layers) and
    then permuted to the channel-first layout accepted by the CNN Policy:
    (batch_idx, in_channel, width, height).

    Returns a list with one such tensor per agent, in the same order as env_obs.
    """
    return [
        torch.Tensor(env_obs[agent_idx]).view(1, 10, 20, -1).permute(0, 3, 1, 2)
        for agent_idx in range(len(env_obs))
    ]


"""
For now, we do not implement LSTM            
# LSTM Change: Need to cycle hx and cx thru function
def select_action(model, state, lstm_hc, cuda):
    hx , cx = lstm_hc 
    num_frames, height, width = state.shape
    state = torch.FloatTensor(state.reshape(-1, num_frames, height, width))

    if cuda:
        state = state.cuda()

    probs, value, (hx, cx) = model((Variable(state), (hx, cx)))

    m = torch.distributions.Categorical(probs)
    action = m.sample()
    log_prob = m.log_prob(action)
    # LSTM Change: Need to cycle hx and cx thru function
    return action.data[0], log_prob, value, (hx, cx)
"""

def select_action(model, obs, cuda):
    """
    Sample one action from the agent's policy for a single observation.

    obs is expected to be a tensor of stacked frames with dimensions
    (batch_idx, in_channel, width, height); it is moved to the GPU first when
    cuda is true. model - the agent's Policy - maps it to a probability
    distribution over the available actions, which is wrapped in
    torch.distributions.Categorical so that an action can be sampled and its
    log-probability computed (as needed for policy gradient).

    Returns the tuple (action as a Python number, log-probability tensor).
    """
    policy_input = obs.cuda() if cuda else obs

    action_dist = torch.distributions.Categorical(model(policy_input))
    sampled = action_dist.sample()

    return sampled.item(), action_dist.log_prob(sampled)


def load_info(agents, narrate=False, step_info=None, frame_num=None):
    """
    Hand each agent its entry of the info returned by env.step().

    Args:
        agents: sequence of agents; every agent must provide load_info(entry) and
            the tagged, laser_fired, US_hit and THEM_hit attributes used for narration.
        narrate: when True, print a line for every tagged agent and every laser shot.
        step_info: per-agent info, indexed by the agent's position in `agents`.
            When omitted, the notebook-level `info` from the latest env.step() is used.
        frame_num: frame number shown in the narration. When omitted, the
            notebook-level `frame` loop variable is used.

    Returns:
        None.
    """
    # Fall back to the notebook globals so existing load_info(agents, narrate) calls keep working
    if step_info is None:
        step_info = info
    if narrate and frame_num is None:
        frame_num = frame

    # Walk the agents actually passed in rather than the global num_agents counter
    for i, agent in enumerate(agents):
        agent.load_info(step_info[i])
        if narrate:
            if agent.tagged:
                print('frame {}, agent{} is tagged'.format(frame_num, i))
            if agent.laser_fired:
                print('frame {}, agent{} fires its laser'.format(frame_num, i))
                print('and hit {} US and {} THEM'.format(agent.US_hit, agent.THEM_hit))
    return

# Tournament evaluation. For every model directory in dir_names and every
# checkpoint in episodes: load the saved policies, group them into three tribes,
# play max_episodes games of at most max_frames frames, then print averaged
# statistics and record them in av_agent_reward / dominating_tribe /
# dom_tribe_reward / dominance for the research report.
for dir_num, dir_name in enumerate(dir_names):
    print ("###### Dir = {} #######".format(dir_name))

    for eps_num, eps in enumerate(episodes):
        print ("###### Trained episodes = {} #######".format(eps))

        # Load models for AI agents (one checkpoint file per agent)
        agents = [[] for i in range(num_ai_agents)]
        for i in range(num_ai_agents):
            model_file = dir_name+'MA{}_Gather_ep{}.p'.format(i,eps)
            try:
                with open(model_file, 'rb') as f:
                    # Model file includes both model and optim parameters; only the model is kept.
                    # NOTE(review): pickle.load can run arbitrary code - only open trusted checkpoints.
                    saved_model = pickle.load(f)
                    agents[i], _ = saved_model
                    print("Load saved model for agent {}".format(i))
            except OSError:
                print('Model file not found.')
                raise

        # Load random agents (none when num_agents == num_ai_agents)
        for i in range(num_ai_agents,num_agents):
            print("Load random agent {}".format(i))
            agents.append(Rdn_Policy())

        # Establish tribal association: three tribes of three AI agents each
        tribes = []
        tribes.append(Tribe(name='Vikings',color='blue', culture=culture,
                    agents=[agents[0], agents[1], agents[2]]))
        tribes.append(Tribe(name='Saxons', color='red', culture=culture,
                    agents=[agents[3], agents[4], agents[5]]))
        tribes.append(Tribe(name='Franks', color='purple', culture=culture,
                    agents=[agents[6], agents[7], agents[8]]))
        # tribes.append(Tribe(name='Crazies', color='yellow', agents=[agents[9]]))   # random agents are crazy!!!

        # 9 agents in 3 tribes, using the map defined in default.txt
        # NOTE(review): color/tribe are presumably set on each agent by Tribe() - confirm in tribes_model
        agent_colors = [agent.color for agent in agents]
        agent_tribes = [agent.tribe for agent in agents]

        env = GatheringEnv(n_agents=num_agents,agent_colors=agent_colors, agent_tribes=agent_tribes,
                       map_name='default')

        # Used to accumulate episode stats for averaging
        cum_rewards = 0
        cum_tags = 0
        cum_US_hits = 0
        cum_THEM_hits = 0
        cum_agent_rewards = [0 for agent in agents]
        cum_agent_tags = [0 for agent in agents]
        cum_agent_US_hits = [0 for agent in agents]
        cum_agent_THEM_hits = [0 for agent in agents]
        # One slot per tribe so that the enumerate() index used below always lines up,
        # even if a 'Crazies' tribe is added anywhere in the list
        cum_tribe_rewards = [0 for t in tribes]

        cuda = False
        start = time.time()

        for ep in range(max_episodes):

            print('.', end='')  # To show progress

            # Initialize AI and random agent data
            actions = [0 for i in range(num_agents)]
            tags = [0 for i in range(num_agents)]
            US_hits = [0 for i in range(num_agents)]
            THEM_hits = [0 for i in range(num_agents)]

            env_obs = env.reset()  # Environment return observations

            # Unpack observations into data structure compatible with agent Policy
            agents_obs = unpack_env_obs(env_obs)
            # Debug aid: print(len(agents_obs)); print(agents_obs[0].shape)

            for i in range(num_ai_agents):    # Reset agent info - laser tag statistics
                agents[i].reset_info()

            if render:
                env.render()
                time.sleep(1/15)  # Change speed of video rendering

            # For now, we do not stack observations, and we do not implement LSTM:
            #   state = np.stack([state]*num_frames)
            #   # Reset LSTM hidden units when episode begins
            #   cx = Variable(torch.zeros(1, 256))
            #   hx = Variable(torch.zeros(1, 256))

            for frame in range(max_frames):

                for i in range(num_ai_agents):    # For AI agents
                    actions[i], _ = select_action(agents[i], agents_obs[i], cuda=cuda)
                    # select_action already returns a plain number; compare by value, not identity
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for assessing aggressiveness

                for i in range(num_ai_agents, num_agents):   # For random agents
                    actions[i] = agents[i].select_action(agents_obs[i])
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for assessing aggressiveness

                # For now, we do not implement LSTM:
                #   action, log_prob, state_value, (hx,cx)  = select_action(model, state, (hx,cx))

                # if frame % 10 == 0:
                #     print (actions)

                # Perform step
                env_obs, reward, done, info = env.step(actions)
                # Debug aid: print(env_obs); print(reward); print(done)

                for i in range(num_ai_agents):
                    agents[i].rewards.append(reward[i])  # Stack rewards

                # Unpack observations into data structure compatible with agent Policy
                agents_obs = unpack_env_obs(env_obs)
                # Pass this step's info to every agent (load_info reads the notebook-level info / frame)
                load_info(agents, narrate=False)

                for i in range(num_agents):
                    US_hits[i] += agents[i].US_hit
                    THEM_hits[i] += agents[i].THEM_hit

                # For now, we do not stack observation, may come in handy later on:
                #   # Evict oldest diff add new diff to state
                #   next_state = np.stack([next_state]*num_frames)
                #   next_state[1:, :, :] = state[:-1, :, :]
                #   state = next_state

                if render:
                    env.render()
                    time.sleep(1/15)  # Change speed of video rendering

                if any(done):
                    print("Done after {} frames".format(frame))
                    break

            # Print out statistics of AI agents
            ep_rewards = 0
            ep_tags = 0
            ep_US_hits = 0
            ep_THEM_hits = 0

            if verbose:
                print ('\nStatistics by Agent')
                print ('===================')
            for i in range(num_ai_agents):
                agent_tags = sum(agents[i].tag_hist)
                ep_tags += agent_tags
                cum_agent_tags[i] += agent_tags

                agent_reward = sum(agents[i].rewards)
                ep_rewards += agent_reward
                cum_agent_rewards[i] += agent_reward

                agent_US_hits = sum(agents[i].US_hits)
                agent_THEM_hits = sum(agents[i].THEM_hits)
                ep_US_hits += agent_US_hits
                ep_THEM_hits += agent_THEM_hits
                cum_agent_US_hits[i] += agent_US_hits
                cum_agent_THEM_hits[i] += agent_THEM_hits

                if verbose:
                    # frame is the index of the last frame played, so frame + 1 frames were played
                    print ("Agent{} aggressiveness is {:.2f}".format(i, agent_tags/(frame + 1)))
                    print ("Agent{} reward is {:d}".format(i, agent_reward))
                    print('US agents hit = {}'.format(agent_US_hits))
                    print('THEM agents hit = {}'.format(agent_THEM_hits ))

            cum_rewards += ep_rewards
            cum_tags += ep_tags
            cum_US_hits += ep_US_hits
            cum_THEM_hits += ep_THEM_hits

            if verbose:
                print ('\nStatistics in Aggregate')
                print ('=======================')
                print ('Total rewards gathered = {}'.format(ep_rewards))
                print ('Num laser fired = {}'.format(ep_tags))
                print ('Total US Hit (friendly fire) = {}'.format(ep_US_hits))
                print ('Total THEM Hit = {}'.format(ep_THEM_hits))
                # 1e-7 keeps the ratio defined when no agent was hit at all
                print ('friendly fire (%) = {0:.3f}'.format(ep_US_hits/(ep_US_hits+ep_THEM_hits+1e-7)))

            if verbose:
                print ('\nStatistics by Tribe')
                print ('===================')
            for i, t in enumerate(tribes):
                if t.name != 'Crazies':   # compare names by value, not identity
                    ep_tribe_reward = sum(t.sum_rewards())
                    cum_tribe_rewards[i] += ep_tribe_reward
                    if verbose:
                        print ('Tribe {} has total reward of {}'.format(t.name, ep_tribe_reward))

            # Start the next episode with empty per-agent histories
            for i in range(num_ai_agents):
                agents[i].clear_history()

        env.close()  # Close the rendering window
        end = time.time()

        print ('\nAverage Statistics in Aggregate')
        print ('=================================')
        total_rewards = cum_rewards/max_episodes
        print ('Total rewards gathered = {:.1f}'.format(total_rewards))
        av_agent_reward[dir_num][eps_num] = cum_rewards/max_episodes/num_ai_agents
        print ('Av. agent reward = {:.2f}'.format(av_agent_reward[dir_num][eps_num]))
        print ('Num laser fired = {:.1f}'.format(cum_tags/max_episodes))
        print ('Total US Hit (friendly fire) = {:.1f}'.format(cum_US_hits/max_episodes))
        print ('Total THEM Hit = {:.1f}'.format(cum_THEM_hits/max_episodes))
        print ('friendly fire (%) = {:.3f}'.format(cum_US_hits/(cum_US_hits+cum_THEM_hits+1e-7)))

        print ('\nAverage Statistics by Tribe')
        print ('=============================')

        for i, tribe in enumerate(tribes):
            if tribe.name != 'Crazies':
                tribe_reward = cum_tribe_rewards[i]/max_episodes
                print ('Tribe {} has total reward of {:.1f}'.format(tribe.name, tribe_reward))

                # Keep track of dominating team and the rewards gathered
                if tribe_reward > dom_tribe_reward[dir_num][eps_num]:
                    dom_tribe_reward[dir_num][eps_num] = tribe_reward
                    dominating_tribe[dir_num][eps_num]  = tribe.name

        # Team dominance: the top tribe's average reward divided by the mean average
        # reward of the remaining tribes (1.1e-7 guards against a zero denominator)
        print ('Dominating Tribe: {}'.format(dominating_tribe[dir_num][eps_num]))
        dominance[dir_num][eps_num] = dom_tribe_reward[dir_num][eps_num]/((total_rewards -
                                                dom_tribe_reward[dir_num][eps_num]+1.1e-7)/(len(tribes)-1))
        print ('Team dominance: {0:.2f}x'.format(dominance[dir_num][eps_num]))

        print ('\nAverage Statistics by Agent')
        print ('=============================')
        for i in range(num_ai_agents):
            # Aggressiveness = lasers fired per frame, assuming every episode ran max_frames frames
            print ("Agent{} of {} aggressiveness is {:.2f}".format(i, agents[i].tribe,
                                                           cum_agent_tags[i]/(max_episodes*max_frames)))
            print ("Agent{} reward is {:.1f}".format(i, cum_agent_rewards[i]/max_episodes))
            print('US agents hit = {:.1f}'.format(cum_agent_US_hits[i]/max_episodes))
            print('THEM agents hit = {:.1f}'.format(cum_agent_THEM_hits[i]/max_episodes))

        # Wall-clock seconds per evaluated episode for this checkpoint
        print('Training time per epochs: {:.2f} sec'.format((end-start)/max_episodes))

# Note: Statistics for Research Report
# Print the result tables one row (one model directory) per line, in this order:
# average agent reward, dominating team, team dominance.
for stats_table in (av_agent_reward, dominating_tribe, dominance):
    for stats_row in stats_table:
        print(stats_row)

###### Dir = MA_models/3T-9L/no_fragging/p-0.01/ #######
###### Trained episodes = 1000 #######
Load saved model for agent 0
Load saved model for agent 1
Load saved model for agent 2
Load saved model for agent 3
Load saved model for agent 4
Load saved model for agent 5
Load saved model for agent 6
Load saved model for agent 7
Load saved model for agent 8
..............................
Average Statistics in Aggregate
Total rewards gathered = 406.5
Av. agent reward = 45.17
Num laser fired = 331.4
Total US Hit (friendly fire) = 46.9
Total THEM Hit = 135.9
friendly fire (%) = 0.257

Average Statistics by Tribe
Tribe Vikings has total reward of 68.8
Tribe Saxons has total reward of 182.7
Tribe Franks has total reward of 155.0
Dominating Tribe: Saxons
Team dominance: 1.63x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 26.1
US agents hit = 0.0
THEM agents hit = 0.2
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 20.0
US agents hit = 0.0
THEM 

..............................
Average Statistics in Aggregate
Total rewards gathered = 357.7
Av. agent reward = 39.75
Num laser fired = 398.6
Total US Hit (friendly fire) = 54.6
Total THEM Hit = 115.3
friendly fire (%) = 0.321

Average Statistics by Tribe
Tribe Vikings has total reward of 48.5
Tribe Saxons has total reward of 76.8
Tribe Franks has total reward of 232.4
Dominating Tribe: Franks
Team dominance: 3.71x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 27.9
US agents hit = 0.1
THEM agents hit = 0.3
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 20.6
US agents hit = 0.0
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 0.0
US agents hit = 0.0
THEM agents hit = 0.0
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 31.7
US agents hit = 0.0
THEM agents hit = 0.0
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 19.3
US agents hit = 0.0
THEM agents hit = 0.0
Agent5 of Saxons aggressivene

..............................
Average Statistics in Aggregate
Total rewards gathered = 535.8
Av. agent reward = 59.53
Num laser fired = 213.9
Total US Hit (friendly fire) = 41.9
Total THEM Hit = 107.9
friendly fire (%) = 0.280

Average Statistics by Tribe
Tribe Vikings has total reward of 86.0
Tribe Saxons has total reward of 113.6
Tribe Franks has total reward of 336.2
Dominating Tribe: Franks
Team dominance: 3.37x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 28.6
US agents hit = 0.0
THEM agents hit = 0.0
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 29.8
US agents hit = 0.0
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 27.6
US agents hit = 0.0
THEM agents hit = 0.0
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 36.6
US agents hit = 0.0
THEM agents hit = 0.0
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 36.5
US agents hit = 0.0
THEM agents hit = 0.0
Agent5 of Saxons aggressive

..............................
Average Statistics in Aggregate
Total rewards gathered = 662.1
Av. agent reward = 73.56
Num laser fired = 0.3
Total US Hit (friendly fire) = 0.1
Total THEM Hit = 0.4
friendly fire (%) = 0.235

Average Statistics by Tribe
Tribe Vikings has total reward of 241.5
Tribe Saxons has total reward of 150.2
Tribe Franks has total reward of 270.4
Dominating Tribe: Franks
Team dominance: 1.38x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 85.3
US agents hit = 0.0
THEM agents hit = 0.1
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 76.4
US agents hit = 0.0
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 79.9
US agents hit = 0.0
THEM agents hit = 0.0
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 77.2
US agents hit = 0.0
THEM agents hit = 0.0
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 0.4
US agents hit = 0.0
THEM agents hit = 0.0
Agent5 of Saxons aggressiveness 

..............................
Average Statistics in Aggregate
Total rewards gathered = 644.3
Av. agent reward = 71.59
Num laser fired = 0.3
Total US Hit (friendly fire) = 0.1
Total THEM Hit = 0.6
friendly fire (%) = 0.150

Average Statistics by Tribe
Tribe Vikings has total reward of 224.5
Tribe Saxons has total reward of 218.2
Tribe Franks has total reward of 201.6
Dominating Tribe: Vikings
Team dominance: 1.07x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 79.0
US agents hit = 0.0
THEM agents hit = 0.0
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 77.4
US agents hit = 0.0
THEM agents hit = 0.2
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 68.1
US agents hit = 0.0
THEM agents hit = 0.0
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 76.6
US agents hit = 0.0
THEM agents hit = 0.1
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 75.3
US agents hit = 0.1
THEM agents hit = 0.2
Agent5 of Saxons aggressivenes

## Performance Stats - Cooperative

In [11]:
import pickle
import numpy as np

import torch
from torch.autograd import Variable

# One model directory per cooperative factor ("cf") setting to evaluate
dir_names = ["MA_models/3T-9L/cooperative/cf{}/".format(cf)
             for cf in ("0.01", "0.1", "1.0", "5.0", "10", "15", "20", "25", "50")]

# Training checkpoints (number of trained episodes) evaluated in each directory
episodes = list(range(1000, 5001, 1000))
culture = "cooperative"

# Performance Statistics - for Research Report
# Each table has one row per directory and one column per checkpoint.
av_agent_reward = [[0] * len(episodes) for _ in dir_names]
dominating_tribe = [[None] * len(episodes) for _ in dir_names]
dom_tribe_reward = [[0] * len(episodes) for _ in dir_names]
dominance = [[0] * len(episodes) for _ in dir_names]

# 3 teams of 3 AI agents each, and no random agent
num_ai_agents = 9
num_rdn_agents = 0
num_agents = num_ai_agents + num_rdn_agents  # total head count

# Containers for AI agents (agents will form their own Class later on)
agents, actions, tags = [], [], []

# Environment settings
render = False
num_actions = 8                       # There are 8 actions defined in Gathering

# Run constants
num_frames = 4
max_episodes = 30
max_frames = 1000
verbose = False

def unpack_env_obs(env_obs):
    """
    Convert the per-agent observations returned by GatheringEnv into Policy inputs.

    Gathering is a partially-observable Markov Game. env_obs holds one flat observation
    per agent; per the original notes it is a numpy array of dimension (num_agent, 800),
    where the 800 elements encode a 10x20 view box with 4 layers in the format
    (viewbox_width, viewbox_depth, 4).

    Each observation is viewed as (1, 10, 20, -1) and then permuted to
    (batch_idx, in_channel, width, height), the layout accepted by the Policy class.

    Returns a list with one tensor per agent, in the same order as env_obs.
    """

    def _as_policy_frames(agent_view):
        # Policy is a 3-layer CNN: go from the environment layout to channel-first
        frames = torch.Tensor(agent_view).view(1, 10, 20, -1)
        return frames.permute(0, 3, 1, 2)

    return [_as_policy_frames(env_obs[idx]) for idx in range(len(env_obs))]


"""
For now, we do not implement LSTM            
# LSTM Change: Need to cycle hx and cx thru function
def select_action(model, state, lstm_hc, cuda):
    hx , cx = lstm_hc 
    num_frames, height, width = state.shape
    state = torch.FloatTensor(state.reshape(-1, num_frames, height, width))

    if cuda:
        state = state.cuda()

    probs, value, (hx, cx) = model((Variable(state), (hx, cx)))

    m = torch.distributions.Categorical(probs)
    action = m.sample()
    log_prob = m.log_prob(action)
    # LSTM Change: Need to cycle hx and cx thru function
    return action.data[0], log_prob, value, (hx, cx)
"""

def select_action(model, obs, cuda):
    """
    Sample one action for an agent from its Policy.

    obs is expected to be a tensor of stacked frames with dimensions
    (batch_idx, in_channel, width, height). It is moved to the GPU when cuda is true
    and fed to model (the agent's Policy), whose output is used as the probabilities
    of a torch.distributions.Categorical over the available actions.

    Returns a tuple (action, log_prob): the sampled action as a Python number and the
    log-probability tensor of that action under the distribution.
    """
    # Earlier input-reshaping variants (3-layer CNN / 2-layer NN) are not needed here:
    # obs already arrives in the layout the Policy accepts.
    policy_input = obs.cuda() if cuda else obs

    action_dist = torch.distributions.Categorical(model(policy_input))
    sampled = action_dist.sample()

    return sampled.item(), action_dist.log_prob(sampled)


def load_info(agents, narrate=False):
    """
    Push the latest per-agent environment info into every agent.

    Relies on module-level names rather than parameters: num_agents (how many agents to
    update), info (indexed per agent and passed to each agent's load_info) and, only
    when narrate is true, frame (used in the printed messages).

    When narrate is true, a line is printed for every agent that is tagged and for
    every agent that fired its laser, together with its US/THEM hit counts.

    Returns None.
    """
    for idx in range(num_agents):
        agent = agents[idx]
        agent.load_info(info[idx])

        if not narrate:
            continue

        if agent.tagged:
            print('frame {}, agent{} is tagged'.format(frame,idx))
        if agent.laser_fired:
            print('frame {}, agent{} fires its laser'.format(frame,idx))
            print('and hit {} US and {} THEM'.format(agent.US_hit, agent.THEM_hit))

# Evaluate every (model directory, training checkpoint) combination:
#   1. load the saved model of each AI agent and group the agents into tribes,
#   2. play max_episodes episodes of at most max_frames frames each,
#   3. print averaged statistics and store them in av_agent_reward, dominating_tribe,
#      dom_tribe_reward and dominance at [dir_num][eps_num].
for dir_num, dir_name in enumerate(dir_names):
    print ("###### Dir = {} #######".format(dir_name))

    for eps_num, eps in enumerate(episodes):
        print ("###### Trained episodes = {} #######".format(eps))

        # Load models for AI agents
        agents = [[] for i in range(num_ai_agents)]
        for i in range(num_ai_agents):
            model_file = dir_name+'MA{}_Gather_ep{}.p'.format(i,eps)
            try:
                with open(model_file, 'rb') as f:
                    # Model file includes both model and optim parameters.
                    # NOTE(review): pickle.load can execute arbitrary code stored in the
                    # file - only load checkpoints that come from a trusted source.
                    saved_model = pickle.load(f)
                    agents[i], _ = saved_model
                    print("Load saved model for agent {}".format(i))
            except OSError:
                print('Model file not found.')
                raise

        # Load random agents
        for i in range(num_ai_agents,num_agents):
            print("Load random agent {}".format(i))
            agents.append(Rdn_Policy())

        # Establish tribal association
        tribes = []
        tribes.append(Tribe(name='Vikings',color='blue', culture=culture,
                    agents=[agents[0], agents[1], agents[2]]))
        tribes.append(Tribe(name='Saxons', color='red', culture=culture,
                    agents=[agents[3], agents[4], agents[5]]))
        tribes.append(Tribe(name='Franks', color='purple', culture=culture,
                    agents=[agents[6], agents[7], agents[8]]))
        # tribes.append(Tribe(name='Crazies', color='yellow', agents=[agents[9]]))   # random agents are crazy!!!

        # 9 agents in 3 tribes (the 'Crazies' tribe of random agents is disabled above),
        # playing on the map selected by map_name='default'
        agent_colors = [agent.color for agent in agents]
        agent_tribes = [agent.tribe for agent in agents]

        env = GatheringEnv(n_agents=num_agents,agent_colors=agent_colors, agent_tribes=agent_tribes,
                       map_name='default')

        # Used to accumulate episode stats for averaging
        cum_rewards = 0
        cum_tags = 0
        cum_US_hits = 0
        cum_THEM_hits = 0
        cum_agent_rewards = [0 for agent in agents]
        cum_agent_tags = [0 for agent in agents]
        cum_agent_US_hits = [0 for agent in agents]
        cum_agent_THEM_hits = [0 for agent in agents]
        # One slot per tribe so that the enumerate(tribes) index used below always lines
        # up, wherever a 'Crazies' tribe might sit in the list (its slot just stays 0).
        cum_tribe_rewards = [0 for t in tribes]

        cuda = False
        start = time.time()

        for ep in range(max_episodes):

            print('.', end='')  # To show progress

            # Initialize AI and random agent data
            actions = [0 for i in range(num_agents)]
            tags = [0 for i in range(num_agents)]
            US_hits = [0 for i in range(num_agents)]
            THEM_hits = [0 for i in range(num_agents)]

            env_obs = env.reset()  # Environment return observations

            # Unpack observations into data structure compatible with agent Policy
            agents_obs = unpack_env_obs(env_obs)
            # For debug: print(len(agents_obs)); print(agents_obs[0].shape)

            for i in range(num_ai_agents):    # Reset agent info - laser tag statistics
                agents[i].reset_info()

            if render:
                env.render()
                time.sleep(1/15)  # Change speed of video rendering

            # For now, observations are not stacked and no LSTM hidden state is kept.

            for frame in range(max_frames):

                for i in range(num_ai_agents):    # For AI agents
                    actions[i], _ = select_action(agents[i], agents_obs[i], cuda=cuda)
                    # select_action already returns a plain number; compare by value
                    # (identity comparison with a literal is implementation-dependent)
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for accessing aggressiveness

                for i in range(num_ai_agents, num_agents):   # For random agents
                    actions[i] = agents[i].select_action(agents_obs[i])
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for accessing aggressiveness

                # if frame % 10 == 0:
                #     print (actions)

                # Perform step
                env_obs, reward, done, info = env.step(actions)
                # For debug: print(env_obs); print(reward); print(done)

                for i in range(num_ai_agents):
                    agents[i].rewards.append(reward[i])  # Stack rewards

                # Unpack observations into data structure compatible with agent Policy
                agents_obs = unpack_env_obs(env_obs)
                # load_info reads the module-level `info` and `frame` set just above
                load_info(agents, narrate=False)   # Load agent info for AI agents

                for i in range(num_agents):
                    US_hits[i] += agents[i].US_hit
                    THEM_hits[i] += agents[i].THEM_hit

                if render:
                    env.render()
                    time.sleep(1/15)  # Change speed of video rendering

                if any(done):
                    print("Done after {} frames".format(frame))
                    break

            # `frame` is the index of the last frame played, so the number of frames
            # played is one more (this also avoids dividing by zero below).
            frames_played = frame + 1

            # Print out statistics of AI agents
            ep_rewards = 0
            ep_tags = 0
            ep_US_hits = 0
            ep_THEM_hits = 0

            if verbose:
                print ('\nStatistics by Agent')
                print ('===================')
            for i in range(num_ai_agents):
                agent_tags = sum(agents[i].tag_hist)
                ep_tags += agent_tags
                cum_agent_tags[i] += agent_tags

                agent_reward = sum(agents[i].rewards)
                ep_rewards += agent_reward
                cum_agent_rewards[i] += agent_reward

                agent_US_hits = sum(agents[i].US_hits)
                agent_THEM_hits = sum(agents[i].THEM_hits)
                ep_US_hits += agent_US_hits
                ep_THEM_hits += agent_THEM_hits
                cum_agent_US_hits[i] += agent_US_hits
                cum_agent_THEM_hits[i] += agent_THEM_hits

                if verbose:
                    print ("Agent{} aggressiveness is {:.2f}".format(i, agent_tags/frames_played))
                    print ("Agent{} reward is {:d}".format(i, agent_reward))
                    print('US agents hit = {}'.format(agent_US_hits))
                    print('THEM agents hit = {}'.format(agent_THEM_hits ))

            cum_rewards += ep_rewards
            cum_tags += ep_tags
            cum_US_hits += ep_US_hits
            cum_THEM_hits += ep_THEM_hits

            if verbose:
                print ('\nStatistics in Aggregate')
                print ('=======================')
                print ('Total rewards gathered = {}'.format(ep_rewards))
                print ('Num laser fired = {}'.format(ep_tags))
                print ('Total US Hit (friendly fire) = {}'.format(ep_US_hits))
                print ('Total THEM Hit = {}'.format(ep_THEM_hits))
                # 1e-7 keeps the ratio defined when nobody was hit
                print ('friendly fire (%) = {0:.3f}'.format(ep_US_hits/(ep_US_hits+ep_THEM_hits+1e-7)))

            if verbose:
                print ('\nStatistics by Tribe')
                print ('===================')
            for i, t in enumerate(tribes):
                if t.name != 'Crazies':
                    ep_tribe_reward = sum(t.sum_rewards())
                    cum_tribe_rewards[i] += ep_tribe_reward
                    if verbose:
                        print ('Tribe {} has total reward of {}'.format(t.name, ep_tribe_reward))

            for i in range(num_ai_agents):
                agents[i].clear_history()

        env.close()  # Close the rendering window
        end = time.time()

        print ('\nAverage Statistics in Aggregate')
        print ('=================================')
        total_rewards = cum_rewards/max_episodes
        print ('Total rewards gathered = {:.1f}'.format(total_rewards))
        av_agent_reward[dir_num][eps_num] = cum_rewards/max_episodes/num_ai_agents
        print ('Av. agent reward = {:.2f}'.format(av_agent_reward[dir_num][eps_num]))
        print ('Num laser fired = {:.1f}'.format(cum_tags/max_episodes))
        print ('Total US Hit (friendly fire) = {:.1f}'.format(cum_US_hits/max_episodes))
        print ('Total THEM Hit = {:.1f}'.format(cum_THEM_hits/max_episodes))
        print ('friendly fire (%) = {:.3f}'.format(cum_US_hits/(cum_US_hits+cum_THEM_hits+1e-7)))

        print ('\nAverage Statistics by Tribe')
        print ('=============================')

        for i, tribe in enumerate(tribes):
            if tribe.name != 'Crazies':
                tribe_reward = cum_tribe_rewards[i]/max_episodes
                print ('Tribe {} has total reward of {:.1f}'.format(tribe.name, tribe_reward))

                # Keep track of dominating team and the rewards gathered
                if tribe_reward > dom_tribe_reward[dir_num][eps_num]:
                    dom_tribe_reward[dir_num][eps_num] = tribe_reward
                    dominating_tribe[dir_num][eps_num]  = tribe.name

        # Team dominance calculation: reward of the best tribe divided by the average
        # reward of the remaining tribes (1.1e-7 guards against a zero denominator)
        print ('Dominating Tribe: {}'.format(dominating_tribe[dir_num][eps_num]))
        top_reward = dom_tribe_reward[dir_num][eps_num]
        others_av_reward = (total_rewards - top_reward+1.1e-7)/(len(tribes)-1)
        dominance[dir_num][eps_num] = top_reward/others_av_reward
        print ('Team dominance: {0:.2f}x'.format(dominance[dir_num][eps_num]))

        print ('\nAverage Statistics by Agent')
        print ('=============================')
        for i in range(num_ai_agents):
            print ("Agent{} of {} aggressiveness is {:.2f}".format(i, agents[i].tribe,
                                                           cum_agent_tags[i]/(max_episodes*max_frames)))
            print ("Agent{} reward is {:.1f}".format(i, cum_agent_rewards[i]/max_episodes))
            print('US agents hit = {:.1f}'.format(cum_agent_US_hits[i]/max_episodes))
            print('THEM agents hit = {:.1f}'.format(cum_agent_THEM_hits[i]/max_episodes))

        print('Training time per epochs: {:.2f} sec'.format((end-start)/max_episodes))

# Note: Statistics for Research Report
# Print the result tables one row (one model directory) per line, in this order:
# average agent reward, dominating team, team dominance.
for stats_table in (av_agent_reward, dominating_tribe, dominance):
    for stats_row in stats_table:
        print(stats_row)

###### Dir = MA_models/3T-9L/cooperative/cf0.01/ #######
###### Trained episodes = 1000 #######
Load saved model for agent 0
Load saved model for agent 1
Load saved model for agent 2
Load saved model for agent 3
Load saved model for agent 4
Load saved model for agent 5
Load saved model for agent 6
Load saved model for agent 7
Load saved model for agent 8
..............................
Average Statistics in Aggregate
Total rewards gathered = 343.3
Av. agent reward = 38.15
Num laser fired = 284.9
Total US Hit (friendly fire) = 33.9
Total THEM Hit = 127.0
friendly fire (%) = 0.211

Average Statistics by Tribe
Tribe Vikings has total reward of 191.4
Tribe Saxons has total reward of 44.9
Tribe Franks has total reward of 107.0
Dominating Tribe: Vikings
Team dominance: 2.52x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 21.1
US agents hit = 0.0
THEM agents hit = 0.0
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 2.2
US agents hit = 0.1
THEM 

..............................
Average Statistics in Aggregate
Total rewards gathered = 363.3
Av. agent reward = 40.36
Num laser fired = 842.1
Total US Hit (friendly fire) = 26.3
Total THEM Hit = 172.2
friendly fire (%) = 0.132

Average Statistics by Tribe
Tribe Vikings has total reward of 34.7
Tribe Saxons has total reward of 138.6
Tribe Franks has total reward of 190.0
Dominating Tribe: Franks
Team dominance: 2.19x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 0.4
US agents hit = 0.6
THEM agents hit = 1.8
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 23.6
US agents hit = 0.0
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 10.7
US agents hit = 0.3
THEM agents hit = 2.4
Agent3 of Saxons aggressiveness is 0.02
Agent3 reward is 10.0
US agents hit = 1.9
THEM agents hit = 6.3
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 7.9
US agents hit = 0.7
THEM agents hit = 1.9
Agent5 of Saxons aggressivene

..............................
Average Statistics in Aggregate
Total rewards gathered = 287.9
Av. agent reward = 31.99
Num laser fired = 425.3
Total US Hit (friendly fire) = 39.7
Total THEM Hit = 144.7
friendly fire (%) = 0.215

Average Statistics by Tribe
Tribe Vikings has total reward of 50.6
Tribe Saxons has total reward of 76.3
Tribe Franks has total reward of 161.0
Dominating Tribe: Franks
Team dominance: 2.54x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.03
Agent0 reward is 0.0
US agents hit = 1.9
THEM agents hit = 7.0
Agent1 of Vikings aggressiveness is 0.06
Agent1 reward is 46.2
US agents hit = 5.3
THEM agents hit = 36.7
Agent2 of Vikings aggressiveness is 0.02
Agent2 reward is 4.4
US agents hit = 1.8
THEM agents hit = 3.8
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 2.0
US agents hit = 0.2
THEM agents hit = 0.7
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 0.8
US agents hit = 0.3
THEM agents hit = 1.7
Agent5 of Saxons aggressiveness

..............................
Average Statistics in Aggregate
Total rewards gathered = 353.5
Av. agent reward = 39.27
Num laser fired = 186.9
Total US Hit (friendly fire) = 30.7
Total THEM Hit = 124.1
friendly fire (%) = 0.198

Average Statistics by Tribe
Tribe Vikings has total reward of 115.2
Tribe Saxons has total reward of 131.5
Tribe Franks has total reward of 106.7
Dominating Tribe: Saxons
Team dominance: 1.19x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.10
Agent0 reward is 109.7
US agents hit = 12.4
THEM agents hit = 69.6
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 3.2
US agents hit = 0.6
THEM agents hit = 1.0
Agent2 of Vikings aggressiveness is 0.01
Agent2 reward is 2.2
US agents hit = 1.8
THEM agents hit = 2.2
Agent3 of Saxons aggressiveness is 0.01
Agent3 reward is 42.1
US agents hit = 0.8
THEM agents hit = 2.5
Agent4 of Saxons aggressiveness is 0.05
Agent4 reward is 72.5
US agents hit = 13.2
THEM agents hit = 42.5
Agent5 of Saxons aggres

..............................
Average Statistics in Aggregate
Total rewards gathered = 333.9
Av. agent reward = 37.10
Num laser fired = 245.5
Total US Hit (friendly fire) = 28.8
Total THEM Hit = 130.7
friendly fire (%) = 0.180

Average Statistics by Tribe
Tribe Vikings has total reward of 106.6
Tribe Saxons has total reward of 144.5
Tribe Franks has total reward of 82.8
Dominating Tribe: Saxons
Team dominance: 1.53x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.01
Agent0 reward is 0.0
US agents hit = 1.3
THEM agents hit = 1.7
Agent1 of Vikings aggressiveness is 0.02
Agent1 reward is 2.4
US agents hit = 7.1
THEM agents hit = 10.7
Agent2 of Vikings aggressiveness is 0.05
Agent2 reward is 104.2
US agents hit = 3.9
THEM agents hit = 43.8
Agent3 of Saxons aggressiveness is 0.09
Agent3 reward is 143.0
US agents hit = 5.4
THEM agents hit = 39.6
Agent4 of Saxons aggressiveness is 0.01
Agent4 reward is 0.1
US agents hit = 1.2
THEM agents hit = 2.8
Agent5 of Saxons aggressi

..............................
Average Statistics in Aggregate
Total rewards gathered = 313.9
Av. agent reward = 34.88
Num laser fired = 378.0
Total US Hit (friendly fire) = 42.1
Total THEM Hit = 124.8
friendly fire (%) = 0.252

Average Statistics by Tribe
Tribe Vikings has total reward of 5.0
Tribe Saxons has total reward of 82.8
Tribe Franks has total reward of 226.1
Dominating Tribe: Franks
Team dominance: 5.15x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.04
Agent0 reward is 0.3
US agents hit = 8.3
THEM agents hit = 19.5
Agent1 of Vikings aggressiveness is 0.09
Agent1 reward is 4.6
US agents hit = 16.4
THEM agents hit = 42.6
Agent2 of Vikings aggressiveness is 0.04
Agent2 reward is 0.2
US agents hit = 5.8
THEM agents hit = 7.5
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 0.0
US agents hit = 0.2
THEM agents hit = 0.6
Agent4 of Saxons aggressiveness is 0.03
Agent4 reward is 82.8
US agents hit = 1.9
THEM agents hit = 8.9
Agent5 of Saxons aggressivenes

..............................
Average Statistics in Aggregate
Total rewards gathered = 370.1
Av. agent reward = 41.12
Num laser fired = 1006.1
Total US Hit (friendly fire) = 36.0
Total THEM Hit = 224.9
friendly fire (%) = 0.138

Average Statistics by Tribe
Tribe Vikings has total reward of 6.3
Tribe Saxons has total reward of 22.8
Tribe Franks has total reward of 340.9
Dominating Tribe: Franks
Team dominance: 23.41x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.01
Agent0 reward is 0.4
US agents hit = 0.4
THEM agents hit = 1.2
Agent1 of Vikings aggressiveness is 0.01
Agent1 reward is 0.0
US agents hit = 0.3
THEM agents hit = 0.4
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 5.9
US agents hit = 0.0
THEM agents hit = 0.9
Agent3 of Saxons aggressiveness is 0.01
Agent3 reward is 0.3
US agents hit = 0.4
THEM agents hit = 1.9
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 0.8
US agents hit = 0.0
THEM agents hit = 0.3
Agent5 of Saxons aggressiveness 

..............................
Average Statistics in Aggregate
Total rewards gathered = 250.2
Av. agent reward = 27.80
Num laser fired = 500.6
Total US Hit (friendly fire) = 39.1
Total THEM Hit = 126.6
friendly fire (%) = 0.236

Average Statistics by Tribe
Tribe Vikings has total reward of 96.9
Tribe Saxons has total reward of 44.0
Tribe Franks has total reward of 109.3
Dominating Tribe: Franks
Team dominance: 1.55x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.31
Agent0 reward is 80.4
US agents hit = 21.3
THEM agents hit = 60.8
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 0.5
US agents hit = 0.3
THEM agents hit = 1.2
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 16.0
US agents hit = 0.1
THEM agents hit = 0.5
Agent3 of Saxons aggressiveness is 0.01
Agent3 reward is 0.0
US agents hit = 1.4
THEM agents hit = 6.1
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 0.1
US agents hit = 0.0
THEM agents hit = 0.0
Agent5 of Saxons aggressivene

..............................
Average Statistics in Aggregate
Total rewards gathered = 402.0
Av. agent reward = 44.66
Num laser fired = 824.4
Total US Hit (friendly fire) = 28.6
Total THEM Hit = 197.3
friendly fire (%) = 0.127

Average Statistics by Tribe
Tribe Vikings has total reward of 58.7
Tribe Saxons has total reward of 60.4
Tribe Franks has total reward of 282.8
Dominating Tribe: Franks
Team dominance: 4.75x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 0.0
US agents hit = 0.0
THEM agents hit = 0.0
Agent1 of Vikings aggressiveness is 0.02
Agent1 reward is 58.7
US agents hit = 0.6
THEM agents hit = 3.8
Agent2 of Vikings aggressiveness is 0.01
Agent2 reward is 0.0
US agents hit = 0.4
THEM agents hit = 1.5
Agent3 of Saxons aggressiveness is 0.03
Agent3 reward is 1.3
US agents hit = 1.3
THEM agents hit = 3.0
Agent4 of Saxons aggressiveness is 0.01
Agent4 reward is 59.1
US agents hit = 0.1
THEM agents hit = 4.8
Agent5 of Saxons aggressiveness

In [15]:
import pickle
import numpy as np

import torch
from torch.autograd import Variable

# Sweep configuration for the cooperative culture: one model directory per
# cf setting (cf30, cf40, ..., cf100) and one checkpoint per 1000 trained episodes.
culture = "cooperative"
dir_names = ["MA_models/3T-9L/cooperative/cf{}/".format(cf) for cf in range(30, 101, 10)]
episodes = list(range(1000, 5001, 1000))

# Performance Statistics - for Research Report
# Each table is indexed as table[dir_num][eps_num]; rows are independent lists.
av_agent_reward = [[0] * len(episodes) for _ in dir_names]
dominating_tribe = [[None] * len(episodes) for _ in dir_names]
dom_tribe_reward = [[0] * len(episodes) for _ in dir_names]
dominance = [[0] * len(episodes) for _ in dir_names]

# There will be 9 agents - 3 teams of 3 AI agents each and 0 random agent
num_ai_agents, num_rdn_agents = 9, 0
num_agents = num_ai_agents + num_rdn_agents  # total number of players in the game

# Data structure for AI agents (agents will form their own Class later on)
agents, actions, tags = [], [], []

# Initialize environment
render = False
num_actions = 8                       # There are 8 actions defined in Gathering

# Initialize constants
num_frames = 4        # not used by the code in this cell (frame stacking is disabled)
max_episodes = 30     # games played per (directory, checkpoint) pair
max_frames = 1000     # frame cap per game
verbose = False       # True prints per-episode statistics as well as the averages

def unpack_env_obs(env_obs):
    """
    Turn the raw per-agent observations from GatheringEnv into Policy-ready tensors.

    env_obs holds one flat observation per agent. Each one is read as a 10x20 view box
    whose trailing axis carries the layers, i.e. (viewbox_width, viewbox_depth, layers);
    the layer count is inferred from the observation length (4 for 800 elements).

    Every observation becomes a tensor of shape (batch_idx, in_channel, width, height),
    i.e. (1, layers, 10, 20).

    Returns a list of such tensors, one per agent, in the same order as env_obs.
    """
    return [
        # (flat) -> (1, 10, 20, layers) -> channels-first (1, layers, 10, 20)
        torch.Tensor(agent_view).view(1, 10, 20, -1).permute(0, 3, 1, 2)
        for agent_view in env_obs
    ]


"""
For now, we do not implement LSTM            
# LSTM Change: Need to cycle hx and cx thru function
def select_action(model, state, lstm_hc, cuda):
    hx , cx = lstm_hc 
    num_frames, height, width = state.shape
    state = torch.FloatTensor(state.reshape(-1, num_frames, height, width))

    if cuda:
        state = state.cuda()

    probs, value, (hx, cx) = model((Variable(state), (hx, cx)))

    m = torch.distributions.Categorical(probs)
    action = m.sample()
    log_prob = m.log_prob(action)
    # LSTM Change: Need to cycle hx and cx thru function
    return action.data[0], log_prob, value, (hx, cx)
"""

def select_action(model, obs, cuda):
    """
    Sample one action from an agent's policy.

    model -- the agent's Policy; calling it on obs must return a probability
             distribution over the available actions.
    obs   -- stacked frames of dim (batch_idx, in_channel, width, height).
    cuda  -- when truthy, obs is moved to the GPU before the forward pass.

    The action is drawn with torch.distributions.Categorical, which also provides the
    log-probability needed by policy-gradient training.

    Returns (action, log_prob): the sampled action as a Python number and the
    log-probability tensor of that sample.
    """
    policy_input = obs.cuda() if cuda else obs

    action_dist = torch.distributions.Categorical(model(policy_input))
    sampled = action_dist.sample()

    return sampled.item(), action_dist.log_prob(sampled)


def load_info(agents, narrate=False):
    """
    Push the latest per-agent info into each agent and optionally narrate it.

    Relies on module-level state rather than arguments: num_agents (how many agents to
    update), info (indexed per agent) and frame (only used in narration). These must
    already be set by the surrounding game loop when this is called.

    agents  -- agents indexable by position; each must provide load_info().
    narrate -- when True, print a line whenever an agent is tagged or fires its laser.
    """
    for idx in range(num_agents):
        agent = agents[idx]
        agent.load_info(info[idx])

        if not narrate:
            continue

        if agent.tagged:
            print('frame {}, agent{} is tagged'.format(frame,idx))
        if agent.laser_fired:
            print('frame {}, agent{} fires its laser'.format(frame,idx))
            print('and hit {} US and {} THEM'.format(agent.US_hit, agent.THEM_hit))

# Evaluation sweep. For every model directory and every training checkpoint:
#   1. load the saved policy of each AI agent and group the agents into three tribes,
#   2. play max_episodes games (no learning) of at most max_frames frames each,
#   3. print averaged statistics and record av_agent_reward, dominating_tribe,
#      dom_tribe_reward and dominance at [dir_num][eps_num] for the research report.
# The loop variables `frame` and `info` are read as globals by load_info(), so their
# names must not change.
for dir_num, dir_name in enumerate(dir_names):
    print("###### Dir = {} #######".format(dir_name))

    for eps_num, eps in enumerate(episodes):
        print("###### Trained episodes = {} #######".format(eps))

        # Load models for AI agents
        agents = [[] for i in range(num_ai_agents)]
        for i in range(num_ai_agents):
            model_file = dir_name+'MA{}_Gather_ep{}.p'.format(i,eps)
            try:
                with open(model_file, 'rb') as f:
                    # Model File include both model and optim parameters.
                    # NOTE: pickle.load can execute arbitrary code - only load model
                    # files that come from a trusted source.
                    saved_model = pickle.load(f)
                    agents[i], _ = saved_model
                    print("Load saved model for agent {}".format(i))
            except OSError:
                print('Model file not found.')
                raise

        # Load random agents
        for i in range(num_ai_agents,num_agents):
            print("Load random agent {}".format(i))
            agents.append(Rdn_Policy())

        # Establish tribal association
        tribes = []
        tribes.append(Tribe(name='Vikings',color='blue', culture=culture, \
                    agents=[agents[0], agents[1], agents[2]]))
        tribes.append(Tribe(name='Saxons', color='red', culture=culture, \
                    agents=[agents[3], agents[4], agents[5]]))
        tribes.append(Tribe(name='Franks', color='purple', culture=culture, \
                    agents=[agents[6], agents[7], agents[8]]))
        # tribes.append(Tribe(name='Crazies', color='yellow', agents=[agents[9]]))   # random agents are crazy!!!

        # 9 agents in 3 tribes, playing on the 'default' map.
        # NOTE(review): agent.color / agent.tribe are presumably assigned by Tribe() - confirm.
        agent_colors = [agent.color for agent in agents]
        agent_tribes = [agent.tribe for agent in agents]

        env = GatheringEnv(n_agents=num_agents,agent_colors=agent_colors, agent_tribes=agent_tribes, \
                       map_name='default')

        # Used to accumulate episode stats for averaging
        cum_rewards = 0
        cum_tags = 0
        cum_US_hits = 0
        cum_THEM_hits = 0
        cum_agent_rewards = [0 for agent in agents]
        cum_agent_tags = [0 for agent in agents]
        cum_agent_US_hits = [0 for agent in agents]
        cum_agent_THEM_hits = [0 for agent in agents]
        # One slot per tribe so the index from enumerate(tribes) is always valid, even if
        # a 'Crazies' tribe is present and is not the last entry (its slot stays at 0).
        cum_tribe_rewards = [0 for t in tribes]

        cuda = False
        start = time.time()

        for ep in range(max_episodes):

            print('.', end='')  # To show progress

            # Initialize AI and random agent data
            actions = [0 for i in range(num_agents)]
            tags = [0 for i in range(num_agents)]
            US_hits = [0 for i in range(num_agents)]
            THEM_hits = [0 for i in range(num_agents)]

            env_obs = env.reset()  # Environment return observations

            # Unpack observations into data structure compatible with agent Policy
            agents_obs = unpack_env_obs(env_obs)

            for i in range(num_ai_agents):    # Reset agent info - laser tag statistics
                agents[i].reset_info()

            if render:
                env.render()
                time.sleep(1/15)  # Change speed of video rendering

            # For now, we do not stack observations, and we do not implement LSTM.

            for frame in range(max_frames):

                for i in range(num_ai_agents):    # For AI agents
                    actions[i], _ = select_action(agents[i], agents_obs[i], cuda=cuda)
                    # select_action already returns a plain number; compare by value
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for accessing aggressiveness

                for i in range(num_ai_agents, num_agents):   # For random agents
                    actions[i] = agents[i].select_action(agents_obs[i])
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for accessing aggressiveness

                # Perform step
                env_obs, reward, done, info = env.step(actions)

                for i in range(num_ai_agents):
                    agents[i].rewards.append(reward[i])  # Stack rewards

                # Unpack observations into data structure compatible with agent Policy
                agents_obs = unpack_env_obs(env_obs)
                load_info(agents, narrate=False)   # Load agent info (reads global `info`)

                for i in range(num_agents):
                    US_hits[i] += agents[i].US_hit
                    THEM_hits[i] += agents[i].THEM_hit

                # For now, we do not stack observations; may come in handy later on.

                if render:
                    env.render()
                    time.sleep(1/15)  # Change speed of video rendering

                if any(done):
                    print("Done after {} frames".format(frame))
                    break

            # Print out statistics of AI agents
            ep_rewards = 0
            ep_tags = 0
            ep_US_hits = 0
            ep_THEM_hits = 0

            if verbose:
                print('\nStatistics by Agent')
                print('===================')
            for i in range(num_ai_agents):
                agent_tags = sum(agents[i].tag_hist)
                ep_tags += agent_tags
                cum_agent_tags[i] += agent_tags

                agent_reward = sum(agents[i].rewards)
                ep_rewards += agent_reward
                cum_agent_rewards[i] += agent_reward

                agent_US_hits = sum(agents[i].US_hits)
                agent_THEM_hits = sum(agents[i].THEM_hits)
                ep_US_hits += agent_US_hits
                ep_THEM_hits += agent_THEM_hits
                cum_agent_US_hits[i] += agent_US_hits
                cum_agent_THEM_hits[i] += agent_THEM_hits

                if verbose:
                    print("Agent{} aggressiveness is {:.2f}".format(i, agent_tags/frame))
                    print("Agent{} reward is {:d}".format(i, agent_reward))
                    print('US agents hit = {}'.format(agent_US_hits))
                    print('THEM agents hit = {}'.format(agent_THEM_hits ))

            cum_rewards += ep_rewards
            cum_tags += ep_tags
            cum_US_hits += ep_US_hits
            cum_THEM_hits += ep_THEM_hits

            if verbose:
                print('\nStatistics in Aggregate')
                print('=======================')
                print('Total rewards gathered = {}'.format(ep_rewards))
                print('Num laser fired = {}'.format(ep_tags))
                print('Total US Hit (friendly fire) = {}'.format(ep_US_hits))
                print('Total THEM Hit = {}'.format(ep_THEM_hits))
                # 1e-7 guards against division by zero when nothing was hit
                print('friendly fire (%) = {0:.3f}'.format(ep_US_hits/(ep_US_hits+ep_THEM_hits+1e-7)))

            if verbose:
                print('\nStatistics by Tribe')
                print('===================')
            for i, t in enumerate(tribes):
                if t.name != 'Crazies':   # the random-agent tribe is excluded from tribe stats
                    ep_tribe_reward = sum(t.sum_rewards())
                    cum_tribe_rewards[i] += ep_tribe_reward
                    if verbose:
                        print('Tribe {} has total reward of {}'.format(t.name, ep_tribe_reward))

            # Start the next episode with empty per-agent histories
            for i in range(num_ai_agents):
                agents[i].clear_history()

        env.close()  # Close the rendering window
        end = time.time()

        print('\nAverage Statistics in Aggregate')
        print('=================================')
        total_rewards = cum_rewards/max_episodes
        print('Total rewards gathered = {:.1f}'.format(total_rewards))
        av_agent_reward[dir_num][eps_num] = cum_rewards/max_episodes/num_ai_agents
        print('Av. agent reward = {:.2f}'.format(av_agent_reward[dir_num][eps_num]))
        print('Num laser fired = {:.1f}'.format(cum_tags/max_episodes))
        print('Total US Hit (friendly fire) = {:.1f}'.format(cum_US_hits/max_episodes))
        print('Total THEM Hit = {:.1f}'.format(cum_THEM_hits/max_episodes))
        print('friendly fire (%) = {:.3f}'.format(cum_US_hits/(cum_US_hits+cum_THEM_hits+1e-7)))

        print('\nAverage Statistics by Tribe')
        print('=============================')

        for i, tribe in enumerate(tribes):
            if tribe.name != 'Crazies':
                tribe_reward = cum_tribe_rewards[i]/max_episodes
                print('Tribe {} has total reward of {:.1f}'.format(tribe.name, tribe_reward))

                # Keep track of dominating team and the rewards gathered
                if tribe_reward > dom_tribe_reward[dir_num][eps_num]:
                    dom_tribe_reward[dir_num][eps_num] = tribe_reward
                    dominating_tribe[dir_num][eps_num]  = tribe.name

        # Team dominance: reward of the best tribe divided by the mean reward of the
        # other tribes (1.1e-7 avoids division by zero when the others gathered nothing).
        print('Dominating Tribe: {}'.format(dominating_tribe[dir_num][eps_num]))
        dominance[dir_num][eps_num] = dom_tribe_reward[dir_num][eps_num]/((total_rewards - \
                                                dom_tribe_reward[dir_num][eps_num]+1.1e-7)/(len(tribes)-1))
        print('Team dominance: {0:.2f}x'.format(dominance[dir_num][eps_num]))

        print('\nAverage Statistics by Agent')
        print('=============================')
        for i in range(num_ai_agents):
            # Aggressiveness is normalised by the frame cap, not by frames actually played
            print("Agent{} of {} aggressiveness is {:.2f}".format(i, agents[i].tribe, \
                                                           cum_agent_tags[i]/(max_episodes*max_frames)))
            print("Agent{} reward is {:.1f}".format(i, cum_agent_rewards[i]/max_episodes))
            print('US agents hit = {:.1f}'.format(cum_agent_US_hits[i]/max_episodes))
            print('THEM agents hit = {:.1f}'.format(cum_agent_THEM_hits[i]/max_episodes))

        print('Training time per epochs: {:.2f} sec'.format((end-start)/max_episodes))

# Note: Statistics for Research Report
# Dump the three result tables one row per line (a row corresponds to one model
# directory, a column to one checkpoint), in this order:
# average agent reward, dominating team, team dominance.
for stat_table in (av_agent_reward, dominating_tribe, dominance):
    for stat_row in stat_table:
        print(stat_row)

###### Dir = MA_models/3T-9L/cooperative/cf30/ #######
###### Trained episodes = 1000 #######
Load saved model for agent 0
Load saved model for agent 1
Load saved model for agent 2
Load saved model for agent 3
Load saved model for agent 4
Load saved model for agent 5
Load saved model for agent 6
Load saved model for agent 7
Load saved model for agent 8
..............................
Average Statistics in Aggregate
Total rewards gathered = 374.0
Av. agent reward = 41.56
Num laser fired = 194.1
Total US Hit (friendly fire) = 28.0
Total THEM Hit = 117.7
friendly fire (%) = 0.192

Average Statistics by Tribe
Tribe Vikings has total reward of 183.2
Tribe Saxons has total reward of 106.5
Tribe Franks has total reward of 84.3
Dominating Tribe: Vikings
Team dominance: 1.92x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.10
Agent0 reward is 174.8
US agents hit = 12.1
THEM agents hit = 63.5
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 0.0
US agents hit = 0.1
THEM

..............................
Average Statistics in Aggregate
Total rewards gathered = 277.1
Av. agent reward = 30.79
Num laser fired = 312.0
Total US Hit (friendly fire) = 34.0
Total THEM Hit = 115.2
friendly fire (%) = 0.228

Average Statistics by Tribe
Tribe Vikings has total reward of 45.0
Tribe Saxons has total reward of 118.2
Tribe Franks has total reward of 114.0
Dominating Tribe: Saxons
Team dominance: 1.49x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.01
Agent0 reward is 42.3
US agents hit = 1.1
THEM agents hit = 3.1
Agent1 of Vikings aggressiveness is 0.02
Agent1 reward is 0.3
US agents hit = 3.9
THEM agents hit = 9.2
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 2.4
US agents hit = 0.4
THEM agents hit = 1.9
Agent3 of Saxons aggressiveness is 0.18
Agent3 reward is 106.6
US agents hit = 11.1
THEM agents hit = 42.3
Agent4 of Saxons aggressiveness is 0.01
Agent4 reward is 0.0
US agents hit = 2.0
THEM agents hit = 7.1
Agent5 of Saxons aggressive

..............................
Average Statistics in Aggregate
Total rewards gathered = 393.6
Av. agent reward = 43.74
Num laser fired = 815.1
Total US Hit (friendly fire) = 26.9
Total THEM Hit = 194.4
friendly fire (%) = 0.121

Average Statistics by Tribe
Tribe Vikings has total reward of 58.1
Tribe Saxons has total reward of 62.1
Tribe Franks has total reward of 273.4
Dominating Tribe: Franks
Team dominance: 4.55x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 0.0
US agents hit = 0.0
THEM agents hit = 0.1
Agent1 of Vikings aggressiveness is 0.02
Agent1 reward is 58.1
US agents hit = 0.5
THEM agents hit = 4.4
Agent2 of Vikings aggressiveness is 0.01
Agent2 reward is 0.0
US agents hit = 0.2
THEM agents hit = 1.2
Agent3 of Saxons aggressiveness is 0.03
Agent3 reward is 1.0
US agents hit = 1.1
THEM agents hit = 3.5
Agent4 of Saxons aggressiveness is 0.01
Agent4 reward is 61.2
US agents hit = 0.6
THEM agents hit = 5.2
Agent5 of Saxons aggressiveness

..............................
Average Statistics in Aggregate
Total rewards gathered = 181.8
Av. agent reward = 20.20
Num laser fired = 993.9
Total US Hit (friendly fire) = 2.1
Total THEM Hit = 212.1
friendly fire (%) = 0.010

Average Statistics by Tribe
Tribe Vikings has total reward of 110.4
Tribe Saxons has total reward of 16.6
Tribe Franks has total reward of 54.7
Dominating Tribe: Vikings
Team dominance: 3.10x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 110.4
US agents hit = 0.1
THEM agents hit = 0.2
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 0.0
US agents hit = 0.0
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.97
Agent2 reward is 0.0
US agents hit = 1.1
THEM agents hit = 205.6
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 0.0
US agents hit = 0.0
THEM agents hit = 1.2
Agent4 of Saxons aggressiveness is 0.00
Agent4 reward is 0.4
US agents hit = 0.0
THEM agents hit = 0.1
Agent5 of Saxons aggressivene

..............................
Average Statistics in Aggregate
Total rewards gathered = 358.3
Av. agent reward = 39.81
Num laser fired = 279.2
Total US Hit (friendly fire) = 23.0
Total THEM Hit = 86.7
friendly fire (%) = 0.209

Average Statistics by Tribe
Tribe Vikings has total reward of 62.1
Tribe Saxons has total reward of 113.8
Tribe Franks has total reward of 182.3
Dominating Tribe: Franks
Team dominance: 2.07x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 5.8
US agents hit = 0.4
THEM agents hit = 0.8
Agent1 of Vikings aggressiveness is 0.02
Agent1 reward is 20.9
US agents hit = 3.3
THEM agents hit = 10.4
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 35.4
US agents hit = 0.1
THEM agents hit = 0.2
Agent3 of Saxons aggressiveness is 0.01
Agent3 reward is 0.0
US agents hit = 0.6
THEM agents hit = 4.7
Agent4 of Saxons aggressiveness is 0.02
Agent4 reward is 8.1
US agents hit = 2.4
THEM agents hit = 4.9
Agent5 of Saxons aggressivenes

..............................
Average Statistics in Aggregate
Total rewards gathered = 334.3
Av. agent reward = 37.14
Num laser fired = 604.9
Total US Hit (friendly fire) = 34.5
Total THEM Hit = 125.1
friendly fire (%) = 0.216

Average Statistics by Tribe
Tribe Vikings has total reward of 36.4
Tribe Saxons has total reward of 207.9
Tribe Franks has total reward of 90.0
Dominating Tribe: Saxons
Team dominance: 3.29x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.02
Agent0 reward is 0.5
US agents hit = 3.9
THEM agents hit = 8.2
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 34.7
US agents hit = 0.1
THEM agents hit = 0.0
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 1.1
US agents hit = 0.1
THEM agents hit = 0.1
Agent3 of Saxons aggressiveness is 0.10
Agent3 reward is 1.4
US agents hit = 5.2
THEM agents hit = 10.1
Agent4 of Saxons aggressiveness is 0.15
Agent4 reward is 206.5
US agents hit = 7.2
THEM agents hit = 42.0
Agent5 of Saxons aggressiven

..............................
Average Statistics in Aggregate
Total rewards gathered = 339.5
Av. agent reward = 37.72
Num laser fired = 245.6
Total US Hit (friendly fire) = 31.0
Total THEM Hit = 115.5
friendly fire (%) = 0.212

Average Statistics by Tribe
Tribe Vikings has total reward of 44.7
Tribe Saxons has total reward of 165.3
Tribe Franks has total reward of 129.5
Dominating Tribe: Saxons
Team dominance: 1.90x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.00
Agent0 reward is 0.0
US agents hit = 0.0
THEM agents hit = 0.0
Agent1 of Vikings aggressiveness is 0.00
Agent1 reward is 0.4
US agents hit = 1.5
THEM agents hit = 1.1
Agent2 of Vikings aggressiveness is 0.12
Agent2 reward is 44.3
US agents hit = 10.1
THEM agents hit = 42.1
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 0.0
US agents hit = 0.2
THEM agents hit = 3.2
Agent4 of Saxons aggressiveness is 0.06
Agent4 reward is 127.5
US agents hit = 6.4
THEM agents hit = 25.1
Agent5 of Saxons aggressiv

..............................
Average Statistics in Aggregate
Total rewards gathered = 373.8
Av. agent reward = 41.53
Num laser fired = 261.9
Total US Hit (friendly fire) = 25.4
Total THEM Hit = 92.7
friendly fire (%) = 0.215

Average Statistics by Tribe
Tribe Vikings has total reward of 116.7
Tribe Saxons has total reward of 101.5
Tribe Franks has total reward of 155.5
Dominating Tribe: Franks
Team dominance: 1.43x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.02
Agent0 reward is 3.5
US agents hit = 2.1
THEM agents hit = 4.5
Agent1 of Vikings aggressiveness is 0.07
Agent1 reward is 106.7
US agents hit = 9.3
THEM agents hit = 37.2
Agent2 of Vikings aggressiveness is 0.00
Agent2 reward is 6.6
US agents hit = 0.0
THEM agents hit = 0.2
Agent3 of Saxons aggressiveness is 0.00
Agent3 reward is 0.2
US agents hit = 0.1
THEM agents hit = 0.1
Agent4 of Saxons aggressiveness is 0.03
Agent4 reward is 1.1
US agents hit = 3.4
THEM agents hit = 9.7
Agent5 of Saxons aggressivene

## Performance Stats - Warlike

In [14]:
import pickle
import numpy as np

import torch
from torch.autograd import Variable

# Sweep configuration for the warlike culture.
# Only the p-1.0_r0.5 and p-1.0_r1.0 model directories are evaluated in this run; the
# other directories (p-1.0_r0.001, p-1.0_r0.005, p-1.0_r0.01, p-1.0_r0.05, p-1.0_r0.1)
# are currently disabled, as are the 3000/4000/5000-episode checkpoints.
culture = "warlike"
dir_names = ["MA_models/3T-9L/warlike/p-1.0_r{}/".format(r) for r in ("0.5", "1.0")]
episodes = list(range(1000, 2001, 1000))

# Performance Statistics - for Research Report
# Each table is indexed as table[dir_num][eps_num]; rows are independent lists.
av_agent_reward = [[0] * len(episodes) for _ in dir_names]
dominating_tribe = [[None] * len(episodes) for _ in dir_names]
dom_tribe_reward = [[0] * len(episodes) for _ in dir_names]
dominance = [[0] * len(episodes) for _ in dir_names]

# There will be 9 agents - 3 teams of 3 AI agents each and 0 random agent
num_ai_agents, num_rdn_agents = 9, 0
num_agents = num_ai_agents + num_rdn_agents  # total number of players in the game

# Data structure for AI agents (agents will form their own Class later on)
agents, actions, tags = [], [], []

# Initialize environment
render = False
num_actions = 8                       # There are 8 actions defined in Gathering

# Initialize constants
num_frames = 4
max_episodes = 30
max_frames = 1000
verbose = False

def unpack_env_obs(env_obs):
    """
    Turn the raw per-agent observations from GatheringEnv into Policy-ready tensors.

    env_obs holds one flat observation per agent. Each one is read as a 10x20 view box
    whose trailing axis carries the layers, i.e. (viewbox_width, viewbox_depth, layers);
    the layer count is inferred from the observation length (4 for 800 elements).

    Every observation becomes a tensor of shape (batch_idx, in_channel, width, height),
    i.e. (1, layers, 10, 20).

    Returns a list of such tensors, one per agent, in the same order as env_obs.
    """
    return [
        # (flat) -> (1, 10, 20, layers) -> channels-first (1, layers, 10, 20)
        torch.Tensor(agent_view).view(1, 10, 20, -1).permute(0, 3, 1, 2)
        for agent_view in env_obs
    ]


"""
For now, we do not implement LSTM            
# LSTM Change: Need to cycle hx and cx thru function
def select_action(model, state, lstm_hc, cuda):
    hx , cx = lstm_hc 
    num_frames, height, width = state.shape
    state = torch.FloatTensor(state.reshape(-1, num_frames, height, width))

    if cuda:
        state = state.cuda()

    probs, value, (hx, cx) = model((Variable(state), (hx, cx)))

    m = torch.distributions.Categorical(probs)
    action = m.sample()
    log_prob = m.log_prob(action)
    # LSTM Change: Need to cycle hx and cx thru function
    return action.data[0], log_prob, value, (hx, cx)
"""

def select_action(model, obs, cuda):
    """
    Sample one action from an agent's policy.

    model -- the agent's Policy; calling it on obs must return a probability
             distribution over the available actions.
    obs   -- stacked frames of dim (batch_idx, in_channel, width, height).
    cuda  -- when truthy, obs is moved to the GPU before the forward pass.

    The action is drawn with torch.distributions.Categorical, which also provides the
    log-probability needed by policy-gradient training.

    Returns (action, log_prob): the sampled action as a Python number and the
    log-probability tensor of that sample.
    """
    policy_input = obs.cuda() if cuda else obs

    action_dist = torch.distributions.Categorical(model(policy_input))
    sampled = action_dist.sample()

    return sampled.item(), action_dist.log_prob(sampled)


def load_info(agents, narrate=False):
    """
    Push the latest environment info into every agent and optionally narrate it.

    For each index below the global `num_agents`, the matching entry of the
    global `info` is passed to that agent's own load_info(). When `narrate` is
    truthy, a line is printed for each agent that is tagged or has fired its
    laser; those messages read the global `frame`.

    NOTE: `num_agents`, `info` and `frame` are not parameters -- they must
    already exist in the notebook's global namespace when this is called.

    Args:
        agents:  indexable collection of agent objects.
        narrate: print tag / laser events when truthy.

    Returns:
        None
    """
    for idx in range(num_agents):
        agent = agents[idx]
        agent.load_info(info[idx])

        if not narrate:
            continue

        if agent.tagged:
            print('frame {}, agent{} is tagged'.format(frame,idx))
        if agent.laser_fired:
            print('frame {}, agent{} fires its laser'.format(frame,idx))
            print('and hit {} US and {} THEM'.format(agent.US_hit, agent.THEM_hit))

# Tournament driver.
#
# For every model directory in `dir_names` and every checkpoint in `episodes`:
#   1. load one pickled Policy per AI agent (plus random agents, if any),
#   2. group the agents into three tribes,
#   3. play `max_episodes` games of at most `max_frames` frames each,
#   4. print aggregate / per-tribe / per-agent statistics and store the
#      research-report numbers in av_agent_reward, dominating_tribe,
#      dom_tribe_reward and dominance at [dir_num][eps_num].
#
# NOTE: this cell depends on names defined in earlier cells: dir_names,
# episodes, num_ai_agents, num_agents, culture, max_episodes, max_frames,
# render, verbose and the four result tables listed above.
for dir_num, dir_name in enumerate(dir_names):
    print ("###### Dir = {} #######".format(dir_name))

    for eps_num, eps in enumerate(episodes):
        print ("###### Trained episodes = {} #######".format(eps))

        # Load models for AI agents (placeholders are overwritten below)
        agents = [[] for i in range(num_ai_agents)]
        for i in range(num_ai_agents):
            model_file = dir_name+'MA{}_Gather_ep{}.p'.format(i,eps)
            try:
                with open(model_file, 'rb') as f:
                    # Model file includes both model and optim parameters.
                    # SECURITY: pickle.load can execute arbitrary code; only
                    # load checkpoints that come from a trusted source.
                    saved_model = pickle.load(f)
                    agents[i], _ = saved_model
                    print("Load saved model for agent {}".format(i))
            except OSError:
                print('Model file not found.')
                raise

        # Load random agents
        for i in range(num_ai_agents,num_agents):
            print("Load random agent {}".format(i))
            agents.append(Rdn_Policy())

        # Establish tribal association
        tribes = []
        tribes.append(Tribe(name='Vikings',color='blue', culture=culture,
                    agents=[agents[0], agents[1], agents[2]]))
        tribes.append(Tribe(name='Saxons', color='red', culture=culture,
                    agents=[agents[3], agents[4], agents[5]]))
        tribes.append(Tribe(name='Franks', color='purple', culture=culture,
                    agents=[agents[6], agents[7], agents[8]]))
        # tribes.append(Tribe(name='Crazies', color='yellow', agents=[agents[9]]))   # random agents are crazy!!!

        # Agents grouped into the tribes above; uses the map defined in default.txt
        agent_colors = [agent.color for agent in agents]
        agent_tribes = [agent.tribe for agent in agents]

        env = GatheringEnv(n_agents=num_agents,agent_colors=agent_colors, agent_tribes=agent_tribes,
                       map_name='default')

        # Used to accumulate episode stats for averaging
        cum_rewards = 0
        cum_tags = 0
        cum_US_hits = 0
        cum_THEM_hits = 0
        cum_agent_rewards = [0 for agent in agents]
        cum_agent_tags = [0 for agent in agents]
        cum_agent_US_hits = [0 for agent in agents]
        cum_agent_THEM_hits = [0 for agent in agents]
        # One slot per tribe so that the enumerate() index used below is always
        # valid, wherever a 'Crazies' tribe might sit in the list.
        cum_tribe_rewards = [0 for t in tribes]

        cuda = False
        start = time.time()

        for ep in range(max_episodes):

            print('.', end='')  # To show progress

            # Initialize AI and random agent data
            actions = [0 for i in range(num_agents)]
            tags = [0 for i in range(num_agents)]
            US_hits = [0 for i in range(num_agents)]
            THEM_hits = [0 for i in range(num_agents)]

            env_obs = env.reset()  # Environment return observations

            # Unpack observations into data structure compatible with agent Policy
            agents_obs = unpack_env_obs(env_obs)

            # For Debug only:
            #   print (len(agents_obs))
            #   print (agents_obs[0].shape)

            for i in range(num_ai_agents):    # Reset agent info - laser tag statistics
                agents[i].reset_info()

            if render:
                env.render()
                time.sleep(1/15)  # Change speed of video rendering

            # For now, we do not stack observations, and we do not implement LSTM:
            #   state = np.stack([state]*num_frames)
            #   # Reset LSTM hidden units when episode begins
            #   cx = Variable(torch.zeros(1, 256))
            #   hx = Variable(torch.zeros(1, 256))

            for frame in range(max_frames):

                for i in range(num_ai_agents):    # For AI agents
                    actions[i], _ = select_action(agents[i], agents_obs[i], cuda=cuda)
                    # select_action already returns a plain number, so compare
                    # by value (== 6), never by identity (is 6).
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for assessing aggressiveness

                for i in range(num_ai_agents, num_agents):   # For random agents
                    actions[i] = agents[i].select_action(agents_obs[i])
                    if actions[i] == 6:
                        tags[i] += 1   # record a tag for assessing aggressiveness

                # For now, we do not implement LSTM:
                #   action, log_prob, state_value, (hx,cx)  = select_action(model, state, (hx,cx))

                # if frame % 10 == 0:
                #     print (actions)

                # Perform step
                env_obs, reward, done, info = env.step(actions)

                # For Debug only:
                #   print (env_obs)
                #   print (reward)
                #   print (done)

                for i in range(num_ai_agents):
                    agents[i].rewards.append(reward[i])  # Stack rewards

                # Unpack observations into data structure compatible with agent Policy
                agents_obs = unpack_env_obs(env_obs)
                # load_info reads the globals `info` and `frame` set by this loop
                load_info(agents, narrate=False)

                for i in range(num_agents):
                    US_hits[i] += agents[i].US_hit
                    THEM_hits[i] += agents[i].THEM_hit

                # For now, we do not stack observation, may come in handy later on:
                #   # Evict oldest diff add new diff to state
                #   next_state = np.stack([next_state]*num_frames)
                #   next_state[1:, :, :] = state[:-1, :, :]
                #   state = next_state

                if render:
                    env.render()
                    time.sleep(1/15)  # Change speed of video rendering

                if any(done):
                    print("Done after {} frames".format(frame))
                    break

            # `frame` is the 0-based index of the last frame played, so the
            # number of frames actually played is frame + 1. Using the count
            # avoids an off-by-one and a ZeroDivisionError when the episode
            # ends on its very first frame.
            frames_played = frame + 1

            # Print out statistics of AI agents
            ep_rewards = 0
            ep_tags = 0
            ep_US_hits = 0
            ep_THEM_hits = 0

            if verbose:
                print ('\nStatistics by Agent')
                print ('===================')
            for i in range(num_ai_agents):
                agent_tags = sum(agents[i].tag_hist)
                ep_tags += agent_tags
                cum_agent_tags[i] += agent_tags

                agent_reward = sum(agents[i].rewards)
                ep_rewards += agent_reward
                cum_agent_rewards[i] += agent_reward

                agent_US_hits = sum(agents[i].US_hits)
                agent_THEM_hits = sum(agents[i].THEM_hits)
                ep_US_hits += agent_US_hits
                ep_THEM_hits += agent_THEM_hits
                cum_agent_US_hits[i] += agent_US_hits
                cum_agent_THEM_hits[i] += agent_THEM_hits

                if verbose:
                    print ("Agent{} aggressiveness is {:.2f}".format(i, agent_tags/frames_played))
                    print ("Agent{} reward is {:d}".format(i, agent_reward))
                    print('US agents hit = {}'.format(agent_US_hits))
                    print('THEM agents hit = {}'.format(agent_THEM_hits ))

            cum_rewards += ep_rewards
            cum_tags += ep_tags
            cum_US_hits += ep_US_hits
            cum_THEM_hits += ep_THEM_hits

            if verbose:
                print ('\nStatistics in Aggregate')
                print ('=======================')
                print ('Total rewards gathered = {}'.format(ep_rewards))
                print ('Num laser fired = {}'.format(ep_tags))
                print ('Total US Hit (friendly fire) = {}'.format(ep_US_hits))
                print ('Total THEM Hit = {}'.format(ep_THEM_hits))
                # 1e-7 guards against division by zero when nobody was hit
                print ('friendly fire (%) = {0:.3f}'.format(ep_US_hits/(ep_US_hits+ep_THEM_hits+1e-7)))

            if verbose:
                print ('\nStatistics by Tribe')
                print ('===================')
            for i, t in enumerate(tribes):
                if t.name != 'Crazies':   # string comparison by value, not identity
                    ep_tribe_reward = sum(t.sum_rewards())
                    cum_tribe_rewards[i] += ep_tribe_reward
                    if verbose:
                        print ('Tribe {} has total reward of {}'.format(t.name, ep_tribe_reward))

            for i in range(num_ai_agents):
                agents[i].clear_history()

        env.close()  # Close the rendering window
        end = time.time()

        print ('\nAverage Statistics in Aggregate')
        print ('=================================')
        total_rewards = cum_rewards/max_episodes
        print ('Total rewards gathered = {:.1f}'.format(total_rewards))
        av_agent_reward[dir_num][eps_num] = cum_rewards/max_episodes/num_ai_agents
        print ('Av. agent reward = {:.2f}'.format(av_agent_reward[dir_num][eps_num]))
        print ('Num laser fired = {:.1f}'.format(cum_tags/max_episodes))
        print ('Total US Hit (friendly fire) = {:.1f}'.format(cum_US_hits/max_episodes))
        print ('Total THEM Hit = {:.1f}'.format(cum_THEM_hits/max_episodes))
        print ('friendly fire (%) = {:.3f}'.format(cum_US_hits/(cum_US_hits+cum_THEM_hits+1e-7)))

        print ('\nAverage Statistics by Tribe')
        print ('=============================')

        for i, tribe in enumerate(tribes):
            if tribe.name != 'Crazies':
                tribe_reward = cum_tribe_rewards[i]/max_episodes
                print ('Tribe {} has total reward of {:.1f}'.format(tribe.name, tribe_reward))

                # Keep track of dominating team and the rewards gathered
                if tribe_reward > dom_tribe_reward[dir_num][eps_num]:
                    dom_tribe_reward[dir_num][eps_num] = tribe_reward
                    dominating_tribe[dir_num][eps_num]  = tribe.name

        # Team dominance = dominating tribe's reward divided by the mean reward
        # of the remaining tribes (1.1e-7 guards against division by zero).
        print ('Dominating Tribe: {}'.format(dominating_tribe[dir_num][eps_num]))
        dominance[dir_num][eps_num] = dom_tribe_reward[dir_num][eps_num]/((total_rewards -
                                                dom_tribe_reward[dir_num][eps_num]+1.1e-7)/(len(tribes)-1))
        print ('Team dominance: {0:.2f}x'.format(dominance[dir_num][eps_num]))

        print ('\nAverage Statistics by Agent')
        print ('=============================')
        for i in range(num_ai_agents):
            # NOTE: the average assumes every episode ran for max_frames frames
            print ("Agent{} of {} aggressiveness is {:.2f}".format(i, agents[i].tribe,
                                                           cum_agent_tags[i]/(max_episodes*max_frames)))
            print ("Agent{} reward is {:.1f}".format(i, cum_agent_rewards[i]/max_episodes))
            print('US agents hit = {:.1f}'.format(cum_agent_US_hits[i]/max_episodes))
            print('THEM agents hit = {:.1f}'.format(cum_agent_THEM_hits[i]/max_episodes))

        print('Training time per epochs: {:.2f} sec'.format((end-start)/max_episodes))

# Note: Statistics for Research Report
# Print, one row per line, the three result tables in this order:
#   average agent reward, dominating team, team dominance.
for report_table in (av_agent_reward, dominating_tribe, dominance):
    for table_row in report_table:
        print(table_row)

###### Dir = MA_models/3T-9L/warlike/p-1.0_r0.5/ #######
###### Trained episodes = 1000 #######
Load saved model for agent 0
Load saved model for agent 1
Load saved model for agent 2
Load saved model for agent 3
Load saved model for agent 4
Load saved model for agent 5
Load saved model for agent 6
Load saved model for agent 7
Load saved model for agent 8
..............................
Average Statistics in Aggregate
Total rewards gathered = 0.0
Av. agent reward = 0.00
Num laser fired = 350.8
Total US Hit (friendly fire) = 272.7
Total THEM Hit = 1753.8
friendly fire (%) = 0.135

Average Statistics by Tribe
Tribe Vikings has total reward of 0.0
Tribe Saxons has total reward of 0.0
Tribe Franks has total reward of 0.0
Dominating Tribe: None
Team dominance: 0.00x

Average Statistics by Agent
Agent0 of Vikings aggressiveness is 0.04
Agent0 reward is 0.0
US agents hit = 78.0
THEM agents hit = 234.0
Agent1 of Vikings aggressiveness is 0.04
Agent1 reward is 0.0
US agents hit = 39.0
THEM agents

In [13]:
# Note: Statistics for Research Report
# Print, one row per line, the three result tables in this order:
#   average agent reward, dominating team, team dominance.
for report_table in (av_agent_reward, dominating_tribe, dominance):
    for table_row in report_table:
        print(table_row)

[47.3, 47.68888888888889, 44.7962962962963, 42.82962962962963, 45.81111111111111]
[48.97777777777778, 54.507407407407406, 50.48148148148148, 50.51851851851852, 47.04814814814815]
[50.03703703703704, 48.7, 49.47777777777778, 45.18518518518519, 48.98888888888889]
[48.492592592592594, 43.166666666666664, 42.67777777777778, 41.10740740740741, 40.40740740740741]
[47.833333333333336, 46.81481481481481, 46.7, 49.88518518518518, 0.0]
[0.0, 0.0, 0, 0, 0]
[0, 0, 0, 0, 0]
['Vikings', 'Vikings', 'Vikings', 'Vikings', 'Vikings']
['Franks', 'Franks', 'Franks', 'Franks', 'Franks']
['Franks', 'Franks', 'Franks', 'Franks', 'Franks']
['Franks', 'Franks', 'Franks', 'Vikings', 'Vikings']
['Franks', 'Franks', 'Franks', 'Franks', None]
[None, None, None, None, None]
[None, None, None, None, None]
[1.4652014645454938, 1.7337973023457258, 1.6729426046886255, 2.831418422948887, 1.9958003542907217]
[1.8125991053796198, 2.1544107258843255, 2.381227899733518, 1.9530502816338113, 2.7170441869724757]
[3.46410515441