# TPG Examples
This document shows how to use the PyTPG API. We make use of OpenAI Gym to run examples, and we assume you already have PyTPG installed, see the readme for installation instructions for PyTPG.

## General Setup

In [1]:
from __future__ import division
from IPython.display import clear_output
# imports to run OpenAI Gym in Jupyter
import gym
import matplotlib.pyplot as plt
from IPython import display

# how to render in Jupyter: 
# https://stackoverflow.com/questions/40195740/how-to-run-openai-gym-render-over-a-server
# https://www.youtube.com/watch?v=O84KgRt6AJI
def show_state(env, step=0, name='', info=''):
    plt.figure(3)
    plt.clf()
    plt.imshow(env.render(mode='rgb_array'))
    plt.title("%s | Step: %d %s" % (name, step, info))
    plt.axis('off')

    display.clear_output(wait=True)
    display.display(plt.gcf())
    
# transforms the state into what the tpg agent can use.
# From 3D to 1D, taking only red data (from rgb array)
def getState(state):
    state2 = []
    for x in state:
        for y in x:
            state2.append(y[0])
            
    return state2

In [6]:
env = gym.make('Assault-v0') # make the environment

In [3]:
print(env.action_space) # learn size of action space

Discrete(7)


In [2]:
# import to do training
from tpg.tpg_trainer import TpgTrainer
# import to run an agent (always needed)
from tpg.tpg_agent import TpgAgent

## Generational Selection

In [8]:
%matplotlib inline
import time # for tracking time

tStart = time.time()

# first create an instance of the TpgTrainer
# this creates the whole population and everything
trainer = TpgTrainer(actions=range(7), teamPopSizeInit=50)

curScores = [] # hold scores in a generation
summaryScores = [] # record score summaries for each gen (min, max, avg)

# 5 generations isn't much (not even close), but some improvements
# should be seen.
for gen in range(5): # generation loop
    curScores = [] # new list per gen
    
    while True: # loop to go through agents
        teamNum = trainer.remainingAgents()
        agent = trainer.getNextAgent()
        if agent is None:
            break # no more agents, so proceed to next gen
        
        state = env.reset() # get initial state and prep environment
        score = 0
        for i in range(200): # run episodes that last 200 frames
            #show_state(env, i, 'Assault', 'Gen #' + str(gen) + 
            #           ', Team #' + str(teamNum) +
            #           ', Score: ' + str(score)) # render env
            
            act = agent.act(getState(state)) # get action from agent
            
            # feedback from env
            state, reward, isDone, debug = env.step(act)
            score += reward # accumulate reward in score
            if isDone:
                break # end early if losing state
                
        agent.reward(score) # must reward agent
        curScores.append(score) # store score
            
    # at end of generation, make summary of scores
    summaryScores.append((min(curScores), max(curScores),
                    sum(curScores)/len(curScores))) # min, max, avg
    trainer.evolve()
    
print('Time Taken (Seconds): ' + str(time.time() - tStart))
print('Results:\nMin, Max, Avg')
for result in summaryScores:
    print(result[0],result[1],result[2])

KeyboardInterrupt: 

# Generational Selection with Multiprocessing

In [3]:
import multiprocessing as mp
import time

tStart = time.time()

lock = mp.Lock()

trainer = TpgTrainer(actions=range(7), teamPopSizeInit=50)

processes = 5

m = mp.Manager()
envQueue = m.Queue()
# each worker needs its own environment
for i in range(processes):
    envQueue.put(gym.make('Assault-v0'))
    
summaryScores = [] # record score summaries for each gen (min, max, avg)

# run agent in function to work with multiprocessing
def runAgent(agenteqsq):
    agent = agenteqsq[0] 
    eq = agenteqsq[1] 
    sq = agenteqsq[2]
    print('Waiting for env...')
    print(eq.qsize())
    env = eq.get() # get an environment
    print('Agent #' + str(agent.getAgentNum()) + ' starting.')
    state = env.reset() # get initial state and prep environment
    score = 0
    for i in range(200): # run episodes that last 200 frames
        act = agent.act(getState(state)) # get action from agent

        # feedback from env
        state, reward, isDone, debug = env.step(act)
        score += reward # accumulate reward in score
        if isDone:
            break # end early if losing state
    lock.acquire()
    agent.reward(score) # must reward agent
    lock.release()
    sq.put(score) # store score
    
    print('Agent #' + str(agent.getAgentNum()) + ' finished with score ' + str(score))
    
    eq.put(env) # put environment back
    
    
for gen in range(5): # generation loop
    curScores = m.Queue() # hold scores in a generation (queue so thread safe)
    
    # run generation
    pool = mp.Pool(processes=processes)
    pool.map(runAgent, 
                 [(agent, envQueue, curScores) 
                  for agent in trainer.getAllAgents()])
         
    scores = [] # convert scores into list
    while not curScores.empty():
        scores.append(curScores.get())

    # at end of generation, make summary of scores
    summaryScores.append((min(scores), 
                    max(scores),
                    sum(scores)/len(scores))) # min, max, avg
    trainer.evolve()
    
clear_output(wait=True)
print('Time Taken (Seconds): ' + str(time.time() - tStart))
print('Results: ' + str(summaryScores))

Time Taken (Seconds): 227.312779188
Results: [(0.0, 168.0, 10.5), (0.0, 168.0, 15.96), (0.0, 147.0, 12.409090909090908), (0.0, 168.0, 15.076923076923077), (0.0, 168.0, 32.73529411764706)]


Process PoolWorker-21:
Process PoolWorker-14:
Process PoolWorker-12:
Process PoolWorker-17:
Process PoolWorker-18:
Process PoolWorker-9:
Process PoolWorker-23:
Process PoolWorker-24:
Process PoolWorker-4:
Process PoolWorker-10:
Traceback (most recent call last):
Process PoolWorker-15:
Process PoolWorker-11:
Process PoolWorker-20:
Process PoolWorker-13:
Process PoolWorker-2:
Process PoolWorker-5:
Process PoolWorker-19:
Process PoolWorker-6:
Process PoolWorker-26:
Process PoolWorker-7:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process PoolWorker-22:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process PoolWorker-25:
Process PoolWorker-3:
Process PoolWorker-8:
Process PoolWorker-16:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ryan/anaconda3/envs/oaigym/lib/python2.7/multiprocessing/process.py",

  File "/home/ryan/anaconda3/envs/oaigym/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    self._target(*self._args, **self._kwargs)
  File "/home/ryan/anaconda3/envs/oaigym/lib/python2.7/multiprocessing/process.py", line 114, in run
  File "/home/ryan/anaconda3/envs/oaigym/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    self._target(*self._args, **self._kwargs)
    self.run()
    self._target(*self._args, **self._kwargs)
    self._target(*self._args, **self._kwargs)
  File "/home/ryan/anaconda3/envs/oaigym/lib/python2.7/multiprocessing/pool.py", line 102, in worker
  File "/home/ryan/anaconda3/envs/oaigym/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    task = get()
    task = get()
    task = get()
    self._target(*self._args, **self._kwargs)
    task = get()
    task = get()
    task = get()
    task = get()
    task = get()
  File "/home/ryan/anaconda3/envs/oaigym/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._tar

## Tournament Selection