# General Game Playing
This notebook contains the Python code needed for general game playing with Tangled Program Graphs (TPG).

In [1]:
# imports and helper methods
import multiprocessing as mp
import os
import pickle
import random
import time

import gym
import gym.spaces
import psutil
from IPython.display import clear_output

from tpg.tpg_agent import TpgAgent
from tpg.tpg_trainer import TpgTrainer


def getState(inState):
    """
    Flatten a screen frame into the 1-D list of numbers that TPG takes as input.

    Args:
        inState: (row x col x channel) nested sequence or array. Only the
            first three channels of every cell are read.
    Returns:
        A list holding one number per cell, rows first and then the cells of
        each row, computed as channel0/8 + channel1*4 + channel2*128.
    """
    # float() lifts the channel value out of its storage dtype before it is
    # scaled, so fixed-width pixels (e.g. numpy uint8) cannot wrap around.
    return [cell[0] / 8 + float(cell[1]) * 4 + float(cell[2]) * 128
            for row in inState
            for cell in row]

def runAgent(args):
    """
    Play a single agent on a single environment and record its outcomes.

    Takes one tuple argument so it can be mapped over a process pool for
    parallelization.

    Args:
        args: (TpgAgent, envName, scoreList, numEpisodes)
            envName:     id handed to gym.make.
            scoreList:   list-like that receives (agent uid, agent outcomes).
            numEpisodes: number of times to repeat the game.
    Returns:
        None; the result is reported by appending to scoreList.
    """
    agent, envName, scoreList, numEpisodes = args[0], args[1], args[2], args[3]

    # skip if task already done by agent; still report its stored outcomes
    if agent.taskDone(envName):
        print('Agent #' + str(agent.getAgentNum()) + ' can skip.')
        scoreList.append((agent.getUid(), agent.getOutcomes()))
        return

    env = gym.make(envName)
    try:
        valActs = range(env.action_space.n) # valid actions, some envs are less

        scoreTotal = 0 # score accumulates over all episodes
        for ep in range(numEpisodes): # episode loop
            state = env.reset()
            scoreEp = 0
            for i in range(1000): # frame loop, capped at 1000 frames per episode
                act = agent.act(getState(state), valActs=valActs)

                # feedback from env
                state, reward, isDone, debug = env.step(act)
                scoreEp += reward # accumulate reward in score
                if isDone:
                    break # end early if losing state

            print('Agent #' + str(agent.getAgentNum()) +
                  ' | Ep #' + str(ep) + ' | Score: ' + str(scoreEp))
            scoreTotal += scoreEp
    finally:
        # release the environment even when an episode raises, so a failing
        # agent does not leave it open inside a long-lived pool worker
        env.close()

    agent.reward(scoreTotal, envName)
    scoreList.append((agent.getUid(), agent.getOutcomes()))
    
# https://stackoverflow.com/questions/42103367/limit-total-cpu-usage-in-python-multiprocessing/42130713
def limit_cpu():
    """Set the niceness of the calling process to 19 through psutil."""
    psutil.Process(os.getpid()).nice(19)
    
# every title used for general game playing, as a gym '-v0' environment id
gymEnvNames = [title + '-v0' for title in (
    'Alien', 'Asteroids', 'Atlantis', 'BankHeist', 'BattleZone',
    'Bowling', 'Boxing', 'Centipede', 'ChopperCommand', 'DoubleDunk',
    'FishingDerby', 'Freeway', 'Frostbite', 'Gravitar', 'Hero',
    'IceHockey', 'Jamesbond', 'Kangaroo', 'Krull', 'KungFuMaster',
    'MsPacman', 'PrivateEye', 'RoadRunner', 'Skiing', 'Tennis',
    'TimePilot', 'UpNDown', 'Venture', 'WizardOfWor', 'Zaxxon',
)]

In [None]:
# Endless training loop: every generation, all agents play one environment in
# parallel, their scores are applied, and the population is evolved and saved.

# 18 actions are offered to the trainer; runAgent narrows them per environment
# through valActs.
trainer = TpgTrainer(actions=range(18))

processes = 4
pool = mp.Pool(processes=processes, initializer=limit_cpu)
man = mp.Manager()

curEnvs = []
numActiveEnvs = 10 # environments drawn per shuffle of gymEnvNames

numEpisodes = 5 # episodes each agent plays per generation

tStart = time.time() # reference point for the elapsed-time report

try:
    while True: # do generations with no end
        scoreList = man.list() # shared with the workers, refilled each generation

        # reload curEnvs if needed
        if len(curEnvs) == 0:
            curEnvs = list(gymEnvNames)
            random.shuffle(curEnvs)
            curEnvs = curEnvs[:numActiveEnvs]

        curEnv = curEnvs.pop() # get env to play on this generation

        pool.map(runAgent,
            [(agent, curEnv, scoreList, numEpisodes)
            for agent in trainer.getAllAgents(skipTasks=[])])

        # apply scores
        trainer.applyScores(scoreList)
        trainer.evolve() # go into next gen

        # save model after every gen
        with open('saved-model-1.pkl','wb') as f:
            pickle.dump(trainer,f)

        clear_output(wait=True)
        print('Time Taken (Seconds): ' + str(time.time() - tStart))
        # str() so the message builds whatever type curGen has
        print('On Generation: ' + str(trainer.curGen))
        # report on the environment just played, not the literal text 'curEnv'
        print('Results so far: ' +
              str(trainer.generateScoreStats(tasks=[curEnv])))
finally:
    # the loop only ends through an interrupt or an error; release the worker
    # processes and the manager before the exception propagates
    pool.terminate()
    pool.join()
    man.shutdown()

before ep loop
in ep loop
0
1
2
3
4
5
6
before ep loop
in ep loop
0
7
1
8
2
9
3
10
4
11
5
12
6
13
7
14
8
15
9
before ep loop
in ep loop
0
16
10
1
17
11
2
18
12
3
19
13


Process ForkPoolWorker-3:
Process ForkPoolWorker-1:
Process ForkPoolWorker-2:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  

Traceback (most recent call last):
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-a906dba23e9a>", line 25, in <module>
    for agent in trainer.getAllAgents(skipTasks=[])])
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/pool.py", line 266, in map
    return self._map_async(func, iterable, mapstar, chunksize).get()
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/pool.py", line 638, in get
    self.wait(timeout)
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/pool.py", line 635, in wait
    self._event.wait(timeout)
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/threading.py", line 551, in wait
    signaled = self._cond.wait(timeout)
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/threading.py", line 295, in wait
    waiter.acquire()
Ke

KeyboardInterrupt: 

Process ForkPoolWorker-8:
Process ForkPoolWorker-7:
Process ForkPoolWorker-6:
Process ForkPoolWorker-9:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/amaral/anaconda2/envs/oaigym/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*