# General Game Playing
This notebook contains the Python code needed for General Game Playing with Tangled Program Graphs (TPG).

In [None]:
# imports and helper methods
from tpg.tpg_trainer import TpgTrainer
from tpg.tpg_agent import TpgAgent
import gym
import gym.spaces

"""
inState is row x col x rgba list. This converts it to a 1D list. Because 
that is what TPG uses.
"""
def getState(inState):
    outState = []
    for row in inState:
        for cell in row:
            outState.append(cell[0]/8 + cell[1]*4 + cell[2]*128)
    
    return outState

"""
Run each agent in this method for parallization.
Args:
    args: (TpgAgent, envName, scoreList, episodes)
"""
def runAgent(args):
    agent = args[0]
    envName = args[1]
    scoreList = args[2]
    episodes = args[3] # number of times to repeat game
    
    # skip if task already done by agent
    if agent.taskDone(envName):
        print('Agent #' + str(agent.getAgentNum()) + ' can skip.')
        scoreList.append((agent.getUid(), agent.getOutcomes()))
        return
    
    env = gym.make(envName)
    valActs = range(env.action_space.n) # valid actions, some envs are less
    
    scoreTotal = 0 # score accumulates over all episodes
    for ep in episodes: # episode loop
        state = env.reset()
        scoreEp = 0
        for i in range(1000): # frame loop
            act = agent.act(getState(state), valActs=valActs)

            # feedback from env
            state, reward, isDone, debug = env.step(act)
            scoreEp += reward # accumulate reward in score
            if isDone:
                break # end early if losing state
                
        print('Agent #' + str(agent.getAgentNum()) + 
              ' | Ep #' + str(ep) + ' | Score: ' + str(scoreEp))
        scoreTotal += scoreEp
        
    env.close()
    agent.reward(scoreTotal)
    scoreList.append((agent.getUid(), agent.getOutcomes()))