In [1]:
from ast import literal_eval
import ray
import gym
import retro
import os
import numpy as np
import matplotlib.pyplot as plt
from markov import sampleMarkov, createMarkov, randMarkov
from support import getInitial, verifyTrajectory, install_games_from_rom_dir, frameToCell, action_set, trajectoryToGif

ModuleNotFoundError: No module named 'ray'

In [None]:
# Invoke imageio's FreeImage plugin download helper.
# NOTE(review): presumably this fetches the FreeImage binary needed by the GIF
# export at the end of the notebook — confirm against support.trajectoryToGif.
import imageio
imageio.plugins.freeimage.download()

In [None]:
# Start (or attach to) the local Ray runtime used by the GoExploreActor workers.
# ignore_reinit_error makes this cell safe to re-run on a live kernel: without
# it a second ray.init() call raises because Ray is already initialised.
ray.init(ignore_reinit_error=True)

In [None]:
@ray.remote
class GoExploreActor(object):
    """Ray actor that owns one retro emulator and runs exploration rollouts in it.

    Typical call order from the driver is ``setPolicy`` -> ``updateCache`` ->
    ``GoExplore``. The actor keeps its own copy of the known-cell table so it
    can decide locally which visited cells are worth reporting back.
    """

    def __init__(self, name):
        """Create and reset the retro environment for the game called `name`."""
        self.env = retro.make(name)
        self.env.reset()

        # Initialise all state up front so that calling GoExplore before
        # setPolicy/updateCache fails with a clear message (or simply starts
        # from an empty cache) instead of raising AttributeError.
        self.policy_type = None
        self.policy_weights = None
        self.cells = []
        self.fitnesses = {}
        self._known_cells = set()  # O(1) membership mirror of self.cells

    def setPolicy(self, policy):
        """Select the action policy.

        `policy` is a dict with a 'type' key ('random' or 'markov'); for
        'markov' it must also carry the transition table under 'weights'.
        """
        self.policy_type = policy['type']
        if self.policy_type == 'markov':
            self.policy_weights = policy['weights']

    def updateCache(self, cells, fitnesses):
        """Replace the local table of known cells and their best trajectory lengths.

        `cells` is the list of known cells and `fitnesses` maps each cell to
        the length of the shortest trajectory that reached it.
        """
        # Private copies: GoExplore appends to / updates these tables.
        self.cells = list(cells)
        self.fitnesses = dict(fitnesses)
        self._known_cells = set(self.cells)

    def GoExplore(self, state, trajectory, steps):
        """Restore `state` and take `steps` policy-driven actions from it.

        Parameters:
            state: emulator state to restore before exploring.
            trajectory: list of actions that led to `state`; it is not
                modified, the rollout extends a private copy.
            steps: number of environment steps to take.

        Returns:
            (results, visits) where `results` is a list of
            (cell, trajectory, emulator_state, info, step_index) tuples for
            every cell that is new or was reached by a shorter trajectory
            than the cached one, and `visits` lists each distinct cell seen
            during the rollout in first-visit order.

        Raises:
            ValueError: if no supported policy has been set.
        """
        if self.policy_type not in ('random', 'markov'):
            # Previously this surfaced as an obscure failure on action_set[None].
            raise ValueError('Unsupported or unset policy type: ' + repr(self.policy_type))

        results = []
        visits = []
        seen = set()  # membership mirror of `visits`
        trajectory = list(trajectory)  # never mutate the caller's list
        self.env.em.set_state(state)

        recurrent_state = None
        if self.policy_type == 'markov':
            # Random starting "previous action" for the Markov chain.
            # NOTE(review): 12 presumably equals len(action_set) — confirm.
            recurrent_state = np.random.randint(12)

        for i in range(steps):
            if self.policy_type == 'random':
                action = np.random.randint(12)
            else:
                action = sampleMarkov(recurrent_state, self.policy_weights)
                recurrent_state = action

            observation, reward, done, info = self.env.step(action_set[action])
            trajectory.append(action)

            cell = frameToCell(observation, info)

            if cell not in seen:
                seen.add(cell)
                visits.append(cell)

            known = cell in self._known_cells
            if known and len(trajectory) >= self.fitnesses[cell]:
                # Already reachable at least as quickly: nothing to report.
                continue

            results.append((cell, trajectory.copy(), self.env.em.get_state(), info, i))
            if not known:
                self.cells.append(cell)
                self._known_cells.add(cell)
            # Record the new best length for improved cells as well as new
            # ones; otherwise every later (longer) revisit within this rollout
            # would be reported again, each with a full emulator snapshot.
            self.fitnesses[cell] = len(trajectory)
        return (results, visits)

    def reset(self):
        """Reset the underlying environment."""
        self.env.reset()

In [None]:
# Point the support helper at the local ROM directory before any environment
# is created.
install_games_from_rom_dir('roms/')

# Game / save-state pair explored by this notebook, and the worker count.
game = 'SonicTheHedgehog-Genesis'
stateStr = 'GreenHillZone.Act1.state'
NWorkers = 2

# Initial exploration policy: a Markov policy with weights from randMarkov.
# NOTE(review): the meaning of the (10, 12) arguments is defined in markov.py;
# 12 presumably is the number of actions — confirm.
policy = {
    'type': 'markov',
    'weights': randMarkov(10, 12),
}

initialCell, initialState, initialTrajectory, initialFitness = getInitial(game, stateStr)

# Master tables. `cells` is the ordered list of known cells; every dict below
# is keyed by cell and seeded with the starting cell only.
cells = [initialCell]
states = {initialCell: initialState}
trajectories = {initialCell: initialTrajectory}
fitnesses = {initialCell: initialFitness}
cell_prob = {initialCell: 1}

# One remote actor per worker, each handed the initial policy.
workers = [GoExploreActor.remote(game) for _ in range(NWorkers)]
for worker in workers:
    worker.setPolicy.remote(policy)
    

In [None]:
def winCondition(cell, info):
    """Report whether the step that produced `info` finished the level.

    `cell` is accepted for interface compatibility but is not consulted; the
    decision depends only on info['level_end_bonus'] being non-zero.
    """
    bonus = info['level_end_bonus']
    return bonus != 0

# Print a line for every newly found or shortened cell during the search.
verbose = True
# Shortest winning trajectory found so far; stays None until a win is seen.
best_trajectory = None

In [None]:
# Number of explore/update rounds; also the length of the statistics arrays.
N_ITERATIONS = 500

# Per-iteration statistics: number of known cells, mean trajectory length over
# known cells, and length of the best winning trajectory (0 until one exists).
stat_cells = np.zeros(N_ITERATIONS)
stat_avglen = np.zeros(N_ITERATIONS)
stat_runlen = np.zeros(N_ITERATIONS)
#Optionally add other numbers of steps to this list to alternate shorter episodes
nsteps = [500]

for i in range(N_ITERATIONS):
    print('Iteration ' + str(i))

    # Pass the full cell table and fitnesses table to each worker. Both are
    # stored once per iteration and the same references are shared, instead
    # of re-serialising them for every worker.
    cells_ref = ray.put(cells)
    fitnesses_ref = ray.put(fitnesses)
    for worker in workers:
        worker.updateCache.remote(cells_ref, fitnesses_ref)

    # Sample one start cell per worker, weighted by cell_prob.
    weights = np.array([cell_prob[c] for c in cells], dtype=float)
    total = weights.sum()
    if total > 0:
        normalized_cell_prob = weights / total
    else:
        # Repeated halving can drive every weight to zero; fall back to a
        # uniform choice rather than handing NaNs to np.random.choice.
        normalized_cell_prob = np.full(len(cells), 1.0 / len(cells))
    goCells = [cells[r] for r in np.random.choice(np.arange(len(cells)), size=(NWorkers), p=normalized_cell_prob)]
    #Select a cell randomly
    #goCells = [cells[np.random.randint(len(cells))] for i in range(NWorkers)]

    # Note: nsteps is indexed by worker index (w), not by iteration number.
    R = ray.get([
        workers[w].GoExplore.remote(
            ray.put(states[goCells[w]]),
            ray.put(trajectories[goCells[w]]),
            nsteps[w % len(nsteps)],
        )
        for w in range(NWorkers)
    ])

    #Compile results and update master tables
    results = []
    visits = []
    for r, v in R:
        results += r
        visits += v

    for cell, trajectory, state, info, iteration in results:
        if cell in cells:
            if len(trajectory) < fitnesses[cell]:
                if verbose:
                    print('Shortened trajectory to ' + cell + ' from '
                          + str(fitnesses[cell]) + ' to '
                          + str(len(trajectory)) + ', saving '
                          + str(fitnesses[cell]-len(trajectory)) + ' frames at step '
                          + str(iteration))
                fitnesses[cell] = len(trajectory)
                trajectories[cell] = trajectory
                states[cell] = state
                cell_prob[cell] += 1
        else:
            if winCondition(cell, info):
                # Winning cells are deliberately not added to the tables;
                # only the shortest winning trajectory is kept.
                if best_trajectory is None or len(trajectory) < len(best_trajectory):
                    best_trajectory = trajectory
                    print('Improved trajectory: ' + str(len(best_trajectory)) + ' Frames')
            else:
                if verbose:
                    print('Found cell ' + cell + ' at step ' + str(iteration) + ', total ' + str(len(cells)) + ' found')
                cells.append(cell)
                fitnesses[cell] = len(trajectory)
                trajectories[cell] = trajectory
                states[cell] = state
                cell_prob[cell] = 10

    # Halve the selection weight of every visited cell (once per visiting
    # worker). Winning cells show up in `visits` but are never registered in
    # cell_prob, so they are skipped; indexing them unconditionally raised
    # KeyError the first time a win was found.
    for cell in visits:
        if cell in cell_prob:
            cell_prob[cell] *= .5

    if best_trajectory is None:
        if i < 100 and i%2==0:
            # Early on, every other iteration uses fresh random weights.
            policy['weights'] = randMarkov(10,12)
        else:
            # Otherwise fit the policy to the most recently discovered cell.
            policy['weights'] = createMarkov(trajectories[cells[-1]],12)
    else:
        # Pass the same second argument (12) as the other createMarkov call.
        policy['weights'] = createMarkov(best_trajectory, 12)
        stat_runlen[i] = len(best_trajectory)

    for worker in workers:
        worker.setPolicy.remote(policy)

    stat_cells[i] = len(cells)
    stat_avglen[i] = np.array(list(fitnesses.values())).mean()


In [None]:
# One figure per statistic recorded by the search loop, each with a title and
# axis labels so the plot can be read without the surrounding code.
for series, title, ylabel in (
    (stat_cells, 'Known cells', 'Number of cells'),
    (stat_avglen, 'Mean trajectory length over known cells', 'Trajectory length'),
    (stat_runlen, 'Best winning trajectory length (0 until a win is found)', 'Trajectory length'),
):
    plt.plot(series)
    plt.title(title)
    plt.xlabel('Iteration')
    plt.ylabel(ylabel)
    plt.show()

In [None]:

# Choose the known cell whose parsed key has the largest first component.
first_components = [literal_eval(key)[0] for key in cells]
cell = cells[np.array(first_components).argsort()[-1]]

c = literal_eval(cell)
trajectory = trajectories[cell]

# File name: Gameplay_<trajectory length>-<c0>-<c1>-<c2>-<c3>.gif
name_parts = [len(trajectory), c[0], c[1], c[2], c[3]]
gif_name = 'Gameplay_' + '-'.join(str(part) for part in name_parts) + '.gif'
trajectoryToGif(game, stateStr, trajectory, True, gif_name)