In [None]:
from matplotlib import animation
import matplotlib.pyplot as plt
import gym 
from gym import envs
import numpy as np
import time
def save_frames_as_gif(frames, path='./', filename='gym_animation_mp.gif'):
    """Write a sequence of image frames to an animated GIF.

    Args:
        frames: Non-empty sequence of image arrays accepted by ``plt.imshow``.
            The first frame's ``shape[0]`` (height) and ``shape[1]`` (width)
            determine the figure size.
        path: Directory the GIF is written into.
        filename: Name of the GIF file inside ``path``.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    import os  # local import so the function does not depend on the import cell

    if len(frames) == 0:
        raise ValueError('frames must contain at least one image')

    height, width = frames[0].shape[0], frames[0].shape[1]
    # size in inches = pixels / 72 at dpi=72, i.e. one figure pixel per frame pixel
    fig = plt.figure(figsize=(width / 72.0, height / 72.0), dpi=72)
    try:
        patch = plt.imshow(frames[0])
        plt.axis('off')

        def animate(i):
            patch.set_data(frames[i])

        anim = animation.FuncAnimation(fig, animate, frames=len(frames), interval=50)
        # os.path.join inserts the separator when `path` lacks a trailing one
        anim.save(os.path.join(path, filename), writer='imagemagick', fps=60)
    finally:
        # Always release the figure; otherwise every call leaves one open.
        plt.close(fig)

In [None]:
class ParticleSwarmOptimisation(object):
    """Particle swarm optimiser that maximises a fitness function.

    The fitness function is called as ``fitnessFunction(position, n)``; this
    class passes ``n = 100`` when re-sampling the global best (the second
    argument is presumably a number of evaluation runs - confirm against the
    fitness implementation).

    Attributes:
        bestGlobalFitness: Latest re-sampled fitness of the global best
            position (``-inf`` before any epoch has run).
        bestGlobalPos: Copy of the best position found so far (``[]`` before
            any epoch has run).
        solved: ``True`` if the most recent ``optimize`` call exceeded its
            target, ``False`` otherwise.
    """

    def __init__(self, fitnessFunction, bounds, numParticles, omega, v, phiL, phiG,size=8):
        """Create the swarm.

        Args:
            fitnessFunction: Callable ``(position, n) -> fitness`` to maximise.
            bounds: ``(low, high)`` pair forwarded to every particle.
            numParticles: Number of particles to create.
            omega, v, phiL, phiG: Forwarded unchanged to each ``Particle``.
            size: Forwarded to each ``Particle`` as ``particle_size``.
        """
        self.bestGlobalFitness = -np.inf
        self.bestGlobalPos = []
        self.solved = False
        self.fitnessFunction = fitnessFunction
        self.bounds = bounds
        self.swarm = [
            Particle(bounds, omega, v, phiL, phiG, particle_size=size)
            for _ in range(numParticles)
        ]

    def optimize(self, maximumEpochs, target):
        """Run up to ``maximumEpochs`` epochs or until ``target`` is exceeded.

        Args:
            maximumEpochs: Maximum number of epochs to run.
            target: The run stops as soon as the re-sampled global best
                fitness is strictly greater than this value.

        Returns:
            The 1-based epoch in which the target was exceeded. If the target
            was never exceeded, the number of epochs run (``0`` when
            ``maximumEpochs`` is not positive). Check ``self.solved`` to tell
            success in the last epoch apart from failure.
        """
        self.bestGlobalFitness = -np.inf
        self.bestGlobalPos = []
        self.solved = False

        epoch = 0  # defined up front so the final return works when no epoch runs
        # Epochs are numbered 1..maximumEpochs inclusive.
        for epoch in range(1, maximumEpochs + 1):
            # Evaluate each particle and track the best one seen.
            for particle in self.swarm:
                fitness = particle.evaluate(self.fitnessFunction)
                if fitness > self.bestGlobalFitness:
                    self.bestGlobalPos = particle.bestPos.copy()
                    self.bestGlobalFitness = particle.bestFitness

            # Move the swarm towards the personal and global bests.
            for particle in self.swarm:
                particle.updateVelocity(self.bestGlobalPos)
                particle.updatePosition(self.bounds)

            # Re-sample the global best; this value is both the stopping
            # criterion and the bar particles must beat in the next epoch.
            self.bestGlobalFitness = self.fitnessFunction(self.bestGlobalPos, 100)
            print('Iteration: ' + str(epoch) + ' Global best: ' + str(self.bestGlobalFitness))
            if self.bestGlobalFitness > target:
                self.solved = True
                return epoch
        return epoch  # target not reached

class Particle:
    """A single swarm member: position, velocity and personal best.

    Attributes:
        position: Current position, drawn uniformly from ``bounds`` at start.
        velocity: Current velocity, drawn uniformly from ``[-v, v]`` at start.
        bestPos: Copy of the best position this particle has evaluated
            (the starting position until ``evaluate`` is first called).
        bestFitness: Fitness recorded for ``bestPos`` (``-inf`` initially).
    """

    def __init__(self, bounds, omega, v, phiL, phiG,particle_size=8):
        """Initialise a particle at a random position with a random velocity.

        Args:
            bounds: ``(low, high)`` range for the initial position.
            omega: Factor applied to the previous velocity on each update.
            v: Initial velocity components are drawn from ``[-v, v]``.
            phiL: Scale of the pull towards this particle's own best position.
            phiG: Scale of the pull towards the global best position.
            particle_size: Number of dimensions of the position vector.
        """
        self.omega, self.phiL, self.phiG = omega, phiL, phiG

        self.position = np.random.uniform(low=bounds[0], high=bounds[1], size=particle_size)
        self.velocity = np.random.uniform(low=-v, high=v, size=particle_size)

        self.bestFitness = -np.inf
        # Start the personal best at the initial position (not an empty list)
        # so updateVelocity() and the re-evaluation branch of evaluate()
        # always see an array of the right length.
        self.bestPos = np.array(self.position)

    # evaluate current fitness
    def evaluate(self, fitnessFunction, evaluationIterations=25):
        """Score the current position and refresh the personal best.

        Args:
            fitnessFunction: Callable ``(position, n) -> fitness``.
            evaluationIterations: Value passed as ``n`` to the fitness
                function.

        Returns:
            The personal best fitness after this evaluation.
        """
        fitness = fitnessFunction(self.position, evaluationIterations)
        if fitness > self.bestFitness:
            # New personal best.
            self.bestPos = self.position.copy()
            self.bestFitness = fitness
        else:
            # Re-evaluate the stored best so its recorded fitness is refreshed
            # rather than kept from a single earlier sample.
            self.bestFitness = fitnessFunction(self.bestPos, evaluationIterations)
        return self.bestFitness

    # update new particle velocity
    def updateVelocity(self, globalBest):
        """Blend the old velocity with pulls towards personal and global bests.

        Args:
            globalBest: Best position found by the whole swarm.
        """
        dims = len(self.position)
        velLocal = self.phiL * np.random.rand(dims) * (self.bestPos - self.position)
        velGlobal = self.phiG * np.random.rand(dims) * (globalBest - self.position)
        self.velocity = self.omega * self.velocity + velLocal + velGlobal

    # update the particle position
    def updatePosition(self,bounds):
        """Advance by the current velocity and clamp into ``bounds``.

        Args:
            bounds: ``(low, high)`` limits applied to every coordinate.
        """
        self.position = np.clip(self.position + self.velocity, bounds[0], bounds[1])

In [None]:
class CartpoleFitness(object):
    """Fitness of a linear softmax policy on gym's ``CartPole-v1``.

    Uses the gym API in which ``env.reset()`` returns the observation and
    ``env.step()`` returns a 4-tuple ``(state, reward, terminal, info)``.
    """

    def __init__(self, terminationStep):
        """Create the environment.

        Args:
            terminationStep: Maximum number of steps taken per episode.
        """
        self.env = gym.make('CartPole-v1')
        self.terminationStep = terminationStep

    def policy(self, state, pos):
        """Return action probabilities: softmax of ``state.dot(pos)``.

        Args:
            state: Observation vector.
            pos: Weight matrix with one column per action.
        """
        z = state.dot(pos)
        # Softmax is invariant to shifting the logits; subtracting the max
        # keeps exp() from overflowing to inf, which would yield NaN
        # probabilities and make np.random.choice fail.
        exp = np.exp(z - np.max(z))
        return exp / np.sum(exp)

    def evaluate(self, pos, evaluationIterations):
        """Average total episode reward of the policy encoded by ``pos``.

        Args:
            pos: Flat vector of 8 weights, reshaped to ``(4, 2)``.
            evaluationIterations: Number of episodes to average over (>= 1).

        Returns:
            Sum of all rewards divided by ``evaluationIterations``.

        Raises:
            ValueError: If ``evaluationIterations`` is less than 1.
        """
        if evaluationIterations < 1:
            raise ValueError('evaluationIterations must be at least 1')

        policy = np.reshape(pos, (4, 2))
        rewardTotal = 0
        for episode in range(evaluationIterations):
            state = self.env.reset()
            step = 0
            while True:
                step += 1
                probs = self.policy(state, policy)
                action = np.random.choice(2, p=probs)
                state, reward, terminal, _ = self.env.step(action)
                rewardTotal += reward
                # >= so an episode takes at most terminationStep steps
                if terminal or step >= self.terminationStep:
                    break
        return rewardTotal / evaluationIterations

In [None]:
# CartPole experiment: 10 independent PSO trials, reporting the iterations
# and wall-clock time of each trial and their averages.
cartpole = CartpoleFitness(200)

# Swarm hyper-parameters
weightSpaceBounds = 8
numberOfParticles = 15
momentum = 0.5
initialVelocityBounds = 0.25
localWeight = 2
globalWeight = 2

p = [weightSpaceBounds, numberOfParticles, momentum, initialVelocityBounds, localWeight, globalWeight]

timeTotal = 0
iterationTotal = 0
for i in range(1, 11):
    start = time.time()
    # p[0] is the symmetric position bound; p[1:] maps positionally onto
    # (numParticles, omega, v, phiL, phiG).
    solver = ParticleSwarmOptimisation(cartpole.evaluate, (-p[0], p[0]), *p[1:])
    k = solver.optimize(25, 195)
    end = time.time() - start
    iterationTotal += k
    timeTotal += end
    print('Trail ', i, ' solved in ', k, ' iterations and ', "{:.1f}".format(end), ' seconds.', sep='')
# `i` still holds the last trial number (10) here, i.e. the trial count.
print('Solved in an average of ', iterationTotal/i, ' iterations, with ', "{:.1f}".format(timeTotal /i), ' seconds per trail.', sep='')

In [None]:
# Roll out the policy stored in `solver.bestGlobalPos` on CartPole-v1 for at
# most 1000 steps, recording a rendered frame per step, then save a GIF.
env = gym.make('CartPole-v1')
observation = env.reset()
state = observation  # reset once; the initial observation is the first state
frames = []
rewards=[]
weights = solver.bestGlobalPos.reshape(-1,2)  # loop-invariant policy matrix
try:
    for t in range(1000):
        #Render to frames buffer
        frames.append(env.render(mode="rgb_array"))
        z = state.dot(weights)
        # Shift logits by their max so exp() cannot overflow into NaN probabilities.
        expZ = np.exp(z - np.max(z))
        probs = expZ / np.sum(expZ)
        action = np.random.choice(2,p=probs)
        state, reward, terminal, _ = env.step(action)
        rewards.append(reward)
        if terminal:
            break
finally:
    # Release the environment even if rendering or stepping raised.
    env.close()
save_frames_as_gif(frames)

In [None]:
class AcrobotFitness(object):
    """Fitness of a linear softmax policy on gym's ``Acrobot-v1``.

    Uses the gym API in which ``env.reset()`` returns the observation and
    ``env.step()`` returns a 4-tuple ``(state, reward, terminal, info)``.
    """

    def __init__(self, terminationStep):
        """Create the environment.

        Args:
            terminationStep: Maximum number of steps taken per episode.
        """
        self.env = gym.make('Acrobot-v1')
        self.terminationStep = terminationStep

    def policy(self, state, pos):
        """Return action probabilities: softmax of ``state.dot(pos)``.

        Args:
            state: Observation vector.
            pos: Weight matrix with one column per action.
        """
        z = state.dot(pos)
        # Softmax is invariant to shifting the logits; subtracting the max
        # keeps exp() from overflowing to inf, which would yield NaN
        # probabilities and make np.random.choice fail.
        exp = np.exp(z - np.max(z))
        return exp / np.sum(exp)

    def evaluate(self, pos, evaluationIterations):
        """Average total episode reward of the policy encoded by ``pos``.

        Args:
            pos: Flat vector of 18 weights, reshaped to ``(6, 3)``.
            evaluationIterations: Number of episodes to average over (>= 1).

        Returns:
            Sum of all rewards divided by ``evaluationIterations``.

        Raises:
            ValueError: If ``evaluationIterations`` is less than 1.
        """
        if evaluationIterations < 1:
            raise ValueError('evaluationIterations must be at least 1')

        policy = np.reshape(pos, (6, 3))
        rewardTotal = 0
        for episode in range(evaluationIterations):
            state = self.env.reset()
            step = 0
            while True:
                step += 1
                probs = self.policy(state, policy)
                action = np.random.choice(3, p=probs)
                state, reward, terminal, _ = self.env.step(action)
                rewardTotal += reward
                # >= so an episode takes at most terminationStep steps
                if terminal or step >= self.terminationStep:
                    break
        return rewardTotal / evaluationIterations

In [None]:
# Acrobot experiment: 10 independent PSO trials, reporting the iterations
# and wall-clock time of each trial and their averages.
# NOTE: despite its name, `mountainCar` holds the Acrobot fitness object.
mountainCar = AcrobotFitness(200)

# Swarm hyper-parameters
weightSpaceBounds = 8
numberOfParticles = 10
momentum = 0.5
initialVelocityBounds = 0.25
localWeight = 2
globalWeight = 2

p = [weightSpaceBounds, numberOfParticles, momentum, initialVelocityBounds, localWeight, globalWeight]

timeTotal = 0
iterationTotal = 0
for i in range(1, 11):
    start = time.time()
    # p[0] is the symmetric position bound; p[1:] maps positionally onto
    # (numParticles, omega, v, phiL, phiG). size=18 matches the (6, 3)
    # reshape done in AcrobotFitness.evaluate.
    solver = ParticleSwarmOptimisation(mountainCar.evaluate, (-p[0], p[0]), *p[1:], size=18)
    k = solver.optimize(20, -70)
    end = time.time() - start
    iterationTotal += k
    timeTotal += end
    print('Trail ', i, ' solved in ', k, ' iterations and ', "{:.1f}".format(end), ' seconds.', sep='')
# `i` still holds the last trial number (10) here, i.e. the trial count.
print('Solved in an average of ', iterationTotal/i, ' iterations, with ', "{:.1f}".format(timeTotal /i), ' seconds per trail.', sep='')

In [None]:
# Roll out the policy stored in `solver.bestGlobalPos` on Acrobot-v1 for at
# most 1000 steps, recording a rendered frame per step, then save a GIF.
env = gym.make('Acrobot-v1')
observation = env.reset()
state = observation  # reset once; the initial observation is the first state
frames = []
rewards=[]
weights = solver.bestGlobalPos.reshape(-1,3)  # loop-invariant policy matrix
try:
    for t in range(1000):
        #Render to frames buffer
        frames.append(env.render(mode="rgb_array"))
        z = state.dot(weights)
        # Shift logits by their max so exp() cannot overflow into NaN probabilities.
        expZ = np.exp(z - np.max(z))
        probs = expZ / np.sum(expZ)
        action = np.random.choice(3,p=probs)
        state, reward, terminal, _ = env.step(action)
        rewards.append(reward)
        if terminal:
            break
finally:
    # Release the environment even if rendering or stepping raised.
    env.close()
save_frames_as_gif(frames)