# Classes and Functions

Execute all cells.

In [None]:
import random

class Grid_Environment:
    """Deterministic grid world on an X-by-Y board with 1-based (x, y) cells.

    The agent starts at (1, 1) and the goal is the opposite corner (X, Y).
    Moves that would leave the board are clamped to its edge. The goal is
    absorbing: once the agent is there, every action keeps it in place.
    """

    # Displacement (dx, dy) applied by each action key.
    moveAction = {
        'w': (0,1),
        's': (0,-1),
        'd': (1,0),
        'a': (-1,0)
    }

    def __init__ (self, X, Y):
        """Create a grid that is X cells wide and Y cells tall."""
        self.X = X
        self.Y = Y
        self.state = (1,1)
        self.goal = (X,Y)

        # Built in a single pass (x outermost, y innermost) rather than by
        # repeated list concatenation, which re-copied the list per cell.
        self.states = tuple(
            (x, y) for x in range(1, X + 1) for y in range(1, Y + 1)
        )

        self.actions = ('w', 's', 'd', 'a' )

    def reset(self):
        """Put the agent back on the start cell (1, 1)."""
        self.state = (1,1)

    def move(self,state,shift):
        """Return ``state`` displaced by ``shift``, clamped to the board."""
        x = min(max(state[0] + shift[0],1),self.X)
        y = min(max(state[1] + shift[1],1),self.Y)
        return (x,y)

    def reward(self,state, action):
        """Return 0 when ``state`` is the goal and -1 otherwise.

        ``action`` is accepted for interface symmetry but is not used.
        """
        return 0 if state == self.goal else -1

    def nextState(self,state, action):
        """Return ``{successor: probability}`` for taking ``action`` in ``state``.

        In this base environment the transition is deterministic, so the
        dict always has exactly one entry with probability 1.0.
        """
        if state == self.goal:
            return {self.goal: 1.0}

        successor = self.move(state,self.moveAction[action])
        return {successor: 1.0}

    def simulateStep(self,state,action):
        """Sample one transition from ``state`` without touching ``self.state``.

        Returns a ``(next_state, reward)`` tuple; the reward is the one
        assigned to the state the step was taken from.
        """
        r = self.reward(state, action)
        nextStates = self.nextState(state,action)
        successors = list(nextStates.keys())
        weights = list(nextStates.values())
        return random.choices(successors, weights=weights, k=1)[0], r

    def step(self,action):
        """Apply ``action`` to the agent and return ``(new_state, reward)``."""
        self.state, r  = self.simulateStep(self.state,action)
        return self.state, r

    def render(self):
        """Print the board, top row first: 'A' agent, 'G' goal, '*' other."""
        print('')
        for j in range(self.Y,0,-1):
            row = []
            for i in range(1,self.X+1):
                if self.state == (i,j):
                    row.append('A')
                elif self.goal == (i,j):
                    row.append('G')
                else:
                    row.append('*')
            print(''.join(row))

In [None]:
class GridWind_Environment(Grid_Environment):
    """Grid world with two windy regions that perturb the agent's moves.

    Inside a windy region the chosen action is applied with probability 0.5
    and each of the region's two gust directions with probability 0.25.
    Outside those regions the transition is deterministic.
    """

    # Wind displacement vectors; not referenced by the methods of this class.
    windUp = (0,1)
    windDown = (0,-1)
    windLeft = (-1,0)

    def nextState(self,state, action):
        """Return ``{successor: probability}`` for ``action`` taken in ``state``."""
        if state == self.goal:
            return {self.goal: 1.0}

        intended = self.move(state,self.moveAction[action])

        if state[0]/self.X >= 0.5 and state[0] < self.X and state[1]/self.Y > 0.5:
            # Upper-right region: gusts push up and to the left.
            gusts = ('w', 'a')
        elif state[0]/self.X <= 0.5 and state[0] > 1 and state[1]/self.Y <= 0.5:
            # Lower-left region: gusts push down and to the left.
            gusts = ('s', 'a')
        else:
            return {intended: 1.0}

        distribution = {intended: 0.5}
        for gust in gusts:
            landing = self.move(state,self.moveAction[gust])
            # Successors can coincide (e.g. after clamping), so accumulate.
            distribution[landing] = distribution.get(landing, 0) + 0.25
        return distribution

    def render(self):
        """Print the board: 'A' agent, 'G' goal, '$'/'#' windy cells, '*' calm."""
        print('')
        for j in range(self.Y,0,-1):
            cells = []
            for i in range(1,self.X+1):
                if self.state == (i,j):
                    glyph = 'A'
                elif self.goal == (i,j):
                    glyph = 'G'
                elif i/self.X >= 0.5 and i < self.X and j/self.Y > 0.5:
                    glyph = '$'
                elif i/self.X <= 0.5 and i > 1 and j/self.Y <= 0.5:
                    glyph = '#'
                else:
                    glyph = '*'
                cells.append(glyph)
            print(''.join(cells))

In [None]:
class GridInvGoal_Environment(Grid_Environment):
    """Grid world whose goal sits inside the board and is not drawn by render().

    The goal is placed about two thirds of the way across and one third of
    the way up the grid.
    """

    def __init__ (self, X, Y):
        """Create the grid and relocate the goal away from the corner."""
        super().__init__(X, Y)
        # round() yields 0 on very small grids (e.g. Y == 1), which would put
        # the goal outside the 1-based board and make it unreachable.
        self.goal = (max(1, round(2*X/3)), max(1, round(Y/3)))

    def render(self):
        """Print the board showing only the agent ('A'); other cells are '*'."""
        print('')
        for j in range(self.Y,0,-1):
            # The goal cell is deliberately rendered like any other cell.
            print(''.join(
                'A' if self.state == (i,j) else '*'
                for i in range(1,self.X+1)
            ))

In [None]:
class GridQuad_Environment(Grid_Environment):
    """Grid world split into four quadrants that each remap the action keys.

    The board is divided at half its width and half its height; the key the
    player presses is translated through the quadrant's table before moving.
    """

    # Per-quadrant translation from pressed key to the move actually applied.
    Q1 = { 'a' : 'd', 's' : 'w' , 'd' : 'a', 'w':'s' }
    Q2 = { 'a' : 'w', 's' : 'a' , 'd' : 's', 'w':'d' }
    Q3 = { 'a' : 'w', 's' : 'd' , 'd' : 'a', 'w':'s' }
    Q4 = { 'a' : 's', 's' : 'd' , 'd' : 'w', 'w':'a' }

    def nextState(self,state, action):
        """Return ``{successor: 1.0}`` after remapping ``action`` by quadrant."""
        if state == self.goal:
            return {self.goal: 1.0}

        in_left_half = state[0]/self.X <= 0.5
        in_lower_half = state[1]/self.Y <= 0.5

        if in_lower_half:
            remap = self.Q2 if in_left_half else self.Q3
        else:
            remap = self.Q4 if in_left_half else self.Q1

        landing = self.move(state,self.moveAction[remap[action]])
        return {landing: 1.0}

    def render(self):
        """Print the board: 'A' agent, 'G' goal, '$' '#' '@' '*' per quadrant."""
        print('')
        for j in range(self.Y,0,-1):
            in_lower_half = j/self.Y <= 0.5
            cells = []
            for i in range(1,self.X+1):
                in_left_half = i/self.X <= 0.5
                if self.state == (i,j):
                    glyph = 'A'
                elif self.goal == (i,j):
                    glyph = 'G'
                elif in_lower_half:
                    glyph = '$' if in_left_half else '#'
                else:
                    glyph = '@' if in_left_half else '*'
                cells.append(glyph)
            print(''.join(cells))

In [None]:
class GridQuadInv_Environment(Grid_Environment):
    """Grid world with four key-remapping regions that render() does not show.

    The board is divided at 30% of its width and 30% of its height; the key
    the player presses is translated through the region's table before moving.
    """

    # Per-region translation from pressed key to the move actually applied.
    Q1 = { 'a' : 'd', 's' : 'w' , 'd' : 'a', 'w':'s' }
    Q2 = { 'a' : 'w', 's' : 'a' , 'd' : 's', 'w':'d' }
    Q3 = { 'a' : 'w', 's' : 'd' , 'd' : 'a', 'w':'s' }
    Q4 = { 'a' : 's', 's' : 'd' , 'd' : 'w', 'w':'a' }

    def nextState(self,state, action):
        """Return ``{successor: 1.0}`` after remapping ``action`` by region."""
        if state == self.goal:
            return {self.goal: 1.0}

        in_left_band = state[0]/self.X <= 0.3
        in_lower_band = state[1]/self.Y <= 0.3

        if in_lower_band:
            remap = self.Q3 if in_left_band else self.Q4
        else:
            remap = self.Q1 if in_left_band else self.Q2

        landing = self.move(state,self.moveAction[remap[action]])
        return {landing: 1.0}

    def render(self):
        """Print the board: 'A' agent, 'G' goal, '*' for every other cell."""
        print('')
        for j in range(self.Y,0,-1):
            cells = []
            for i in range(1,self.X+1):
                # Region markers are intentionally not drawn here.
                if self.state == (i,j):
                    glyph = 'A'
                elif self.goal == (i,j):
                    glyph = 'G'
                else:
                    glyph = '*'
                cells.append(glyph)
            print(''.join(cells))

In [None]:
import time
import numpy as np
from IPython.display import clear_output

def Simulate(env,maxSteps):
    """Let a person drive ``env`` from the keyboard until the goal or the budget.

    Each prompt accepts a string of 'w', 'a', 's', 'd' keys that are executed
    one at a time with a redraw after every character. Other characters are
    ignored and do not count as steps.

    Args:
        env: environment exposing ``reset``, ``render``, ``step``, ``state``
            and ``goal``.
        maxSteps: maximum number of moves the player may execute.

    Returns:
        The number of moves executed (never more than ``maxSteps``).
    """
    env.reset()
    tempo = .25  # seconds to pause after each redraw
    clear_output(wait=True)
    env.render()

    steps = 0

    while steps < maxSteps:
        command = input("Write 'w', 'a', 's', 'd' to move: ")
        for a in command:
            # Enforce the budget inside a command too: a single long input
            # must not be able to run past maxSteps.
            if env.state == env.goal or steps >= maxSteps:
                break
            if a in ('w', 'a', 's', 'd'):
                env.step(a)
                steps += 1

            clear_output(wait=True)
            env.render()
            time.sleep(tempo)
        if env.state == env.goal:
            break

    if env.state == env.goal:
        print(f"Congratulations. You spent {steps} steps to reach the Goal.")
    else:
        print("You fail to reach the goal.")
    input("Press enter to go to the next task.")

    return steps

def EvaluatePolicyMC(env,policy,maxSteps,nSamples):
    """Estimate by Monte Carlo how often and how fast ``policy`` reaches the goal.

    Args:
        env: environment exposing ``reset``, ``step``, ``state`` and ``goal``.
        policy: mapping from state to one of 'w', 'a', 's', 'd'. An entry for
            the goal state is not required.
        maxSteps: step budget for each simulated episode.
        nSamples: number of episodes to simulate.

    Returns:
        ``(memSteps, memGoals)``: the step counts of the successful episodes,
        and one 1/0 flag per episode telling whether the goal was reached.

    Raises:
        ValueError: if the policy yields something that is not a valid action.
            Such an action used to be skipped without consuming a step, which
            made the episode loop forever.
    """
    memSteps = []
    memGoals = []

    for i in range(nSamples):
        env.reset()
        steps = 0

        while steps < maxSteps:
            s = env.state
            if s == env.goal:
                break
            a = policy[s]
            if a not in ('w', 'a', 's', 'd'):
                raise ValueError(
                    f"policy returned invalid action {a!r} for state {s}; "
                    "expected one of 'w', 'a', 's', 'd'"
                )
            env.step(a)
            steps += 1

        if env.state == env.goal:
            memGoals.append(1)
            memSteps.append(steps)
        else:
            memGoals.append(0)

        clear_output(wait=True)
        print(f'Simulating {(i+1)/nSamples}')

    print(f'The goal was rechead {100*np.mean(memGoals)}% of the simulations.')
    if len(memSteps) > 0:
        print(f'The average steps in simulations that rechead the goal was {np.mean(memSteps)}.')

    return memSteps, memGoals

def SimulatePolicy(env,policy,maxSteps):
    """Animate one episode of ``env`` driven by ``policy``.

    Args:
        env: environment exposing ``reset``, ``render``, ``step``, ``state``
            and ``goal``.
        policy: mapping from state to one of 'w', 'a', 's', 'd'. An entry for
            the goal state is not required.
        maxSteps: maximum number of moves to execute.

    Returns:
        The number of moves executed.

    Raises:
        ValueError: if the policy yields something that is not a valid action.
            Such an action used to be skipped without consuming a step, which
            made the loop run forever.
    """
    env.reset()
    tempo = .25  # seconds to pause after each redraw
    clear_output(wait=True)
    env.render()

    steps = 0

    while steps < maxSteps:
        s = env.state
        if s == env.goal:
            break
        a = policy[s]
        if a not in ('w', 'a', 's', 'd'):
            raise ValueError(
                f"policy returned invalid action {a!r} for state {s}; "
                "expected one of 'w', 'a', 's', 'd'"
            )
        env.step(a)
        steps += 1

        clear_output(wait=True)
        env.render()
        time.sleep(tempo)

    if env.state == env.goal:
        print(f"Congratulations. You spent {steps} steps to reach the Goal.")
    else:
        print("You fail to reach the goal.")

    return steps

def SimulatePlan(env,plan):
    """Animate ``env`` while executing the fixed key sequence ``plan``.

    Characters other than 'w', 'a', 's', 'd' are not executed and do not
    count as steps, but still trigger a redraw and pause. Execution stops
    early once the agent is on the goal.

    Returns the number of moves executed.
    """
    valid_keys = ('w', 'a', 's', 'd')
    frame_delay = .25  # seconds to pause after each redraw

    env.reset()
    clear_output(wait=True)
    env.render()

    executed = 0
    for key in plan:
        if env.state == env.goal:
            break
        if key in valid_keys:
            env.step(key)
            executed += 1

        clear_output(wait=True)
        env.render()
        time.sleep(frame_delay)

    reached_goal = env.state == env.goal
    if reached_goal:
        print(f"Congratulations. You spent {executed} steps to reach the Goal.")
    else:
        print("You fail to reach the goal.")

    return executed

# Execution 1: Manual Control

Expand the cell before executing it.

In [None]:
# Manual-control session: play each environment variant once on the same
# grid size and step budget, then report the step counts.
nX, nY = 20, 10
maxSteps = 300

scores = []
for envClass in (
    Grid_Environment,
    GridWind_Environment,
    GridQuad_Environment,
    GridQuadInv_Environment,
    GridInvGoal_Environment,
):
    env = envClass(nX, nY)
    scores.append(Simulate(env, maxSteps))

# Keep the per-environment names bound in case other cells read them.
evalGrid, evalGridWind, evalGridQuad, evalGridQuadInv, evalGridInvGoal = scores

clear_output(wait=True)
print(f"Your performance was: {evalGrid}, {evalGridWind}, {evalGridQuad}, {evalGridQuadInv}, {evalGridInvGoal}")
print(f"Total: {sum(scores)}")



# Execution 2: Deterministic Plan

Expand the cells before executing them.

Define a plan by trial and error by setting the variable **plan**.

In [None]:
nX = 20
nY = 10

# Sequence of 'w', 'a', 's', 'd' keys executed in order from the start cell.
plan = 'awsd'

env = Grid_Environment(nX,nY)
# Stored as planSteps rather than `eval`, which would shadow the builtin
# eval() for every cell executed afterwards.
planSteps = SimulatePlan(env, plan)



*******************G
********************
********************
********************
********************
********************
********************
********************
********************
*A******************
You fail to reach the goal.


Define a plan by trial and error by setting the variable **plan**.

In [None]:
nX = 20
nY = 10

# Sequence of 'w', 'a', 's', 'd' keys executed in order from the start cell.
plan = 'awsd'

env = GridQuad_Environment(nX,nY)
# Stored as planSteps rather than `eval`, which would shadow the builtin
# eval() for every cell executed afterwards.
planSteps = SimulatePlan(env, plan)

Define a plan by trial and error by setting the variable **plan**.

In [None]:
nX = 20
nY = 10

# Sequence of 'w', 'a', 's', 'd' keys executed in order from the start cell.
plan = 'awsd'

env = GridQuadInv_Environment(nX,nY)
# Stored as planSteps rather than `eval`, which would shadow the builtin
# eval() for every cell executed afterwards.
planSteps = SimulatePlan(env, plan)

# Execution 3: Policies
Expand the cells before executing them.

Define a policy by trial and error by setting the variable **policy**.


In [None]:
nX = 10
nY = 5
maxSteps = 100

# Action chosen in every cell of the 10x5 grid, keyed by (x, y). Rows are
# written top (y = 5) to bottom (y = 1) so the literal mirrors the rendering.
policy = {(1,5): 'a', (2,5): 'a', (3,5): 'a', (4,5): 'a', (5,5): 'a', (6,5): 'a', (7,5): 'a', (8,5): 'a', (9,5): 'a', (10,5): 'w',
          (1,4): 'a', (2,4): 'a', (3,4): 'a', (4,4): 'a', (5,4): 'a', (6,4): 'a', (7,4): 'a', (8,4): 'a', (9,4): 'a', (10,4): 'w',
          (1,3): 'd', (2,3): 'd', (3,3): 'd', (4,3): 'd', (5,3): 'd', (6,3): 's', (7,3): 's', (8,3): 's', (9,3): 's', (10,3): 'w',
          (1,2): 'w', (2,2): 'a', (3,2): 'a', (4,2): 'a', (5,2): 'd', (6,2): 'd', (7,2): 'd', (8,2): 'd', (9,2): 'd', (10,2): 'w',
          (1,1): 'w', (2,1): 'd', (3,1): 'd', (4,1): 'd', (5,1): 'd', (6,1): 'd', (7,1): 'd', (8,1): 'd', (9,1): 'd', (10,1): 'w'
         }



env = GridWind_Environment(nX,nY)
# Stored as policySteps rather than `eval`, which would shadow the builtin
# eval() for every cell executed afterwards.
policySteps = SimulatePolicy(env, policy, maxSteps)

Evaluate the previously defined policy with Monte Carlo simulation.

In [None]:
# Monte Carlo evaluation of the policy defined above: 1000 episodes of at
# most 100 steps each. s holds the step counts of successful episodes and
# g holds one reached-the-goal flag per episode.
maxSteps, nSamples = 100, 1000
s, g = EvaluatePolicyMC(env, policy, maxSteps=maxSteps, nSamples=nSamples)