In [3]:
import numpy as np
from tqdm import tqdm_notebook
from env import EnvSpec, Env, EnvWithModel
from policy import Policy
import matplotlib.pyplot as plt
from matplotlib import colors
from scipy import stats
from mpl_toolkits import mplot3d

In [4]:
class RandomPolicy(Policy):
    """Policy that draws actions from a single fixed distribution.

    The ``state`` argument of both query methods is accepted but never read,
    so the same distribution applies in every state.
    """

    def __init__(self, nA, p=None):
        """Store the action distribution.

        Args:
            nA: number of actions; only used to build the default distribution.
            p: optional sequence of action probabilities. When omitted, every
                one of the ``nA`` actions gets probability ``1 / nA``.
        """
        if p is None:
            uniform_mass = 1 / nA
            p = np.array([uniform_mass] * nA)
        self.p = p

    def action_prob(self, state, action=None):
        """Return the stored probability entry for ``action``.

        The lookup is ``self.p[action]`` with ``action`` passed through as
        given (including the default ``None``).
        """
        probability = self.p[action]
        return probability

    def action(self, state):
        """Sample an action index according to ``self.p``."""
        n_actions = len(self.p)
        return np.random.choice(n_actions, p=self.p)

# LIGHT WORLD DOMAIN

In [5]:
class LightWorld(EnvWithModel):
    """Grid-world "light world" domain made of a cycle of rooms.

    Every room is a ``Dim[0] x Dim[1]`` grid whose border cells are walls. The
    agent walks on the interior cells and may additionally step onto the two
    wall cells that hold the room's ``Lock`` and ``Door``. Stepping onto the
    door moves the agent to the ``Entry`` cell of the next room (room IDs wrap
    around).

    State vector layout (``stateDim`` = 17 floats):
        [0]      room ID
        [1], [2] x and y position
        [3]      1 once the key has been picked up
        [4]      1 once the lock has been pressed
        [5:8]    key / door / lock lies north of the agent (larger y)
        [8:11]   key / door / lock lies south of the agent (smaller y)
        [11:14]  key / door / lock lies east of the agent (larger x)
        [14:17]  key / door / lock lies west of the agent (smaller x)

    Actions: 0-Left, 1-Right, 2-Up, 3-Down, 4-Pickup, 5-Press. Each action
    takes effect with probability ``actionProb`` and is a no-op otherwise.

    NOTE(review): as in the original design, going through the door does not
    require the lock to be pressed, pressing does not require the key, and the
    key/lock flags are not cleared on a room change -- confirm whether these
    should be gated.
    """

    # Unit displacement (dx, dy) of each movement action.
    _MOVE_DELTAS = {0: (-1, 0), 1: (1, 0), 2: (0, 1), 3: (0, -1)}

    def __init__(self):
        """Build the four rooms and draw a random initial state."""
        # Room ID + x, y + key flag + lock flag + 12 light-sensor inputs.
        self.stateDim = 17
        self.nA = 6  # 0-Left, 1-Right, 2-Up, 3-Down, 4-Pickup, 5-Press.
        self.actionProb = 0.9

        self.rooms = {
            0: {'Dim': np.array([6, 6]),
                'Entry': np.array([2, 1]),
                'Door': np.array([5, 4]),
                'Lock': np.array([5, 1]),
                'Key': np.array([2, 2])},

            1: {'Dim': np.array([6, 6]),
                'Entry': np.array([1, 4]),
                'Door': np.array([2, 0]),
                'Lock': np.array([4, 0]),
                'Key': np.array([2, 2])},

            2: {'Dim': np.array([6, 6]),
                'Entry': np.array([4, 2]),
                'Door': np.array([0, 4]),
                'Lock': np.array([0, 2]),
                'Key': np.array([2, 2])},

            3: {'Dim': np.array([6, 6]),
                'Entry': np.array([4, 2]),
                'Door': np.array([0, 4]),
                'Lock': np.array([0, 2]),
                'Key': np.array([2, 2])},
        }
        # Derived from the room table so the three can never disagree.
        self.nRooms = len(self.rooms)
        self.IDs = list(self.rooms)

        self.reset()

    def _room(self):
        """Return the property dict of the room the agent is currently in."""
        # The state vector is float-valued; cast so the lookup key is a plain int.
        return self.rooms[int(self.state[0])]

    def _canEnter(self, cell, room):
        """Tell whether ``cell`` (x, y) is walkable in ``room``.

        Walkable cells are the interior cells plus the lock and door cells,
        which sit on the wall.
        """
        dim = room['Dim']
        interior = (1 <= cell[0] <= dim[0] - 2) and (1 <= cell[1] <= dim[1] - 2)
        return bool(
            interior
            or np.array_equal(cell, room['Door'])
            or np.array_equal(cell, room['Lock'])
        )

    def reset(self) -> np.ndarray:
        """Start a new episode at a random interior cell of a random room.

        Returns:
            A copy of the freshly initialised state vector.
        """
        self.state = np.zeros(self.stateDim)
        # Randomly choosing the room.
        self.state[0] = np.random.choice(self.IDs)
        # Randomly initialising x, y among the interior (non-wall) cells.
        dim = self._room()['Dim']
        self.state[1] = np.random.choice(np.arange(1, dim[0] - 1))
        self.state[2] = np.random.choice(np.arange(1, dim[1] - 1))
        # Key presence (state[3]) and lock status (state[4]) start at 0.

        # Reward flags must exist even if R() is queried before the first step.
        self.keyPickup = False
        self.lockOpen = False
        self.roomChange = False

        self.updateLightSensor()
        return self.state.copy()

    def updateLightSensor(self):
        """Recompute the 12 binary light sensors (state[5:17]) from the position."""
        room = self._room()
        x, y = self.state[1], self.state[2]
        # Fixed order key, door, lock inside each direction group.
        for offset, obj in enumerate((room['Key'], room['Door'], room['Lock'])):
            self.state[5 + offset] = int(y < obj[1])   # North
            self.state[8 + offset] = int(y > obj[1])   # South
            self.state[11 + offset] = int(x < obj[0])  # East
            self.state[14 + offset] = int(x > obj[0])  # West

    # Low-level effects: callers (see Transition) are responsible for checking
    # the preconditions before invoking these.
    def actLeft(self):
        """Decrease x by one with probability ``actionProb``."""
        if np.random.rand() < self.actionProb:
            self.state[1] -= 1

    def actRight(self):
        """Increase x by one with probability ``actionProb``."""
        if np.random.rand() < self.actionProb:
            self.state[1] += 1

    def actUp(self):
        """Increase y by one with probability ``actionProb``."""
        if np.random.rand() < self.actionProb:
            self.state[2] += 1

    def actDown(self):
        """Decrease y by one with probability ``actionProb``."""
        if np.random.rand() < self.actionProb:
            self.state[2] -= 1

    def actPickup(self):
        """Set the key flag and mark the pickup for reward, w.p. ``actionProb``."""
        if np.random.rand() < self.actionProb:
            self.state[3] = 1
            self.keyPickup = True

    def actPress(self):
        """Set the lock flag and mark the press for reward, w.p. ``actionProb``."""
        if np.random.rand() < self.actionProb:
            self.state[4] = 1
            self.lockOpen = True

    def nextRoom(self):
        """Move the agent to the entry cell of the next room (cyclic order)."""
        # Wrap around after the last room instead of running past the table.
        self.state[0] = (int(self.state[0]) + 1) % self.nRooms
        # Assignment (not comparison): place the agent on the new room's entry.
        self.state[1:3] = self._room()['Entry']
        self.roomChange = True

    def Transition(self, a):
        """Apply action ``a`` to the state in place.

        A movement action is attempted only when the target cell is walkable
        (interior, lock or door); landing on the door triggers ``nextRoom``.
        Pickup needs the agent on the key cell, Press needs it on the lock
        cell. At most one primitive effect is applied per call, and the light
        sensors are refreshed afterwards.
        """
        room = self._room()

        # Resetting booleans of rewarding actions.
        self.keyPickup = False
        self.lockOpen = False
        self.roomChange = False

        if a in self._MOVE_DELTAS:
            movers = {0: self.actLeft, 1: self.actRight,
                      2: self.actUp, 3: self.actDown}
            target = self.state[1:3] + np.array(self._MOVE_DELTAS[a])
            if self._canEnter(target, room):
                movers[a]()
            # The move is stochastic, so test the actual position, not the target.
            if np.array_equal(self.state[1:3], room['Door']):
                self.nextRoom()
        elif a == 4:
            if np.array_equal(self.state[1:3], room['Key']):
                self.actPickup()
        elif a == 5:
            if np.array_equal(self.state[1:3], room['Lock']):
                self.actPress()

        # Position or room may have changed; keep the sensors consistent.
        self.updateLightSensor()

    def R(self, a):
        """Reward for the most recent transition.

        Returns 1.0 for a key pickup or a lock press, 10.0 for a room change,
        and -0.1 otherwise. ``a`` is accepted but not used.
        """
        if self.keyPickup:
            return 1.0
        elif self.lockOpen:
            return 1.0
        elif self.roomChange:
            return 10.0

        return -0.1

    def step(self, action):
        """Advance the environment by one action.

        Args:
            action: integer action index in ``range(nA)``.

        Returns:
            ``(state, reward)`` where ``state`` is a copy of the new state
            vector, so stored transitions are not mutated by later steps.

        Raises:
            AssertionError: if ``action`` is not a valid action index.
        """
        assert action in range(self.nA), "Invalid action"

        self.Transition(action)
        reward = self.R(action)

        return self.state.copy(), reward

    def VisualizeMDP(self):
        """Placeholder for a visualisation of the MDP; not implemented yet."""
        pass

In [6]:
# Instantiate the environment; LightWorld.__init__ ends by calling reset(),
# which draws a random starting room and position.
temp = LightWorld()

In [13]:
# Entries 1 and 2 of the state hold the x, y coordinates assigned in LightWorld.reset.
temp.state[1:3]


array([4., 4.])

In [14]:
# Full state vector (17 entries, allocated in LightWorld.reset).
temp.state

array([3., 4., 4., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1., 1.])

In [18]:
# Scratch check: int() of a true comparison is 1, the same conversion
# LightWorld.updateLightSensor applies to its comparisons.
a = 1
b = 1
print(int(a==b))

1
