In [None]:
import numpy as np
import random

#used to hold the position of agents and if they are in posession of a block
class agent:
    """One Q-learning agent living on the 5x5 grid.

    Attributes:
        row, col: current grid coordinates, each in 0..4.
        block: True while the agent is carrying a block.
        ops: operator name -> 1 (valid) / 0 (invalid).
        name: label given at construction.
        qtable: 25x6 array of Q-values; one row per grid cell
            (index ``row * 5 + col``) and one column per operator, in the
            order north, east, south, west, pick, drop.
        lr: learning rate.
        y: discount rate.
    """

    _GRID_SIZE = 5
    _MOVES = ('north', 'east', 'south', 'west')
    # Fixed column order of the Q-table; matches the key order of ``ops``.
    _OPERATIONS = _MOVES + ('pick', 'drop')

    def __init__(self, x, y, name, lr, discount):
        """Place the agent at row ``x``, column ``y`` with an all-zero Q-table."""
        self.row = x
        self.col = y
        self.block = False
        # 1 - valid | 0 - invalid
        self.ops = {op: 0 for op in self._OPERATIONS}
        self.name = name

        self.qtable = np.zeros(
            (self._GRID_SIZE ** 2, len(self._OPERATIONS)), dtype="float")
        self.lr = lr
        self.y = discount

    def pick(self):
        """Mark the agent as carrying a block."""
        self.block = True

    def drop(self):
        """Mark the agent as no longer carrying a block."""
        self.block = False

    def reward(self, operation):
        """Return the reward for ``operation``.

        -1 for any of the four moves ('north', 'east', 'south', 'west'),
        +13 otherwise (i.e. for 'pick' or 'drop').
        """
        if operation in self._MOVES:
            return -1
        return 13

    def moveNorth(self):
        """Move one row up unless already on the top row."""
        if self.row > 0:
            self.row -= 1

    def moveSouth(self):
        """Move one row down unless already on the bottom row."""
        if self.row < self._GRID_SIZE - 1:
            self.row += 1

    def moveEast(self):
        """Move one column right unless already on the rightmost column."""
        if self.col < self._GRID_SIZE - 1:
            self.col += 1

    def moveWest(self):
        """Move one column left unless already on the leftmost column."""
        if self.col > 0:
            self.col -= 1

    def _stateIndex(self, row, col):
        """Map a grid cell to its Q-table row, in the range 0..24."""
        return row * self._GRID_SIZE + col

    def updateQtable(self, operation):
        """Apply one Q-learning update for ``operation`` at the current cell.

        Q(s, a) <- (1 - lr) * Q(s, a) + lr * (R + y * max_a' Q(s', a'))

        The current cell is read from ``self.row`` / ``self.col`` and the
        successor comes from ``nextState``, so this must be called BEFORE
        the operation is applied to the agent. The agent is not moved here.

        Raises:
            ValueError: if ``operation`` is not one of the six operators.
        """
        action = self._OPERATIONS.index(operation)
        R = self.reward(operation)

        #25 possible states, numbered row by row
        pos = self._stateIndex(self.row, self.col)
        best_next = np.max(self.qtable[self.nextState(operation)])

        #updating the qtable
        first_part = (1 - self.lr) * self.qtable[pos, action]
        second_part = self.lr * (R + self.y * best_next)
        self.qtable[pos, action] = first_part + second_part

    def nextState(self, operation):
        """Return the Q-table row index reached by ``operation`` from here.

        'pick' and 'drop' (and any unrecognised operation) leave the
        position unchanged; a move shifts one cell and is clamped at the
        grid edge, mirroring the move* methods. The agent is not modified.
        """
        last = self._GRID_SIZE - 1
        row, col = self.row, self.col
        if operation == 'north':
            row = max(row - 1, 0)
        elif operation == 'south':
            row = min(row + 1, last)
        elif operation == 'east':
            col = min(col + 1, last)
        elif operation == 'west':
            col = max(col - 1, 0)
        return self._stateIndex(row, col)

#must initialize world with policy, lr, y, episodes
class world:
    """5x5 pickup/dropoff grid shared by three agents (red, blue, black).

    Attributes:
        environment: 5x5 object array; 'P' marks a pickup cell, 'D' a
            dropoff cell, an agent's name marks its position, 0 is empty.
        agents: name -> agent, in the order red, blue, black.
        pickLoc: (row, col) -> number of blocks still available there.
        dropLoc: (row, col) -> number of blocks delivered there.
        policy: stored as given; not consulted by the methods of this class.
        lr, y: learning rate and discount rate handed to every agent.
        num_episodes: number of episodes requested.
    """

    # A dropoff cell accepts blocks while it holds fewer than this many.
    _DROP_CAPACITY = 5

    def __init__(self, policy, lr, y, episodes=4000):
        self.environment = np.zeros((5,5), dtype="object")
        self.red = agent(2,2,'red', lr, y)
        self.black = agent(0,2,'black', lr, y)
        self.blue = agent(4,2,'blue', lr, y)
        #order of agents
        self.agents = {'red' : self.red, 'blue' : self.blue, 'black' : self.black}
        #dict to hold pickup and dropoff locations and block numbers
        self.pickLoc = {(0, 4) : 5, (1,3) : 5, (4,1) : 5}
        self.dropLoc = {(0, 0) : 0, (2,0) : 0, (3,4) : 0}
        self.init_environment()
        self.policy = policy
        #learning rate
        self.lr = lr
        #discount rate
        self.y = y
        #episodes
        self.num_episodes = episodes

    #populate environment
    def init_environment(self):
        """Redraw the grid from pickLoc, dropLoc and the agents' positions.

        The grid is cleared first so the method can be re-run after an
        agent moves without leaving its name behind on the old cell.
        """
        self.environment.fill(0)

        for row, col in self.pickLoc:
            self.environment[row, col] = 'P'

        for row, col in self.dropLoc:
            self.environment[row, col] = 'D'

        # Agents are drawn last, so an agent standing on a 'P'/'D' cell
        # hides that marker until it moves away.
        for name, member in self.agents.items():
            self.environment[member.row, member.col] = name

    #needs to be changed
    def print_environment(self):
        """Print the raw environment array."""
        print(self.environment)

    def pickup(self, agent):
        """Have ``agent`` take one block from the pickup cell it stands on.

        example code:
        if (agent.row, agent.col) in self.pickLoc:
            self.pickup(agent)

        Decreases the pickLoc count at the agent's cell and sets the
        agent's block state to True.

        Returns:
            True if a block was picked up; False if the agent is not on a
            pickup cell, the cell is empty, or the agent already has a block.
        """
        cell = (agent.row, agent.col)
        if self.pickLoc.get(cell, 0) > 0 and not agent.block:
            agent.pick()
            self.pickLoc[cell] -= 1
            return True

        return False

    def dropoff(self, agent):
        """Have ``agent`` deliver its block to the dropoff cell it stands on.

        example code:
        if (agent.row, agent.col) in self.dropLoc:
            self.dropoff(agent)

        Increases the dropLoc count at the agent's cell and sets the
        agent's block state to False.

        Returns:
            True if the block was dropped; False if the agent is not on a
            dropoff cell, the cell is full, or the agent has no block.
        """
        cell = (agent.row, agent.col)
        if (cell in self.dropLoc
                and self.dropLoc[cell] < self._DROP_CAPACITY
                and agent.block):
            agent.drop()
            self.dropLoc[cell] += 1
            return True

        return False

    def validOps(self, agent):
        """Recompute ``agent.ops`` (1 = valid, 0 = invalid) for its position.

        A move is valid when it stays on the grid and the target cell is
        not occupied by another agent. 'pick' is valid on a pickup cell
        that still has blocks when the agent carries none; 'drop' is valid
        on a dropoff cell that is not yet full when the agent carries one.
        """
        # Reset ops
        agent.ops = {'north': 0, 'east': 0, 'south': 0,
                        'west': 0, 'pick': 0, 'drop': 0}

        occupied = {(other.row, other.col)
                    for other in self.agents.values() if other is not agent}

        i, j = agent.row, agent.col
        rows, cols = self.environment.shape

        # Conditions for 'north'
        if i > 0 and (i-1, j) not in occupied:
            agent.ops['north'] = 1

        # Conditions for 'south'
        if i < rows - 1 and (i+1, j) not in occupied:
            agent.ops['south'] = 1

        # Conditions for 'east'
        if j < cols - 1 and (i, j+1) not in occupied:
            agent.ops['east'] = 1

        # Conditions for 'west'
        if j > 0 and (i, j-1) not in occupied:
            agent.ops['west'] = 1

        # Conditions for 'pick'
        if not agent.block and self.pickLoc.get((i, j), 0) > 0:
            agent.ops['pick'] = 1

        # Conditions for 'drop' (same capacity test dropoff() applies)
        if (agent.block and (i, j) in self.dropLoc
                and self.dropLoc[(i, j)] < self._DROP_CAPACITY):
            agent.ops['drop'] = 1

    def _validOperations(self, agent):
        """Refresh ``agent.ops`` and return the names of the valid operators."""
        self.validOps(agent)
        return [op for op, is_valid in agent.ops.items() if is_valid == 1]

    def _blockOp(self, valid_operations):
        """Return 'pick' or 'drop' if one is currently valid, else None."""
        for op in ('pick', 'drop'):
            if op in valid_operations:
                return op
        return None

    def _perform(self, agent, operation):
        """Update the agent's Q-table for ``operation``, then carry it out."""
        # Learn first: updateQtable reads the agent's pre-move position.
        agent.updateQtable(operation)

        if operation == 'pick':
            if self.pickup(agent):
                print(f'{agent.name} picked up a block!')
        elif operation == 'drop':
            if self.dropoff(agent):
                print(f'{agent.name} dropped of a block!')
        else:
            moves = {'north': agent.moveNorth, 'east': agent.moveEast,
                     'south': agent.moveSouth, 'west': agent.moveWest}
            moves[operation]()
            # Keep the printed grid in sync with the agent's new cell.
            self.init_environment()

        return operation

    def randomOp(self, agent):
        """Random policy step for ``agent``.

        If picking or dropping a block is available do that, else do a
        uniformly random valid operation.

        Returns:
            The operation performed, or None when the agent has no valid
            operation (boxed in by the grid edge and other agents).
        """
        valid_operations = self._validOperations(agent)
        if not valid_operations:
            return None

        operation = self._blockOp(valid_operations)
        if operation is None:
            operation = random.choice(valid_operations)

        return self._perform(agent, operation)

    def greedyOp(self, agent):
        """Greedy policy step for ``agent``.

        If picking or dropping a block is available do that; otherwise
        perform the valid operation with the highest Q-value in the
        agent's current state, breaking ties at random.

        Returns:
            The operation performed, or None when the agent has no valid
            operation.
        """
        valid_operations = self._validOperations(agent)
        if not valid_operations:
            return None

        op = self._blockOp(valid_operations)
        if op is None:
            # Q-table rows are numbered row by row; columns follow the
            # key order of agent.ops.
            state = agent.row * self.environment.shape[1] + agent.col
            columns = list(agent.ops)
            q_values = {name: agent.qtable[state, columns.index(name)]
                        for name in valid_operations}
            max_q_val = max(q_values.values())
            max_q_ops = [name for name, q in q_values.items()
                         if q == max_q_val]
            op = random.choice(max_q_ops)

        self._perform(agent, op)
        print(f'{agent.name} performed {op} operation.')
        return op
        



: 

In [None]:
test = 