In [14]:
import random as r

In [15]:
class Enviroment:
    """A row of rooms, each of which is either clean or dirty.

    Attributes:
        rooms: list with one entry per room, holding a ``FLOOR_STATE`` value.
    """

    # Numeric encoding of a room's floor state as stored in ``rooms``.
    FLOOR_STATE = {"clean": 0, "dirty": 1}

    def __init__ (self, size: int):
        """Create ``size`` rooms, all of them clean."""
        self.rooms = [Enviroment.FLOOR_STATE["clean"]] * size

    @staticmethod
    def _roll_dirty(dirtyRate: int) -> bool:
        """Return True with a probability of ``dirtyRate`` percent."""
        # randrange(100) draws uniformly from the 100 values 0..99, so a rate
        # of 100 always succeeds and a rate of 0 never does. randint(0, 100)
        # would include 100 as well (101 outcomes) and skew every rate.
        return r.randrange(100) < dirtyRate

    def dirty_room(self, position: int):
        """Mark the room at index ``position`` as dirty."""
        self.rooms[position] = Enviroment.FLOOR_STATE["dirty"]

    def clean_room(self, position: int):
        """Mark the room at index ``position`` as clean."""
        self.rooms[position] = Enviroment.FLOOR_STATE["clean"]

    def init_randomly(self, dirtyRate: int):
        """Reset every room, making each one dirty with ``dirtyRate`` percent chance.

        Args:
            dirtyRate: chance, in percent (0-100), that a room ends up dirty.
        """
        for i in range(len(self.rooms)):
            if self._roll_dirty(dirtyRate):
                self.dirty_room(i)
            else:
                self.clean_room(i)

    def init_rooms(self, rooms: list):
        """Replace the room states with a shallow copy of ``rooms``.

        Copying keeps later cleaning/dirtying from mutating the caller's list.
        """
        self.rooms = rooms.copy()

    def rand_dirty_rooms (self, dirtyRate: int):
        """Make each currently clean room dirty with ``dirtyRate`` percent chance.

        Rooms that are already dirty are left untouched (and consume no
        random draw).

        Args:
            dirtyRate: chance, in percent (0-100), that a clean room gets dirty.
        """
        for i, room in enumerate(self.rooms):
            if room == Enviroment.FLOOR_STATE["clean"] and self._roll_dirty(dirtyRate):
                self.dirty_room(i)

In [16]:
class Agent:
    """Vacuum agent that cleans its current room or walks toward the nearest dirty one."""

    # Codes returned by ``action``; the movement codes double as position deltas.
    ACTIONS = {"NoOp": 0, "clean": 2, "right": 1, "left": -1}

    def __init__ (self):
        # No room is occupied until ``initial_position`` is called.
        self.position = None

    def initial_position(self, initial_position: int):
        """Place the agent in the room with index ``initial_position``."""
        self.position = initial_position

    def action (self, env: Enviroment):
        """Perform one step in ``env`` and return the code of the action taken.

        If the current room is dirty it is cleaned and the "clean" code (2)
        is returned. Otherwise the agent moves one room toward the closest
        dirty room (the lower-indexed one on a tie) and returns 1 (right) or
        -1 (left); when no room is dirty it stays put and returns 0.
        """
        dirty = Enviroment.FLOOR_STATE["dirty"]
        here = self.position

        if env.rooms[here] == dirty:
            env.rooms[here] = Enviroment.FLOOR_STATE["clean"]
            return Agent.ACTIONS["clean"]

        # Signed offsets to every dirty room, in ascending room order. None of
        # them is zero because the current room was just found to be clean.
        offsets = [idx - here for idx, state in enumerate(env.rooms) if state == dirty]

        # min() keeps the first of equally small items, i.e. the left-most room.
        nearest = min(offsets, key=abs, default=0)

        # Sign of the offset: 1 = right, -1 = left, 0 = nothing to do.
        step = (nearest > 0) - (nearest < 0)
        self.position += step

        return step

In [17]:
class Evaluator:
    """Runs an agent in an environment for a fixed number of steps and scores it.

    Scoring: +1 for every "clean" action, -1 for every "right"/"left" move,
    0 for anything else.
    """

    def __init__ (self, env: Enviroment, agent: Agent, steps: int, seed: int, dirtyRate: int):
        """Store the simulation parameters.

        Args:
            env: environment the agent acts in.
            agent: agent being evaluated.
            steps: number of agent actions performed per ``execute`` call.
            seed: value used to seed the ``random`` module at the start of
                every ``execute`` call, making runs reproducible.
            dirtyRate: chance, in percent, passed to ``env.rand_dirty_rooms``
                after every agent action.
        """
        self.env = env
        self.agent = agent
        self.steps = steps
        self.seed = seed
        self.dirtyRate = dirtyRate
        self.agent_points = 0

    def execute (self):
        """Run the simulation from the current env/agent state and return the score.

        The module-level random generator is re-seeded with ``self.seed``
        first, so the same starting state always produces the same score.
        Note that this affects every other user of the ``random`` module.
        A seed of ``None`` makes ``random`` seed itself from system entropy,
        i.e. runs are then not reproducible.
        """
        # The environment draws from the shared ``random`` module, so that is
        # the generator that has to be seeded for the run to be repeatable.
        r.seed(self.seed)

        self.agent_points = 0
        for _ in range(self.steps):
            self.evaluate_agent(self.agent.action(self.env))
            self.env.rand_dirty_rooms(self.dirtyRate)

        return self.agent_points

    def evaluate_agent (self, action: int):
        """Update ``agent_points`` according to the action code just performed."""
        if action == Agent.ACTIONS["clean"]:
            self.agent_points += 1
        elif action == Agent.ACTIONS["right"] or action == Agent.ACTIONS["left"]:
            self.agent_points -= 1

In [21]:
# Experiment: score the agent in a two-room world for every combination of
# initial dirt layout and initial agent position, then report the average.
agent = Agent()
env = Enviroment(2)
# NOTE(review): the name `eval` shadows Python's built-in eval(); it is kept
# unchanged here because other cells may refer to it.
# Arguments: 1000 steps per run, seed 123, 30% dirty rate.
eval = Evaluator(env, agent, 1000, 123, 30)

# All four clean(0)/dirty(1) layouts of the two rooms, and both starting rooms.
env_possibilities = [[left, right] for left in (0, 1) for right in (0, 1)]
agent_possibilities = list(range(2))
result = []

for agent_possibility in agent_possibilities:
    print(f"Agente com posição inicial {agent_possibility}: ")
    for env_possibility in env_possibilities:
        # Reset the world and the agent so each run starts from the stated scenario.
        env.init_rooms(env_possibility)
        agent.initial_position(agent_possibility)
        score = eval.execute()
        result.append(score)
        print(f"\tAmbiente inicial: {env_possibility} | Pontuação: {result[-1]}")
    print()
print(f"Média das pontuações: {sum(result)/len(result)}")

Agente com posição inicial 0: 
	Ambiente inicial: [0, 0] | Pontuação: 227
	Ambiente inicial: [0, 1] | Pontuação: 235
	Ambiente inicial: [1, 0] | Pontuação: 222
	Ambiente inicial: [1, 1] | Pontuação: 244

Agente com posição inicial 1: 
	Ambiente inicial: [0, 0] | Pontuação: 227
	Ambiente inicial: [0, 1] | Pontuação: 221
	Ambiente inicial: [1, 0] | Pontuação: 215
	Ambiente inicial: [1, 1] | Pontuação: 249

Média das pontuações: 230.0
