In [9]:
class Environment:
    """A sequence of rooms, each represented by a status string.

    The statuses given positionally to the constructor are kept, in order,
    in the mutable ``rooms`` list.
    """

    def __init__(self, *rooms):
        # Unpack the varargs tuple into a list so entries can be overwritten.
        self.rooms = [*rooms]

    def get_percept(self, i):
        """Return the stored status of the room at index ``i``."""
        status = self.rooms[i]
        return status

    def clean_room(self, num):
        """Overwrite the status of the room at index ``num`` with "Clean"."""
        self.rooms[num] = "Clean"

class GoalBasedEnvironment:
    """Goal-based agent: derives a goal from each percept, then acts on it.

    Despite its name, this class plays the agent role; it exposes ``act``
    and holds no room state of its own.
    """

    def __init__(self):
        # The goal starts as "Clean" until a percept says otherwise.
        self.goal = "Clean"

    def formulate_goal(self, percept):
        """Store the goal implied by ``percept`` in ``self.goal``.

        A "Dirty" percept yields the goal "Clean"; any other percept yields
        "No action needed".
        """
        self.goal = "Clean" if percept == "Dirty" else "No action needed"

    def act(self, percept):
        """Refresh the goal for ``percept`` and return the matching action text."""
        self.formulate_goal(percept)
        if self.goal != "Clean":
            return "Room already clean"
        return "Clean the Room"

def run_agent(vacuumCleaner: GoalBasedEnvironment, env: Environment):
    """Walk through every room once, report the agent's decision, and clean.

    For each index of ``env.rooms`` the percept is read, handed to
    ``vacuumCleaner.act`` and printed with the returned action. A room is
    cleaned whenever its percept is "Dirty"; the check is made on the
    percept, not on the action text. The final room list is printed after
    the last room.
    """
    room_count = len(env.rooms)
    for index in range(room_count):
        percept = env.get_percept(index)
        action = vacuumCleaner.act(percept)
        room_number = index + 1  # rooms are reported 1-based
        print(f"Room: {room_number}, Percept: {percept}, Action: {action}")

        if percept != "Dirty":
            continue
        env.clean_room(index)

    print("Final State", env.rooms)

# Demo run: nine rooms, six of which start out dirty.
env = Environment(
    "Dirty", "Clean", "Dirty",
    "Dirty", "Clean", "Dirty",
    "Dirty", "Clean", "Dirty",
)
agent = GoalBasedEnvironment()
run_agent(agent, env)




Room: 1, Percept: Dirty, Action: Clean the Room
Room: 2, Percept: Clean, Action: Room already clean
Room: 3, Percept: Dirty, Action: Clean the Room
Room: 4, Percept: Dirty, Action: Clean the Room
Room: 5, Percept: Clean, Action: Room already clean
Room: 6, Percept: Dirty, Action: Clean the Room
Room: 7, Percept: Dirty, Action: Clean the Room
Room: 8, Percept: Clean, Action: Room already clean
Room: 9, Percept: Dirty, Action: Clean the Room
Final State ['Clean', 'Clean', 'Clean', 'Clean', 'Clean', 'Clean', 'Clean', 'Clean', 'Clean']


In [13]:
class UtilityBasedAgent:
    """Agent carrying a fixed utility table over the two room states."""

    def __init__(self):
        # Utility assigned to each percept value.
        self.utility = dict(Dirty=-10, Clean=10)

    def calculate_utility(self, percept):
        """Look up the utility of ``percept``.

        A percept missing from the table raises ``KeyError``.
        """
        return self.utility[percept]

    def select_action(self, percept):
        """Return "Clean Room" for a "Dirty" percept, else the no-action text."""
        return "Clean Room" if percept == "Dirty" else "No Action Neeed"

    def act(self, percept):
        """Return whatever ``select_action`` chooses for ``percept``."""
        return self.select_action(percept)
    
class Environment:
    """Single-room environment whose whole state is one status string."""

    def __init__(self, state):
        # Initial room status as given by the caller.
        self.state = state

    def get_percept(self):
        """Return the current room status without changing it."""
        current = self.state
        return current

    def clean_room(self):
        """Set the room status to "Clean"."""
        self.state = "Clean"

def run_agent(
    room: "Environment",
    machine: "UtilityBasedAgent",
    steps: int = 5,
) -> None:
    """Run the agent in the room for ``steps`` iterations and report utility.

    Each step reads the percept, asks the agent for an action, looks up the
    utility of that percept (the state observed before the room is cleaned),
    prints a report line, and cleans the room if the percept was "Dirty".
    The accumulated utility is printed after the last step.

    Args:
        room: Environment providing ``get_percept()`` and ``clean_room()``.
        machine: Agent providing ``act(percept)`` and
            ``calculate_utility(percept)``.
        steps: Number of iterations to run. Defaults to 5; zero or a
            negative value runs no iterations and reports a total of 0.

    Returns:
        None. Results are printed rather than returned.
    """
    total_utility = 0
    for step in range(steps):
        percept = room.get_percept()
        action = machine.act(percept)
        utility = machine.calculate_utility(percept)
        print(f"Step {step + 1}: Percept - {percept}, Action - {action}, Utility = {utility}")
        total_utility += utility

        # The environment change is keyed on the percept itself.
        if percept == "Dirty":
            room.clean_room()

    print("Total Utility = ", total_utility)

# Demo run: a single room that starts out dirty.
env = Environment("Dirty")
agent = UtilityBasedAgent()
run_agent(room=env, machine=agent)

Step 1: Percept - Dirty, Action - Clean Room, Utility = -10
Step 2: Percept - Clean, Action - No Action Neeed, Utility = 10
Step 3: Percept - Clean, Action - No Action Neeed, Utility = 10
Step 4: Percept - Clean, Action - No Action Neeed, Utility = 10
Step 5: Percept - Clean, Action - No Action Neeed, Utility = 10
Total Utility =  30


In [18]:
import random

class LearningBasedAgent:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    Q-values live in ``self.Q`` keyed by ``(state, action)``. Pairs that have
    never been updated are treated as 0.0.
    """

    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        """Create an agent with an empty Q-table.

        Args:
            actions: Sequence of action labels the agent may choose from.
            alpha: Learning rate applied to each Q-value update.
            gamma: Discount factor applied to the best next-state Q-value.
            epsilon: Probability of choosing a random action instead of the
                greedy one.
        """
        self.Q = {}
        self.actions = actions
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration rate

    def get_Qvalue(self, state, action):
        """Return the stored Q-value for ``(state, action)``, or 0.0 if unseen."""
        return self.Q.get((state, action), 0.0)

    def select_action(self, state):
        """Pick an action for ``state`` epsilon-greedily.

        With probability ``epsilon`` a uniformly random action is returned.
        Otherwise the action with the highest Q-value is returned; ties go to
        the action listed first in ``self.actions``.
        """
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        return max(self.actions, key=lambda a: self.get_Qvalue(state, a))

    def learn(self, state, action, reward, next_state):
        """Apply one Q-learning update for the observed transition.

        Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
        """
        old_q = self.get_Qvalue(state, action)
        best_future_q = max(self.get_Qvalue(next_state, a) for a in self.actions)
        td_target = reward + self.gamma * best_future_q
        self.Q[(state, action)] = old_q + self.alpha * (td_target - old_q)

    def act(self, state):
        """Return the action selected for ``state``."""
        return self.select_action(state)

class Environment:
    """Single-room environment whose observed state is re-drawn at random."""

    def __init__(self, state):
        # Initial status; replaced by the first call to get_percept().
        self.state = state

    def get_percept(self):
        """Draw a new state from 'Dirty'/'Clean', store it, and return it.

        Every call overwrites ``self.state``, so neither the constructor
        argument nor a previous ``clean_room`` call determines the result.
        """
        drawn = random.choice(('Dirty', 'Clean'))
        self.state = drawn
        return drawn

    def clean_room(self):
        """Mark the room 'Clean' and return the cleaning reward (10)."""
        self.state = 'Clean'
        return 10

    def no_action_reward(self):
        """Return the reward for leaving the room alone (0)."""
        return 0
    
def run_agent(agent, environment, steps=5, clean_action='Clean the room'):
    """Run the learning loop for ``steps`` iterations, printing each update.

    Each step observes a percept, asks the agent for an action, applies that
    action to the environment to obtain a reward, observes the next percept,
    and passes the transition to ``agent.learn``. The step summary and the
    agent's Q-table are printed after every update.

    Args:
        agent: Object providing ``act(state)``,
            ``learn(state, action, reward, next_state)`` and a ``Q`` attribute.
        environment: Object providing ``get_percept()``, ``clean_room()`` and
            ``no_action_reward()``.
        steps: Number of iterations to run. Defaults to 5.
        clean_action: Action label that means "clean the room". Defaults to
            the label used by the demo agent.

    Returns:
        None. Progress is printed rather than returned.
    """
    for step in range(steps):
        percept = environment.get_percept()
        action = agent.act(percept)
        # The reward must follow from what the agent actually chose. The room
        # is cleaned, and the cleaning reward paid, only when the agent picked
        # the clean action in a dirty room. Otherwise every action would earn
        # the same reward and the Q-values could not tell actions apart.
        if action == clean_action and percept == 'Dirty':
            reward = environment.clean_room()
        else:
            reward = environment.no_action_reward()
        next_percept = environment.get_percept()
        agent.learn(percept, action, reward, next_percept)
        print(f"Step {step + 1}: Percept - {percept}, Action - {action}, Reward - {reward}")
        print("Q-table: ", agent.Q)
        print("-" * 40)

# Demo run: a Q-learning agent with two possible actions in a random room.
env = Environment("Dirty")
agent = LearningBasedAgent(
    ['Clean the room', 'No action needed'],
)
run_agent(agent=agent, environment=env)

Step 1: Percept - Dirty, Action - Clean the room, Reward - 10
Q-table:  {('Dirty', 'Clean the room'): 1.0}
----------------------------------------
Step 2: Percept - Dirty, Action - Clean the room, Reward - 10
Q-table:  {('Dirty', 'Clean the room'): 1.9}
----------------------------------------
Step 3: Percept - Clean, Action - Clean the room, Reward - 0
Q-table:  {('Dirty', 'Clean the room'): 1.9, ('Clean', 'Clean the room'): 0.171}
----------------------------------------
Step 4: Percept - Dirty, Action - Clean the room, Reward - 10
Q-table:  {('Dirty', 'Clean the room'): 2.72539, ('Clean', 'Clean the room'): 0.171}
----------------------------------------
Step 5: Percept - Dirty, Action - Clean the room, Reward - 10
Q-table:  {('Dirty', 'Clean the room'): 3.468241, ('Clean', 'Clean the room'): 0.171}
----------------------------------------
