In [1]:
import numpy as np

# Environment parameters
grid_size = 100
start = (0, 0)
goal = (grid_size - 1, grid_size - 1)  # corner diagonally opposite to start
num_obstacles = int(grid_size * grid_size * 0.3)
SEED = 42  # fixed seed so the obstacle layout is reproducible across runs

# Seed NumPy's legacy global generator, which the np.random.* calls below use.
np.random.seed(SEED)


def is_reachable(grid, start, goal):
    """Return True if `goal` can be reached from `start` with 4-connected moves.

    Cells whose value is -1 are treated as blocked, and the search never
    leaves the array bounds. Implemented as an iterative depth-first search.
    """
    n_rows, n_cols = grid.shape
    visited = {start}
    stack = [start]
    while stack:
        cell = stack.pop()
        if cell == goal:
            return True
        cx, cy = cell
        for nxt in ((cx - 1, cy), (cx + 1, cy), (cx, cy - 1), (cx, cy + 1)):
            if (0 <= nxt[0] < n_rows and 0 <= nxt[1] < n_cols
                    and nxt not in visited and grid[nxt] != -1):
                visited.add(nxt)
                stack.append(nxt)
    return False


# Initialize grid with obstacles.
# A random layout can wall off the start or the goal, in which case an agent
# that loops until it reaches the goal would never finish an episode.
# Regenerate the layout until the goal is reachable (bounded number of tries).
for _attempt in range(100):
    grid = np.zeros((grid_size, grid_size))
    obstacles = set()
    while len(obstacles) < num_obstacles:
        x, y = np.random.randint(0, grid_size), np.random.randint(0, grid_size)
        if (x, y) != start and (x, y) != goal:
            obstacles.add((x, y))
            grid[x, y] = -1  # Mark as obstacle
    if is_reachable(grid, start, goal):
        break
else:
    raise RuntimeError("could not generate a grid in which the goal is reachable")

grid[start] = 1  # Start
grid[goal] = 2   # Goal


In [2]:
# Define the Q-learning agent
class QLearningAgent:
    """Tabular epsilon-greedy Q-learning agent on a 2-D grid.

    Grid encoding read by `transition`: -1 marks an obstacle and 2 marks the
    goal cell; any other value is treated as free space. The agent stores a
    reference to `grid` (no copy), so later edits to that array are seen by
    subsequent calls.
    """

    def __init__(self, grid, start, goal, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        """Create an agent with an all-zero Q-table.

        Args:
            grid: 2-D array describing the environment (see class docstring).
            start: (row, col) tuple where every episode begins.
            goal: (row, col) tuple that ends an episode when reached.
            actions: dict mapping action index -> (row delta, col delta).
                Keys are used to index the last axis of the Q-table, so they
                must be 0 .. len(actions) - 1.
            alpha: learning rate.
            gamma: discount factor.
            epsilon: probability of picking a uniformly random action.
        """
        self.q_table = np.zeros((grid.shape[0], grid.shape[1], len(actions)))
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.start = start
        self.goal = goal
        self.grid = grid
        self.actions = actions

    def choose_action(self, state):
        """Return an action index for `state` using an epsilon-greedy policy.

        With probability `epsilon` a random action key is drawn; otherwise the
        action with the highest Q-value is returned (np.argmax resolves ties
        in favour of the lowest index).
        """
        if np.random.rand() < self.epsilon:
            return np.random.choice(list(self.actions.keys()))
        x, y = state
        return np.argmax(self.q_table[x, y])

    def learn(self, state, action, reward, next_state):
        """Apply one Q-learning update for the observed transition.

        Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
        """
        x, y = state
        next_x, next_y = next_state
        td_target = reward + self.gamma * self.q_table[next_x, next_y].max()
        self.q_table[x, y, action] += self.alpha * (td_target - self.q_table[x, y, action])

    def train(self, episodes=1000, max_steps=None):
        """Run `episodes` training episodes, each starting from `self.start`.

        Args:
            episodes: number of episodes to run.
            max_steps: upper bound on the number of steps in a single episode.
                Defaults to 100 steps per grid cell. The cap guarantees that
                training terminates even when the goal cannot be reached
                (for example when obstacles wall off the start cell).
        """
        if max_steps is None:
            max_steps = 100 * self.grid.size
        for _ in range(episodes):
            state = self.start
            for _ in range(max_steps):
                if state == self.goal:
                    break
                action = self.choose_action(state)
                next_state, reward, done = self.transition(state, action)
                self.learn(state, action, reward, next_state)
                state = next_state
                if done:
                    break

    def transition(self, state, action):
        """Apply `action` in `state` and return (next_state, reward, done).

        Outcomes:
            * move leaves the grid  -> stay in place, reward -1
            * move hits an obstacle -> stay in place, reward -10
            * move enters the goal  -> move, reward 10, done=True
            * any other move        -> move, reward -0.1
        """
        x, y = state
        dx, dy = self.actions[action]
        next_state = (x + dx, y + dy)

        # Bounds come from the agent's own grid rather than a notebook-level
        # global, so non-square grids and fresh scopes work correctly.
        n_rows, n_cols = self.grid.shape
        if not (0 <= next_state[0] < n_rows and 0 <= next_state[1] < n_cols):
            return state, -1, False

        cell = self.grid[next_state]
        if cell == -1:
            return state, -10, False
        if cell == 2:
            return next_state, 10, True

        return next_state, -0.1, False

# Action table: index -> (row delta, column delta).
# The position in the list is the action's index.
actions = dict(enumerate([
    (-1, 0),  # 0: Up
    (1, 0),   # 1: Down
    (0, -1),  # 2: Left
    (0, 1),   # 3: Right
]))


In [3]:
# Build the agent on the generated grid, then run the first round of training.
agent = QLearningAgent(grid=grid, start=start, goal=goal, actions=actions)
agent.train(50)


In [4]:
# Simulate an environment change by blocking cell (50, 50), near the centre of
# the 100x100 grid. The agent was given this same array, so it sees the change
# on its next transition. Nothing here checks that the cell lies on the path
# the agent learned, and the cell may already have been an obstacle.
new_obstacle = (50, 50)
grid[new_obstacle] = -1
obstacles.add(new_obstacle)  # keep the obstacle set consistent with the grid

# Function to reset affected Q-values
def reset_q_values_around_obstacle(agent, obstacle):
    """Zero the Q-values of every in-bounds cell one action step from `obstacle`.

    The neighbourhood is derived from the agent's own action set, and the
    bounds from the agent's Q-table, so the function does not depend on
    notebook-level globals. The obstacle cell's own Q-values are left as is.

    Args:
        agent: object exposing `q_table` (rows x cols x n_actions array) and
            `actions` (dict of action index -> (row delta, col delta)).
        obstacle: (row, col) tuple of the newly blocked cell.
    """
    n_rows, n_cols = agent.q_table.shape[:2]
    x, y = obstacle
    for dx, dy in agent.actions.values():
        nx, ny = x + dx, y + dy
        # Skip neighbours that fall off the grid (also prevents negative
        # indices from wrapping around to the far edge).
        if 0 <= nx < n_rows and 0 <= ny < n_cols:
            agent.q_table[nx, ny, :] = 0  # Reset Q-values for neighboring cells

# Clear what the agent learned for the cells bordering the new obstacle
reset_q_values_around_obstacle(agent=agent, obstacle=new_obstacle)


In [5]:
# Raise the exploration rate (previously the constructor default of 0.1) so
# the agent takes random actions more often while it adapts.
agent.epsilon = 0.3

# Continue training on the modified grid; the existing Q-table is reused.
agent.train(50)
