# In [None]:
import numpy as np
import random
from collections import defaultdict

class PenTestingEnv:
    """Toy ring-shaped environment for a tabular reinforcement-learning agent.

    States are the integers ``0 .. state_space - 1``.  Taking ``action``
    moves the agent ``action`` positions forward, wrapping around modulo
    ``state_space``.  Landing on ``goal_state`` gives a reward of ``+1`` and
    ends the episode; every other move gives ``-1``.

    Args:
        state_space: Number of states in the ring (default 10).
        action_space: Number of discrete actions exposed to the agent
            (default 5).  ``step`` itself does not validate the action.
        goal_state: State whose arrival terminates the episode (default 4).

    Raises:
        ValueError: If either size is not positive, or ``goal_state`` is
            outside ``0 .. state_space - 1`` (the goal would be unreachable).
    """

    def __init__(self, state_space=10, action_space=5, goal_state=4):
        if state_space < 1 or action_space < 1:
            raise ValueError("state_space and action_space must be positive")
        if not 0 <= goal_state < state_space:
            raise ValueError("goal_state must lie in [0, state_space)")
        self.state_space = state_space
        self.action_space = action_space
        self.goal_state = goal_state
        self.reset()

    def reset(self):
        """Pick a uniformly random start state, store it, and return it.

        The start state may coincide with ``goal_state``; the episode only
        ends when a *step* lands on the goal.
        """
        self.state = np.random.randint(0, self.state_space)
        return self.state

    def step(self, action):
        """Advance the environment by ``action`` positions.

        Returns:
            A ``(next_state, reward, done)`` tuple, where ``reward`` is ``1``
            when the goal is reached and ``-1`` otherwise, and ``done`` tells
            whether ``next_state`` is the goal.
        """
        next_state = (self.state + action) % self.state_space
        done = next_state == self.goal_state
        reward = 1 if done else -1
        self.state = next_state
        return next_state, reward, done

env = PenTestingEnv()
# Q-table: maps a state to a vector of per-action value estimates.  A row of
# zeros is created lazily the first time a state is looked up.
Q = defaultdict(lambda: np.zeros(env.action_space))
alpha = 0.1    # learning rate
gamma = 0.6    # discount factor applied to the next state's best value
epsilon = 0.1  # probability of taking a random (exploratory) action

# Training the agent with extended steps
for episode in range(1000):
    state = env.reset()
    for step in range(100):  # cap the episode length
        # Epsilon-greedy action selection.
        if random.random() < epsilon:
            action = random.randrange(env.action_space)
        else:
            action = np.argmax(Q[state])

        next_state, reward, done = env.step(action)
        # A terminal transition has no future return, so do not bootstrap
        # from Q[next_state] when the episode has ended.  (The goal state can
        # also be a start state, so its Q row is not guaranteed to be zero.)
        future_value = 0.0 if done else np.max(Q[next_state])
        td_target = reward + gamma * future_value
        Q[state][action] += alpha * (td_target - Q[state][action])

        if done:
            break
        state = next_state

print("Final Q-Table:", Q)

# Test agent performance
def test_agent(max_steps=100):
    """Run one greedy episode with the module-level ``Q`` table and ``env``.

    The agent always picks ``argmax(Q[state])``.  A greedy policy can get
    stuck (for example, an all-zero Q row selects action 0, which leaves the
    state unchanged), so the episode is cut off after ``max_steps`` steps
    instead of looping forever.

    Args:
        max_steps: Upper bound on the number of environment steps taken
            before giving up (default 100).
    """
    state = env.reset()
    for steps in range(1, max_steps + 1):
        action = np.argmax(Q[state])
        state, _, done = env.step(action)
        if done:
            print(f"Agent completed task in {steps} steps")
            return
    print(f"Agent did not complete task within {max_steps} steps")

# Run a single greedy evaluation episode using the module-level Q-table.
test_agent()