# In [None]:
import numpy as np
import random
from collections import defaultdict

# Define a mock network environment
class NetworkEnv:
    """Mock network environment with a ring of integer states.

    States are integers in ``[0, state_space)``. Taking an action adds its
    value to the current state modulo ``state_space``. An episode is
    reported as finished when the resulting state is ``0``.

    Args:
        state_space: Number of distinct states (default 15).
        action_space: Number of distinct actions (default 5).

    Raises:
        ValueError: If either size is smaller than 1.
    """

    def __init__(self, state_space=15, action_space=5):
        if state_space < 1 or action_space < 1:
            raise ValueError(
                "state_space and action_space must both be at least 1"
            )
        self.state_space = state_space
        self.action_space = action_space
        self.reset()

    def reset(self):
        """Move to a uniformly random state and return it."""
        self.state = np.random.randint(0, self.state_space)
        return self.state

    def step(self, action):
        """Apply ``action`` and return ``(next_state, reward, done)``.

        The reward is drawn at random from ``{1, -1}`` and does not depend on
        the state or the action; ``done`` is True when the new state is 0.
        """
        next_state = (self.state + action) % self.state_space
        # Random reward to simulate network dynamics.
        reward = np.random.choice([1, -1])
        # Plain bool so callers never see a NumPy boolean scalar.
        done = bool(next_state == 0)
        self.state = next_state
        return next_state, reward, done

# Multi-Agent Q-Learning
def train_multi_agent(agents, episodes=500, max_steps=100, *, alpha=0.1,
                      gamma=0.6, epsilon=0.1, environment=None):
    """Train one independent tabular Q-learning table per agent.

    Each agent learns in turn, within every episode, on the same environment
    instance using an epsilon-greedy policy.

    Args:
        agents: Iterable of agent identifiers; only its length matters here,
            one Q-table is created per element.
        episodes: Number of training episodes.
        max_steps: Maximum number of steps per agent per episode.
        alpha: Learning rate of the Q-value update.
        gamma: Discount factor applied to the best next-state value.
        epsilon: Probability of picking a random action instead of the
            greedy one.
        environment: Object exposing ``action_space``, ``reset()`` and
            ``step(action) -> (next_state, reward, done)``. When omitted, the
            module-level ``env`` is used.

    Returns:
        A list of ``defaultdict`` Q-tables (state -> array of action values),
        in the same order as ``agents``.
    """
    # Fall back to the module-level ``env`` so existing callers keep working.
    sim = env if environment is None else environment
    n_actions = sim.action_space

    q_tables = [defaultdict(lambda: np.zeros(n_actions)) for _ in agents]

    for _ in range(episodes):
        for q_table in q_tables:
            state = sim.reset()
            for _ in range(max_steps):
                # Epsilon-greedy action selection.
                if random.random() < epsilon:
                    action = random.randrange(n_actions)
                else:
                    action = int(np.argmax(q_table[state]))

                next_state, reward, done = sim.step(action)

                # NOTE(review): the target bootstraps from next_state even on
                # the final transition of an episode, as the original did.
                row = q_table[state]
                td_target = reward + gamma * np.max(q_table[next_state])
                row[action] += alpha * (td_target - row[action])

                if done:
                    break
                state = next_state

    return q_tables

# Name the agents, create the shared environment, then train one Q-table each
agents = [f"Agent{number}" for number in (1, 2, 3)]
env = NetworkEnv()
Q_tables = train_multi_agent(agents)

# Test each agent
def test_agent(agent_idx, max_steps=100):
    """Run one greedy rollout for an agent and print how it went.

    Uses the module-level ``env``, ``Q_tables`` and ``agents``.

    Args:
        agent_idx: Index of the agent in ``agents`` / ``Q_tables``.
        max_steps: Upper bound on the rollout length. A purely greedy policy
            can otherwise loop forever: action 0 leaves the state unchanged,
            and a state never seen in training has an all-zero row whose
            argmax is 0.
    """
    q_table = Q_tables[agent_idx]
    state = env.reset()
    done = False
    steps = 0
    while not done and steps < max_steps:
        action = np.argmax(q_table[state])
        state, _, done = env.step(action)
        steps += 1

    if done:
        print(f"{agents[agent_idx]} completed task in {steps} steps")
    else:
        print(
            f"{agents[agent_idx]} did not complete task within "
            f"{max_steps} steps"
        )

# Evaluate every trained agent in roster order.
for agent_idx, _ in enumerate(agents):
    test_agent(agent_idx)