In [5]:
# routing_env.py

import gym
from gym import spaces
import numpy as np
import socket
import pickle

In [6]:
class MessageRoutingEnv(gym.Env):
    def __init__(self, node_id, num_nodes, host, port):
        super(MessageRoutingEnv, self).__init__()
        self.num_nodes = num_nodes
        self.node_id = node_id
        self.host = host
        self.port = port
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.bind((self.host, self.port))
        self.socket.listen()
        self.observation_space = spaces.Discrete(self.num_nodes)
        self.action_space = spaces.Discrete(self.num_nodes)
        self.state = None
        self.target_node = None
        self.step_count = 0

    def reset(self):
        self.state = np.random.randint(0, self.num_nodes)
        self.target_node = np.random.randint(0, self.num_nodes)
        self.step_count = 0
        return self.state

    def step(self, action):
        # Simulate message routing based on the action
        reward = 1 if action == self.target_node else 0
        self.step_count += 1

        # Example: Send a message to the target node
        message = {"sender": self.node_id, "action": action, "reward": reward}
        self.send_message(self.target_node, message)

        done = self.step_count >= 20
        return self.state, reward, done, {}

    def send_message(self, target_node, message):
        # Connect to the target node and send the message using TCP/IP
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((self.host, self.port))
            s.sendall(pickle.dumps(message))

    def receive_messages(self):
        # Accept incoming connections and receive messages
        messages = []
        while True:
            conn, addr = self.socket.accept()
            with conn:
                data = conn.recv(1024)
                if not data:
                    break
                messages.append(pickle.loads(data))
        return messages

    def render(self):
        pass

    def close(self):
        self.socket.close()


In [7]:
import numpy as np

class QLearningAgent:
    def __init__(self, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.3):
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_table = np.zeros((num_actions, num_actions))

    def select_action(self, state, messages):
        if np.random.uniform(0, 1) < self.exploration_prob:
            return np.random.choice(self.num_actions)
        else:
            return np.argmax(self.q_table[state, :])

    def update_q_table(self, state, action, reward, next_state, messages):
        predict = self.q_table[state, action]
        target = reward + self.discount_factor * np.max(self.q_table[next_state, :])
        self.q_table[state, action] += self.learning_rate * (target - predict)


In [8]:
# network_node.py

import socket
import pickle

class NetworkNode:
    def __init__(self, node_id, host, port):
        self.node_id = node_id
        self.host = host
        self.port = port
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.socket.bind((self.host, self.port))
        self.socket.listen()

    def receive_messages(self):
        # Accept incoming connections and receive messages
        messages = []
        while True:
            conn, addr = self.socket.accept()
            with conn:
                data = conn.recv(1024)
                if not data:
                    break
                messages.append(pickle.loads(data))
        return messages


In [9]:
# main.py

#from routing_env import MessageRoutingEnv
#from ql_agent import QLearningAgent
#from network_node import NetworkNode
import threading

def main():
    num_nodes = 5
    edge_probability = 0.2

    # Create the network nodes
    nodes = [NetworkNode(node_id=i, host='localhost', port=5000 + i) for i in range(num_nodes)]

    # Start a thread for each node to listen for incoming messages
    for node in nodes:
        threading.Thread(target=node.receive_messages).start()

    # Create the environment using the network nodes
    env = MessageRoutingEnv(node_id=0, num_nodes=num_nodes, host='localhost', port=5000)

    # Create the Q-learning agent
    agent = QLearningAgent(env.num_actions)

    # Main training loop
    for episode in range(1000):
        state = env.reset()
        total_reward = 0

        while True:
            # Receive messages from the network
            messages = env.receive_messages()

            # Select action based on the current state and received messages
            action = agent.select_action(state, messages)

            # Send the selected action to the environment (simulate taking an action)
            next_state, reward, done, _ = env.step(action)

            # Update Q-table based on the observed reward and next state
            agent.update_q_table(state, action, reward, next_state, messages)

            state = next_state
            total_reward += reward

            if done:
                break

        print(f"Episode: {episode}, Total Reward: {total_reward}")

    env.close()

if __name__ == "__main__":
    main()


OSError: [WinError 10048] Only one usage of each socket address (protocol/network address/port) is normally permitted