<a href="https://colab.research.google.com/github/RKDash7/QLB-IoT/blob/main/Q_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#%%writefile mypackage/Iot_Edge.py
import networkx as nx
import matplotlib.pyplot as plt
import simpy
import random
import numpy as np
from Crypto.PublicKey import RSA
from Crypto.Cipher import PKCS1_OAEP
import hashlib
from mypackage.Block_chain import generate_keys,encrypt_message,Blockchain,sign_transaction, generate_key

# ---------------------------------------------------------------------------
# Module-level simulation state and radio-energy parameters.
# ---------------------------------------------------------------------------

# Bookkeeping names kept exactly as they were spelled in the original cell.
initial_Energy = 0
Energ_consumption_per_iteration = []
dead_per_iteration = []
inital_dead_node = 0

# Globals that the routing and plotting functions in this file read or update
# but that were never defined anywhere.
dead_node = 0        # running count of dead-node events
dead = []            # history of `dead_node` values
initial_energy = []  # history of the remaining total sensor energy `E`
it = []              # x values read by Energy_Consumption_plot
En = []              # y values read by Energy_Consumption_plot

# Radio-energy model parameters.
# NOTE(review): the original line was `e_T=` with no value (a SyntaxError) and
# e_R, e_amp, alpha and n were referenced but never assigned. The numbers below
# are PLACEHOLDERS chosen only so the module can run with the arbitrary energy
# units used for the nodes (100.0 per sensor) - TODO replace with the values
# intended for the experiment.
e_T = 0.5    # transmit-electronics term used in the per-link energy model
e_R = 0.05   # energy charged to the receiver per unit of `n`
e_amp = 0.1  # amplifier term used in the per-link energy model
alpha = 1.0  # overall scale factor of the per-link energy model
n = 1        # multiplier applied to e_R on every reception
#Q=0
_, public_key = generate_key()
# Step 1: Create the Multi-hop IoT Network with Edge Computing Nodes using NetworkX
def create_multi_hop_iot_edge_network(num_devices=10, num_edge_nodes=2, connection_prob=0.3,
                                      sensor_energy=100.0, edge_energy=1000.0):
    """Build a random topology of IoT sensors and edge-computing nodes.

    The graph is an Erdos-Renyi random graph over
    ``num_devices + num_edge_nodes`` nodes. Node ids ``0 .. num_devices - 1``
    are sensors; the remaining ids are edge nodes.

    Args:
        num_devices: Number of sensor nodes.
        num_edge_nodes: Number of edge nodes (gateways / local servers).
        connection_prob: Probability that any pair of nodes is linked.
        sensor_energy: Initial energy of each sensor (arbitrary units).
        edge_energy: Initial energy of each edge node (arbitrary units).

    Returns:
        The graph. Sensors carry ``device_type``, ``data`` and ``energy``
        attributes; edge nodes carry ``device_type``, ``processed_data`` and
        ``energy``.
    """
    G = nx.erdos_renyi_graph(num_devices + num_edge_nodes, connection_prob)

    for sensor_id in range(num_devices):
        G.nodes[sensor_id].update(
            device_type='sensor',
            data={'temperature': random.uniform(20, 30), 'humidity': random.uniform(30, 60)},
            energy=sensor_energy,
        )

    for edge_id in range(num_devices, num_devices + num_edge_nodes):
        G.nodes[edge_id].update(
            device_type='edge_node',
            processed_data={},  # filled with sensor readings delivered to this node
            energy=edge_energy,
        )

    return G

# Step 2: Visualize the IoT Edge Network
def visualize_network(G):
    """Draw the network and annotate every node with its device type.

    Sensors are labelled in the default text colour and edge nodes in red.
    Nodes without a recognised ``device_type`` attribute are drawn but not
    annotated.

    Args:
        G: Graph whose nodes may carry a ``device_type`` attribute.
    """
    pos = nx.spring_layout(G, seed=42)
    plt.figure(figsize=(8, 6))
    nx.draw(G, pos, with_labels=True, node_size=2000, node_color='lightblue', font_size=10, font_weight='bold', edge_color='gray')

    # Extra text styling per device type; anything not listed is skipped.
    label_style = {'sensor': {}, 'edge_node': {'color': 'red'}}

    # Iterate the node view itself so node ids need not be 0..n-1.
    for node, device_type in G.nodes(data='device_type'):
        style = label_style.get(device_type)
        if style is None:
            continue
        x, y = pos[node]
        plt.text(x, y + 0.05, device_type, fontsize=12, ha='center', **style)

    plt.title("Multi-hop IoT based Edge Computing Network Simulation with Q-Learning for Energy Minimization")
    plt.show()

# Step 3: Q-learning Agent for IoT Device Routing
class QLearningAgent:
    """Tabular Q-learning agent that selects the next hop for a packet.

    A state is a node id (a row of the Q-table). An action is an index into
    ``list(communication_network.neighbors(state))`` (a column of the
    Q-table), so only indices below both the neighbour count and
    ``action_space`` are meaningful for a given state.
    """

    def __init__(self, state_space, action_space, learning_rate=0.1, discount_factor=0.9, exploration_rate=1, exploration_decay=0.995):
        """Create an agent with an all-zero ``state_space x action_space`` Q-table.

        Args:
            state_space: Number of states (rows of the Q-table).
            action_space: Number of actions (columns of the Q-table).
            learning_rate: Step size of the Q-value update.
            discount_factor: Weight of the bootstrapped next-state value.
            exploration_rate: Initial epsilon of the epsilon-greedy policy.
            exploration_decay: Factor applied to epsilon on every ``learn`` call.
        """
        self.state_space = state_space
        self.action_space = action_space
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.q_table = np.zeros((state_space, action_space))

    def choose_action(self, state,communication_network,edge_nodes):
        """Pick a neighbour index with an epsilon-greedy policy.

        When exploring, the first neighbour that is an edge node is chosen if
        there is one; otherwise a neighbour with remaining energy is drawn
        uniformly at random. When exploiting, the best Q-value among the
        columns that correspond to real neighbours is chosen.

        Args:
            state: Id of the node currently holding the packet.
            communication_network: Graph-like object exposing
                ``neighbors(node)`` and ``nodes[node]['energy']``.
            edge_nodes: Collection of edge-node ids.

        Returns:
            An ``int`` index into ``list(communication_network.neighbors(state))``,
            or ``None`` when no usable next hop exists.
        """
        explore = random.uniform(0, 1) < self.exploration_rate

        neighbors = list(communication_network.neighbors(state))
        # An index is usable only if it is both a neighbour and a Q-table column.
        usable = range(min(len(neighbors), self.action_space))
        if not usable:
            return None

        if explore:
            # Scan every neighbour for an edge node before falling back to a
            # random pick (the scan used to stop after the first neighbour).
            for idx in usable:
                if neighbors[idx] in edge_nodes:
                    return idx
            alive = [idx for idx in usable
                     if communication_network.nodes[neighbors[idx]]['energy'] > 0]
            if not alive:
                return None
            return int(np.random.choice(alive))

        # Exploit: restrict argmax to real neighbours so the returned index
        # can always be used to index the neighbour list.
        return int(np.argmax(self.q_table[state, :len(usable)]))

    def learn(self, state, action, reward, next_state,communication_network):
        """Apply one Q-learning update and decay the exploration rate.

        ``Q[s, a] += lr * (reward + gamma * max(Q[next_state]) - Q[s, a])``.
        The update is skipped when ``state`` has no energy left or when the
        entry is already ``-inf`` (updating it would produce ``nan``).

        Args:
            state: Node id the packet was sent from.
            action: Neighbour index that was taken.
            reward: Reward obtained for the transition.
            next_state: Node id the packet was sent to.
            communication_network: Object exposing ``nodes[node]['energy']``.
        """
        if communication_network.nodes[state]['energy'] > 0:
            q_value = self.q_table[state, action]
            if q_value != -np.inf:
                # Bootstrap from the best value reachable from the NEXT state.
                best_next_value = np.max(self.q_table[next_state])
                td_target = reward + self.discount_factor * best_next_value
                self.q_table[state, action] = q_value + self.learning_rate * (td_target - q_value)

        # Decay exploration rate
        self.exploration_rate *= self.exploration_decay

    def distance(self, node1, node2, communication_network):
        """Return the Euclidean distance between the ``pos`` of two nodes.

        The distance is also printed, as in the original implementation.
        """
        start = np.array(list(communication_network.nodes[node1]['pos']))
        end = np.array(list(communication_network.nodes[node2]['pos']))
        d = np.linalg.norm(start - end)
        print("distance=",d)
        return d

    def reward_function(self, current_node, next_node, communication_network):
        """Return the reward for forwarding from ``current_node`` to ``next_node``.

        Returns ``-inf`` when the next node has no energy left, ``0`` when the
        two nodes are at the same position, and ``1 / distance`` otherwise.
        """
        if communication_network.nodes[next_node]['energy'] <= 0:  # Avoid dead nodes
            return -np.inf
        dist = self.distance(current_node, next_node, communication_network)
        if dist == 0:
            return 0
        return 1/dist

    def visualize_q_table(self):
        """Print the Q-table and show it as a heat map."""
        print(self.q_table)
        plt.figure(figsize=(8, 8))
        plt.imshow(self.q_table, cmap='hot', interpolation='nearest')
        plt.colorbar()
        plt.title("Q-Table (Routing Decisions)")
        plt.xlabel("Next Node")
        plt.ylabel("Current Node")
        plt.yticks(range(0,self.state_space,5))
        plt.xticks(range(0,self.action_space,5))
        plt.show()

# Step 4: IoT Device Process with Q-learning Routing (Energy Consideration)
def iot_device_with_q_learning(env,device_id, data, communication_network, edge_nodes, agent, node_state_space, energy_model):
    """SimPy process: a sensor generates a reading and routes it hop by hop.

    The sensor creates a new reading, stores its encrypted form on its own
    node, and then forwards the packet via the Q-learning agent until an edge
    node is reached, no usable next hop exists, or a node on the route has no
    energy left. Every hop charges the sender ``energy_model[sender][receiver]``
    and the receiver ``e_R * n``, and feeds the transition to ``agent.learn``.

    Relies on module-level names: ``public_key``, ``encrypt_message``,
    ``e_R``, ``n``, the lists ``dead`` and ``initial_energy``, and the
    counters ``dead_node`` and ``E`` (both updated here).

    Args:
        env: SimPy environment, used for ``env.timeout``.
        device_id: Id of the sensor that originates the packet.
        data: Unused; kept for interface compatibility.
        communication_network: Graph with ``energy`` (and, on edge nodes,
            ``processed_data``) node attributes.
        edge_nodes: Collection of edge-node ids.
        agent: ``QLearningAgent`` used to choose hops and learn from them.
        node_state_space: Unused; kept for interface compatibility.
        energy_model: ``energy_model[u][v]`` is the cost of sending u -> v.

    Yields:
        A SimPy timeout of 5-10 time units after every hop.
    """
    # These counters are rebound below, so they must be declared global
    # (the original declaration listed two misspelled, unused names).
    global dead_node, E

    nodes = communication_network.nodes
    source = device_id
    if nodes[source]['energy'] <= 0:
        return  # a dead sensor neither senses nor transmits

    new_data = {'temperature': random.uniform(20, 30), 'humidity': random.uniform(30, 60)}
    print(f"Sensor {source} generating data: {new_data}")
    encrypted_data = encrypt_message(str(new_data), public_key)
    nodes[source]['data'] = encrypted_data
    print(f"Sensor {source} encrypting data: {encrypted_data}")

    path = [source]
    current = source
    while current not in edge_nodes and nodes[current]['energy'] > 0:
        neighbors = list(communication_network.neighbors(current))
        action = agent.choose_action(current, communication_network, edge_nodes)
        if action is None:
            break  # no usable next hop from here
        next_hop = neighbors[action]

        if nodes[next_hop]['energy'] <= 0:
            # The chosen relay is dead: record it and give up on this packet.
            dead_node = dead_node + 1
            dead.append(dead_node)
            break

        print(f"Sensor {current} forwarding data to Node {next_hop} via Q-learning")

        # Charge the sender for transmission and the receiver for reception.
        energy_consumed = energy_model[current][next_hop]
        nodes[current]['energy'] -= energy_consumed
        nodes[next_hop]['energy'] -= e_R*n
        E -= energy_consumed
        initial_energy.append(E)
        print(f"Energy consumed: {energy_consumed} (Sensor {current} -> Node {next_hop})")

        # Edge-node hops go through the same accounting as every other hop and
        # deliver the reading, keyed by the sensor that produced it.
        if next_hop in edge_nodes:
            nodes[next_hop]['processed_data'][source] = new_data
            print(f"Sensor {source} data processed at Edge Node {next_hop}")

        reward = agent.reward_function(current, next_hop, communication_network)
        if np.isnan(reward):  # `reward == np.nan` can never be true
            reward = 0
        agent.learn(current, action, reward, next_hop, communication_network)

        path.append(next_hop)
        current = next_hop
        # Wait before the next hop
        yield env.timeout(random.randint(5, 10))

    print(path)
# Step 5: Edge Node Process
def edge_node(env, edge_node_id, communication_network):
    """SimPy process for an edge node that periodically drains its inbox.

    On every cycle the node's ``processed_data`` attribute is inspected: if it
    holds anything, the contents are printed and the attribute is replaced by
    a fresh empty dict; otherwise a waiting message is printed. The process
    then sleeps for a random 3-7 time units and repeats forever.

    Args:
        env: Object providing ``timeout(delay)`` (a SimPy environment).
        edge_node_id: Id of the edge node in ``communication_network``.
        communication_network: Object whose ``nodes[edge_node_id]`` mapping
            holds the ``processed_data`` entry.
    """
    while True:
        attrs = communication_network.nodes[edge_node_id]
        pending = attrs['processed_data']

        if not pending:
            print(f"Edge Node {edge_node_id} waiting for data.")
        else:
            print(f"Edge Node {edge_node_id} processing data from devices: {pending}")
            # Rebind to a new dict rather than clearing the old one in place.
            attrs['processed_data'] = {}

        # Sleep before polling again
        pause = random.randint(3, 7)
        yield env.timeout(pause)
def Energy_Consumption_plot(sim):
    """Plot the module-level series ``En`` against ``it`` as a line chart.

    The y axis is ticked from 1000 down to 100 in steps of 100 and the x axis
    from 0 up to (but excluding) ``sim`` in steps of 10.

    NOTE(review): ``it`` and ``En`` are read as globals and are not assigned
    anywhere in the visible code - confirm where they are populated.

    Args:
        sim: Upper bound (exclusive) of the x-axis ticks.
    """
    plt.plot(it, En)

    # Axis labelling
    plt.xlabel('Time')
    plt.ylabel('Energy Consumption')
    plt.title('Energy Consumption Over Time')

    # Tick layout
    energy_ticks = range(1000, 0, -100)
    plt.yticks(energy_ticks)
    time_ticks = range(0, sim, 10)
    plt.xticks(time_ticks)

    plt.grid(True)
    plt.show()
# Step 6: Simulate the Multi-hop IoT Network with Q-learning and Energy Minimization
def Blockchain_connection(device_id1,device_id2,communication_network):
    """Record a test transaction between two devices on a new blockchain.

    Fresh key pairs are generated for both devices, the sender signs a short
    message, the transaction is added to a newly created ``Blockchain``, one
    block is mined on top of it, and the resulting chain is printed.

    Args:
        device_id1: Id of the sending device (a sensor).
        device_id2: Id of the receiving device (an edge node).
        communication_network: Unused by this function.
    """
    # Key material for both parties; only the sender's private key and the two
    # public keys are used, the PEM encodings are discarded.
    sender_private, sender_public, _, _ = generate_keys()
    sender = {"id": device_id1, "public_key": sender_public}

    _, receiver_public, _, _ = generate_keys()
    receiver = {"id": device_id2, "public_key": receiver_public}

    ledger = Blockchain()

    # The sender signs the message describing the transfer
    message = f"Sensor{sender['id']} sent to Edge Node{receiver['id']} test packets"
    signature = sign_transaction(sender_private, message)

    payload = {'signature': signature, 'recipient': receiver['id'], 'packets': 100}
    ledger.add_transaction(sender, receiver, 100, payload, message)

    # Mine a block: proof of work on the last proof, then link to the last block
    proof = ledger.proof_of_work(ledger.last_block['proof'])
    previous_hash = ledger.hash(ledger.last_block)
    ledger.create_block(proof, previous_hash)

    print("Blockchain after mining a new block:")
    for block in ledger.chain:
        print(block)
def run_multi_hop_iot_edge_computing_q_learning_energy_simulation(num_devices=10, num_edge_nodes=2, connection_prob=0.3, simulation_time=100):
    """Build the network, run the Q-learning routing simulation and plot results.

    Steps: create the random topology, assign 2-D positions with a spring
    layout, derive a per-link transmission-energy table from the link
    distances, schedule one routing process per live sensor for each of
    ``simulation_time`` rounds, run SimPy until ``simulation_time``, and
    finally draw the network and the learned Q-table.

    Relies on the module-level radio parameters ``alpha``, ``e_T`` and
    ``e_amp`` and resets the module-level counters ``dead_node`` and ``E``.

    Args:
        num_devices: Number of sensors.
        num_edge_nodes: Number of edge nodes.
        connection_prob: Link probability of the random graph.
        simulation_time: Number of scheduling rounds and the SimPy end time.
    """
    global dead_node,E,Q
    dead_node = 0  # was read below before ever being assigned
    E=100*num_devices
    G = create_multi_hop_iot_edge_network(num_devices, num_edge_nodes, connection_prob)

    # Edge nodes occupy the ids after the sensors
    edge_nodes = list(range(num_devices, num_devices + num_edge_nodes))

    # One state per node; one action per neighbour slot of the best-connected node
    node_state_space = len(G.nodes)
    action_space = max((len(list(G.neighbors(node))) for node in G.nodes), default=0)

    pos = nx.spring_layout(G, seed=42)
    for node, position in pos.items():
        G.nodes[node]['pos'] = position

    # Per-link transmission cost derived from the layout distance
    energy_model = {}
    for node in G.nodes:
        energy_model[node] = {}
        for neighbor in G.neighbors(node):
            distance = np.linalg.norm(np.array(list(G.nodes[node]['pos'])) - np.array(list(G.nodes[neighbor]['pos'])))
            energy_model[node][neighbor] = alpha*(e_T*e_amp + e_amp*distance)

    agent = QLearningAgent(state_space=node_state_space, action_space=action_space)

    env = simpy.Environment()
    counted_dead = set()  # sensors already included in dead_node
    # NOTE(review): every round's processes are created before env.run(), so
    # they all start at simulation time 0 - confirm this is intended.
    for _ in range(simulation_time):
        for device_id in range(num_devices):
            Blockchain_connection(device_id,np.random.choice(edge_nodes),G)
            if G.nodes[device_id]['energy'] <= 0:
                # Count each dead sensor once and never schedule it again.
                if device_id not in counted_dead:
                    counted_dead.add(device_id)
                    dead_node = dead_node+1
                continue
            data = G.nodes[device_id]['data']
            env.process(iot_device_with_q_learning(env, device_id, data, G, edge_nodes, agent, node_state_space, energy_model))

    # Run the simulation
    env.run(until=simulation_time)

    # Visualize the network after the simulation
    visualize_network(G)
    agent.visualize_q_table()

# Run the Multi-hop IoT Edge Computing simulation with Q-learning for energy minimization

# Guarded so that importing this file as a module (the header shows it is meant
# to be written to mypackage/Iot_Edge.py) does not start a simulation; running
# it as a script or notebook cell still does.
if __name__ == "__main__":
    run_multi_hop_iot_edge_computing_q_learning_energy_simulation(num_devices=10, num_edge_nodes=2, connection_prob=0.3, simulation_time=1000)
