In [9]:
import pandas as pd
import random
import numpy as np
import copy
import networkx as nx
import matplotlib.pyplot as plt
import itertools
import collections
from collections import deque  # Add this import
#!which python


# Some functions may not work for multiplexing (such as getPossibleStates)

In [10]:
class QuantumNetworks():
    """Toy simulator of entanglement generation, ageing and swapping on a graph.

    The state lives in ``currentEdges``: a dict mapping a sorted node pair
    ``(u, v)`` to a ``deque`` of link ages, kept ordered from youngest (left)
    to oldest (right). Edges that hold no link are removed from the dict, so
    the live state and the look-ahead states use the same representation.

    Args:
        initialEdges: node pairs on which entanglement generation is attempted.
        pGen: probability that a single generation attempt succeeds.
        pSwap: swap success probability. It is only stored; swaps performed by
            this class always succeed.
        cutOffAge: a link whose age has reached this value is dropped at the
            next ageing step.
        maxLinks: maximum number of simultaneous links on an existing edge.
        goalEdges: list of ``(edge, weight)`` pairs used by ``calcReward``.
    """

    def __init__(self, initialEdges, pGen, pSwap, cutOffAge, maxLinks, goalEdges):
        self.initialEdges = copy.deepcopy(initialEdges)  # A deep copy is not strictly needed
        self.currentEdges = {}
        self.pGen = pGen
        self.pSwap = pSwap
        self.cutOffAge = cutOffAge
        self.maxLinks = maxLinks
        self.goalEdges = goalEdges
        self.total_timesteps = 1
        # Starts at 0.1 rather than 0 so calcReward never divides by zero.
        self.goal_edge_counts = {edge: 0.1 for edge, _ in goalEdges}

    def reset(self) -> None:
        """Drop every link; the goal-edge counters are left untouched."""
        self.currentEdges = {}

    def getState(self) -> dict:
        """Return the live state dict (not a copy)."""
        return self.currentEdges

    # ------------------------------------------------------------------
    # Private helpers shared by the live swap and the look-ahead search
    # ------------------------------------------------------------------
    @staticmethod
    def _swappablePairs(state) -> list:
        """Return every pair of non-empty edges in ``state`` that share a node."""
        active_edges = [edge for edge, ages in state.items() if len(ages) > 0]
        return [
            (edge1, edge2)
            for edge1, edge2 in itertools.combinations(active_edges, 2)
            if set(edge1) & set(edge2)
        ]

    @staticmethod
    def _swappedEdge(edge1, edge2) -> tuple:
        """Return the sorted tuple of nodes that appear in exactly one of the edges."""
        combined_nodes = list(edge1) + list(edge2)
        return tuple(sorted(node for node in combined_nodes if combined_nodes.count(node) == 1))

    @staticmethod
    def _consumeOldest(state, edge):
        """Pop the right-most (oldest) link of ``edge`` and drop the edge if it is now empty."""
        age = state[edge].pop()
        if len(state[edge]) == 0:
            del state[edge]
        return age

    def _applySwap(self, state, edge1, edge2) -> None:
        """Swap one link of ``edge1`` with one link of ``edge2`` inside ``state``.

        The consumed links are always removed. The resulting link inherits the
        larger of the two consumed ages and is stored unless the target edge
        already holds ``maxLinks`` links.
        """
        new_edge = self._swappedEdge(edge1, edge2)
        new_age = max(self._consumeOldest(state, edge1), self._consumeOldest(state, edge2))
        links = state.get(new_edge)
        if links is None:
            # Always a deque, so later deque operations and concatenations work.
            state[new_edge] = deque([new_age])
        elif len(links) < self.maxLinks:
            # Insert at the position that keeps the youngest-to-oldest ordering.
            position = sum(1 for age in links if age <= new_age)
            links.insert(position, new_age)

    # ------------------------------------------------------------------
    # Environment dynamics
    # ------------------------------------------------------------------
    def _generateEntanglement(self, node1, node2):  # Extend later to attemptGenerateEntanglements()
        """Add a fresh (age 0) link between the two nodes if there is room for it."""
        edge = tuple(sorted([node1, node2]))
        links = self.currentEdges.get(edge)
        if links is None:
            self.currentEdges[edge] = deque([0])
        elif len(links) < self.maxLinks:
            links.appendleft(0)  # Youngest link goes on the left to retain order by age

    def probalisticallyGenerateEntanglements(self):
        """Attempt generation on every initial edge, each succeeding with probability ``pGen``."""
        for edge in self.initialEdges:
            if random.random() < self.pGen:
                self._generateEntanglement(*edge)

    def discardEntanglement(self, edge: tuple):
        """Remove the oldest link of ``edge``; unknown or empty edges are ignored.

        An edge left without links is deleted from the state, matching what
        ``increaseGlobalEntanglementAge`` and the look-ahead search do.
        """
        links = self.currentEdges.get(edge)
        if links is None:
            return
        if len(links) > 0:
            links.pop()
        if len(links) == 0:
            del self.currentEdges[edge]

    def increaseGlobalEntanglementAge(self):
        """Age every link by one step, dropping links that already reached ``cutOffAge``."""
        for edge in list(self.currentEdges.keys()):  # Copy the keys: the dict is modified below
            newAges = [age + 1 for age in self.currentEdges[edge] if age < self.cutOffAge]
            if not newAges:
                del self.currentEdges[edge]
            else:
                self.currentEdges[edge] = deque(newAges)

    def performSwapping(self, edge1: tuple, edge2: tuple):  # Extend later to attemptSwapping()
        """Swap the oldest link of ``edge1`` with the oldest link of ``edge2``.

        Invalid requests (missing edge, edge without links, edges that do not
        share exactly one node) print a message and leave the state unchanged.
        """
        if edge1 not in self.currentEdges or edge2 not in self.currentEdges:
            print(f"Edge {edge1} or {edge2} not found in currentEdges")
            return

        if len(self.currentEdges[edge1]) == 0 or len(self.currentEdges[edge2]) == 0:
            print(f"Edge {edge1} or {edge2} has no entanglement")
            return

        if len(self._swappedEdge(edge1, edge2)) != 2:
            print(f"Edges {edge1} and {edge2} do not share exactly one node")
            return

        self._applySwap(self.currentEdges, edge1, edge2)

    # ------------------------------------------------------------------
    # Action enumeration and reward
    # ------------------------------------------------------------------
    def getImmediatePossibleActions(self) -> list:  # Only does T=1 steps
        """Return ``[None, swap, ...]`` where ``None`` means "do nothing"."""
        return [None] + self._swappablePairs(self.currentEdges)

    def getPossibleStatesAndActionsWithReward(self) -> list:
        """Return ``(state, swap_sequence, reward)`` for every reachable look-ahead state."""
        return [
            (state, action_sequence, self.calcReward(state))
            for state, action_sequence in self.getPossibleActionsAndCorrespondingStates()
        ]

    def calcReward(self, state):
        """Sum ``weight / (goal_edge_count / total_timesteps)`` over goal edges present in ``state``."""
        reward = 0
        for goal_edge, weight in self.goalEdges:
            if goal_edge in state:
                goal_edge_edr = self.goal_edge_counts[goal_edge] / self.total_timesteps
                reward += weight / goal_edge_edr
        return reward

    def getPossibleActionsAndCorrespondingStates(self, maxSwapDepth: int = 3) -> list:
        """Enumerate states reachable with up to ``maxSwapDepth`` sequential swaps.

        Returns:
            A list of ``(state, swap_sequence)`` tuples. The first entry is a
            snapshot of the current state with an empty swap sequence; every
            state in the list is an independent copy, so none of them changes
            when the live network is modified afterwards.
        """
        start_state = copy.deepcopy(self.currentEdges)
        possible_states_and_swaps = [(start_state, [])]
        states_to_explore = [(start_state, [])]

        for _ in range(maxSwapDepth):
            next_states = []
            for current_state, swap_history in states_to_explore:
                for edge1, edge2 in self._swappablePairs(current_state):
                    new_state = copy.deepcopy(current_state)
                    self._applySwap(new_state, edge1, edge2)
                    entry = (new_state, swap_history + [(edge1, edge2)])
                    next_states.append(entry)
                    possible_states_and_swaps.append(entry)
            if not next_states:
                break  # Nothing left to expand
            states_to_explore = next_states

        return possible_states_and_swaps
                

In [15]:
def stateActionToKey(state, action=None):
    """Build a hashable, order-independent key for a state or a (state, action) pair.

    Args:
        state: dict mapping an edge to an iterable of link ages.
        action: ``None`` to get the state key alone, otherwise a list of edge
            pairs (a swap sequence).

    Returns:
        The state key when ``action`` is ``None``; otherwise a tuple
        ``(state_key, action_key)`` where ``action_key`` is ``None`` for an
        empty action and a sorted tuple of sorted edge pairs otherwise.
    """
    state_key = tuple(sorted((edge, tuple(ages)) for edge, ages in state.items()))

    if action is None:
        return state_key

    # An empty swap sequence is represented by None in the key.
    if not action:
        return (state_key, None)

    # Sort the edges inside each pair, then sort the pairs among themselves.
    canonical_pairs = [tuple(sorted(edge_pair)) for edge_pair in action]
    canonical_pairs.sort()
    return (state_key, tuple(canonical_pairs))
        
def epsilonGreedyPolicy(Q, state, epsilon, env=None, verbose=True):
    """Pick a ``(next_state, swap_sequence)`` candidate epsilon-greedily.

    Args:
        Q: mapping from ``stateActionToKey(state, action)`` to a Q-value. It is
            indexed with ``Q[key]``, so a ``defaultdict`` is expected when
            unseen keys should receive an initial value.
        state: the current state, used to build the Q-table keys and as the
            fallback result when there are no candidates.
        epsilon: probability of taking a uniformly random candidate.
        env: object providing ``getPossibleActionsAndCorrespondingStates()``.
            When omitted, the notebook-level ``network`` variable is used,
            which is what this function always did before.
        verbose: when true (the default) the candidates, Q-values and the
            decision are printed.

    Returns:
        One ``(state, swap_sequence)`` tuple from the candidates, or
        ``(state, None)`` when there are no candidates.
    """
    if env is None:
        env = network  # Backward-compatible fallback to the notebook global

    def log(*args):
        if verbose:
            print(*args)

    # Get all possible state-action pairs
    possible_states_actions = env.getPossibleActionsAndCorrespondingStates()
    log("Possible state-actions:", possible_states_actions)

    if random.random() < epsilon:
        log("Taking random action")
        return random.choice(possible_states_actions) if possible_states_actions else (state, None)

    log("Taking greedy action")
    max_q = float('-inf')
    best_state_action = (state, None)

    for state_action in possible_states_actions:
        state_action_key = stateActionToKey(state, state_action[1])
        q_value = Q[state_action_key]
        log(f"Action: {state_action[1]}, Q-value: {q_value}")
        if q_value > max_q:
            max_q = q_value
            best_state_action = state_action

    log(f"Chosen action: {best_state_action[1]} with Q-value: {max_q}")
    if max_q == float('-inf'):  # No candidate beat the initial value (e.g. empty candidate list)
        log("No Q-values found, choosing random action")
        return random.choice(possible_states_actions) if possible_states_actions else (state, None)

    return best_state_action

In [16]:
# Environment parameters (passed to the QuantumNetworks constructor below)

# Topology and objective
initialEdges = [(1, 3), (2, 3), (3, 4), (4, 5), (4, 6)]  # Edges on which generation is attempted
goalEdges = [((1, 6), 1)]  # (user, weight)

# Link dynamics
pGen = 0.7      # Per-edge generation success probability
pSwap = 1       # Swap success probability
cutOffAge = 1   # Links that have reached this age are dropped on the next ageing step
maxLinks = 1    # Multiplexing ## Off for now

random.seed(1)


In [17]:
# Seed both RNGs so this demo prints the same thing on every run.
random.seed(27)
np.random.seed(27)

# Build a network and make one round of generation attempts.
network = QuantumNetworks(initialEdges, pGen, pSwap, cutOffAge, maxLinks, goalEdges)
network.probalisticallyGenerateEntanglements()
print("Initial state:", network.getState())

# Report every look-ahead state that contains a goal edge.
possible_states = network.getPossibleStatesAndActionsWithReward()
for state, action, reward in possible_states:
    for goal_edge, _ in network.goalEdges:
        if goal_edge not in state:
            continue
        print(
            f"Found goal edge {goal_edge} in state:",
            f"State: {state}",
            f"Action sequence: {action}",
            f"Reward: {reward}",
            "---",
            sep="\n",
        )

Initial state: {(1, 3): deque([0]), (4, 5): deque([0]), (4, 6): deque([0])}


In [19]:
# Q-values start as small random positive numbers (optimistic initialization).
# This is required: otherwise we always pick the 'do nothing' action when we
# have no other information to break ties.
Q = collections.defaultdict(lambda: random.uniform(0.0, 0.1))
epsilon = 0.01

# First policy query on the current network state.
state = network.getState()
action = epsilonGreedyPolicy(Q, state, epsilon)

# Second query on the same state: choosing an action does not apply it, so the
# state printed below is unchanged.
state = network.getState()
action = epsilonGreedyPolicy(Q, state, epsilon)
print(state)

Possible state-actions: [({(1, 3): deque([0]), (4, 5): deque([0]), (4, 6): deque([0])}, []), ({(1, 3): deque([0]), (5, 6): deque([0])}, [((4, 5), (4, 6))])]
Taking greedy action
Action: [], Q-value: 0.018333476966499396
Action: [((4, 5), (4, 6))], Q-value: 0.09096814486371707
Chosen action: [((4, 5), (4, 6))] with Q-value: 0.09096814486371707
Possible state-actions: [({(1, 3): deque([0]), (4, 5): deque([0]), (4, 6): deque([0])}, []), ({(1, 3): deque([0]), (5, 6): deque([0])}, [((4, 5), (4, 6))])]
Taking greedy action
Action: [], Q-value: 0.018333476966499396
Action: [((4, 5), (4, 6))], Q-value: 0.09096814486371707
Chosen action: [((4, 5), (4, 6))] with Q-value: 0.09096814486371707
{(1, 3): deque([0]), (4, 5): deque([0]), (4, 6): deque([0])}


In [21]:

"""a0 is action we take to reach s1, r1 is the reward for the state at s1 !!!!!!!!!!!!!!!!"""
# https://gibberblot.github.io/rl-notes/single-agent/n-step.html

# Fresh environment and bookkeeping for the n-step training loop (work in progress).
network = QuantumNetworks(initialEdges, pGen, pSwap, cutOffAge, maxLinks, goalEdges)
episodeRewards, episodeLengths = [], []
n = 3
epsilon = 0.1
numEpisodes = 1000
maxSteps = 1000
Q = collections.defaultdict(lambda: random.uniform(0.0, 0.01))

for episode in range(numEpisodes):
    network.reset()
    # Generate first, otherwise the first timestep would relate to the empty initial state.
    network.probalisticallyGenerateEntanglements()

    # Initialize: S_0 and A_0
    state = network.getState()
    action = epsilonGreedyPolicy(Q, state, epsilon)

    # Trajectory buffers, index 0 corresponds to t0
    rewards, states, actions = [0.0], [state], [action]

    # Looping variables
    T = float('inf')
    tau = 0
    for t in range(maxSteps):
        # Part 1 (not implemented yet)
        if t < T:
            # Take action A_t
            # Observe and store in R_t+1 and S_t+1
            # IF s_t+1 is terminal DO
            #   T = t + 1
            # ELSE DO
            #   print('AHHHHH)
            #
            # if network.isTerminal():
            #     T = t + 1
            # else:
            pass

        tau = t - n + 1

        # Part 2 (not implemented yet): begin learning on the previous n steps
        if tau >= 0 and tau + n < T:
            pass

        if tau == T - 1:
            break

        #state = next_state
        #action = next_action

    #episodeRewards[episode] = sum(rewards)
    #episodeLengths[episode] = len(rewards)
    

Possible state-actions: [({(1, 3): deque([0]), (2, 3): deque([0]), (3, 4): deque([0]), (4, 5): deque([0]), (4, 6): deque([0])}, []), ({(3, 4): deque([0]), (4, 5): deque([0]), (4, 6): deque([0]), (1, 2): deque([0])}, [((1, 3), (2, 3))]), ({(2, 3): deque([0]), (4, 5): deque([0]), (4, 6): deque([0]), (1, 4): deque([0])}, [((1, 3), (3, 4))]), ({(1, 3): deque([0]), (4, 5): deque([0]), (4, 6): deque([0]), (2, 4): deque([0])}, [((2, 3), (3, 4))]), ({(1, 3): deque([0]), (2, 3): deque([0]), (4, 6): deque([0]), (3, 5): deque([0])}, [((3, 4), (4, 5))]), ({(1, 3): deque([0]), (2, 3): deque([0]), (4, 5): deque([0]), (3, 6): deque([0])}, [((3, 4), (4, 6))]), ({(1, 3): deque([0]), (2, 3): deque([0]), (3, 4): deque([0]), (5, 6): deque([0])}, [((4, 5), (4, 6))]), ({(4, 6): deque([0]), (1, 2): deque([0]), (3, 5): deque([0])}, [((1, 3), (2, 3)), ((3, 4), (4, 5))]), ({(4, 5): deque([0]), (1, 2): deque([0]), (3, 6): deque([0])}, [((1, 3), (2, 3)), ((3, 4), (4, 6))]), ({(3, 4): deque([0]), (1, 2): deque([0]

Implement a reward function so that getPossibleStatesAndActions() becomes getPossibleStatesAndActionsWithReward().
