In [None]:
%matplotlib inline

In [None]:
import numpy as np
# Option 1.1 channel_state: number of users
# Option 1.2 channel_state: interference power

In [None]:
class Environment(object):
    """Set of RF channels shared by agents, advanced in discrete time steps.

    Currently no propagation effect is considered: every agent that joins a
    channel is simply recorded as a user of that channel for the current step.
    """

    def __init__(self, channel_num):
        """channel_num: number of RF channels in the environment"""
        self.channel_num = channel_num
        # One list of transmitting agents per channel for the current step.
        self.channel_state = [[] for _ in range(self.channel_num)]
        # Channel states of the finished steps, oldest first.
        self.history = []

    def one_action_step(self):
        """save last channel state
           enter to next time step and initialize channel state
        """
        self.history.append(self.channel_state)
        # A fresh list of lists, so the state just archived is never mutated.
        self.channel_state = [[] for _ in range(self.channel_num)]

    def join(self, channel_index, agent):
        """accept transmitting of a certain agent on channel channel_index"""
        self.channel_state[channel_index].append(agent)

    def query(self, channel_index):
        """provide identification result
           return the list of agents that occupied the given channel during
           the last finished time step; an empty list if no step has finished
        """
        if not self.history:
            # Nothing has been transmitted yet, so nobody can be identified.
            return []
        return self.history[-1][channel_index]

    def report(self):
        """provide observation result
           return the number of users of each channel during the last
           finished time step; all zeros if no step has finished
        """
        if not self.history:
            # Before the first step every channel is unoccupied.
            return [0] * self.channel_num
        return [len(users) for users in self.history[-1]]

    def get_reward(self):
        """return the final reward for an agent

           Placeholder: returns 0 (a neutral reward) so that callers can add
           the result to a numeric reward until the scoring is implemented.
        """
        # TODO Give success (1/task), conflict(-1/task)
        # The instructor is the environment (receiver),
        # which can evaluate how good the agent is doing by checksum.
        # The reward message is passed over signaling channel.
        # Since the frequent receiver to agent interaction wastes bandwidth,
        # the agent can only get back reward after its tasks are all finished
        # or the maximum time step is reached.
        return 0

In [None]:
class Agent(object):
    """A transmitter that shares the environment's channels with other agents.

    Each time step the agent performs one operation (rest, transmit,
    establish, tear, observe, identify or communicate) and accumulates a
    numeric reward in ``self.reward``.
    """

    def __init__(self, env, task_num):
        """task_num: number of task to be transmitted
           env: the environment to operate in
           self.channels: established channels with receiver
           self.part_state: observed channel states
           self.part_agent: identified agent ids in each channel
           self.agent_id: mapping from locally assigned id to agent object
           self.action_queue / self.args_queue: actions scheduled by
               communication and their argument tuples, in matching order
        """
        self.env = env
        self.task_num = task_num
        self.channels = set()
        self.part_state = []
        self.part_agent = [[] for _ in range(self.env.channel_num)]
        self.agent_id = {}
        self.reward = 0

        self.action_queue = []
        self.args_queue = []

    def one_action_step(self):
        """interact with environment and other agents in one time step
        """
        if self.task_num == 0:
            # all job just finished and get back the final reward from env
            final_reward = self.env.get_reward()
            # The environment may not implement rewards yet (returns None).
            if final_reward is not None:
                self.reward += final_reward
            # mark all job has finished
            self.task_num = -1
            return
        if self.task_num == -1:
            # all job has finished and begin to tear down channels
            if self.channels:
                # Pick a channel without removing it: tear() removes it
                # itself and only grants its reward if it is still present.
                index = next(iter(self.channels))
                self.tear(index)
            return
        if self.action_queue:
            # execute the actions scheduled by communication
            action = self.action_queue.pop(0)
            args = self.args_queue.pop(0)
            # args is a tuple of positional arguments, e.g. (channel_index,)
            action(*args)
            # job has not finished at last step, punish for the delay
            # since we want to finish tasks as soon as possible
            self.reward -= 1
            return

        # TODO: Q learning to choose operation (function and args)
        # function: rest,transmit,establish,tear,observe,identify,communicate
        # args: channel_index, target_agent
        # channel_index: 0~channel_num
        # target_agent: self.agent_id.get(target_agent_id,'null')
        # status: task_num,channels,part_state,part_agent
        # find f: function, channel_index, target_agent_id = f(status)

    def rest(self, *args):
        """rest in this time step and do nothing
        """
        return

    def transmit(self, *args):
        """transmit using the established channels
           one task is sent per established channel until no task is left
           TODO: choose channel based on certain policy
        """
        for index in self.channels:
            # <= 0 also covers the "finished" marker (-1).
            if self.task_num <= 0:
                break
            self.env.join(index, self)
            self.task_num -= 1

    def establish(self, channel_index, *args):
        """establish a new channel by coordinate with receiver
           channel_index: the index of channel to occupy
        """
        if channel_index not in self.channels:
            self.channels.add(channel_index)
            # Since expand operation need signaling bandwidth to coordinate
            self.reward -= 1

    def tear(self, channel_index, *args):
        """tear down a channel by coordinate with receiver
           channel_index: the index of channel to release
        """
        if channel_index in self.channels:
            self.channels.remove(channel_index)
            # Shrink operation need signaling bandwidth to coordinate (-1)
            # Shrink operation save resource in agent and receiver (+1)
            # Shrink operation facilitate collaboration (+1)
            self.reward += 2

    def observe(self, *args):
        """observe the channel usages
           save number of users in each channel
           TODO: may can add find channel with highest availability
           TODO: in this stage, we let Q-learning find the candidate channel
        """
        self.part_state.append(self.env.report())
        # Since observe operation need energy to detect occupancy
        self.reward -= 0.5
        # TODO find the channel with highest availability (least occupied)

    def identify(self, channel_index, *args):
        """identifying the agent occupying the channel
           by classifying signal (implemented as environment query)
           save the ids of the users occupying the particular channel;
           an agent seen for the first time gets the next free integer id
           (starting at 0) in self.agent_id
           channel_index: the index of the channel to identify
        """
        users = self.env.query(channel_index)
        for user in users:
            # Look the agent up by identity among the already known agents.
            known_id = next(
                (id_num for id_num, agent in self.agent_id.items()
                 if agent is user),
                None,
            )
            if known_id is None:
                known_id = max(self.agent_id) + 1 if self.agent_id else 0
                self.agent_id[known_id] = user
            self.part_agent[channel_index].append(known_id)
        # Since identify signal need energy
        self.reward -= 0.5

    def communicate(self, channel_index, target_agent, *args):
        """communicate with destinate agent over signaling channel
           say desired channel, exchange priority and schedule establish action
           channel_index: the index of the desired channel
           target_agent: the target agent to communicate with
        """
        score = self.priority()
        target_score = target_agent.feedback(channel_index, score)
        if score > target_score:
            self.action_queue.append(self.establish)
            self.args_queue.append((channel_index,))
        # Since communicate operation need signaling bandwidth to coordinate
        self.reward -= 1

    def feedback(self, channel_index, score):
        """schedule tear down action if score is higher
           return the priority score of this agent
        """
        own_score = self.priority()
        if score > own_score and channel_index in self.channels:
            self.action_queue.append(self.tear)
            self.args_queue.append((channel_index,))
        return own_score

    def priority(self):
        """calculate priority score, or loss of the agent
           possible metric is to combine tasks num and self.reward
           need to add constraint over score to avoid malicious deception
           assume the authenticity of the score is guaranteed by
           (1) reviewing devices when they join the network;
           (2) continuously monitoring join time and subsequent
               transmission counts
        """
        return -self.task_num + self.reward

    def report(self):
        """return the remaining task number and the established channels"""
        return self.task_num, self.channels

    def reward(self):
        """placeholder for fetching the reward from the environment

           NOTE: __init__ assigns a numeric ``self.reward`` attribute, which
           shadows this method on every instance, so ``agent.reward`` is the
           accumulated number and this stub is only reachable via the class.
        """
        # TODO Try to get back reward from environment (receiver)
        return