In [None]:
%matplotlib inline

In [None]:
import numpy as np
# Option 1.1 channel_state: number of users
# Option 1.2 channel_state: interference power

In [None]:
class Environment(object):
    """Set of RF channels that agents join one time step at a time.

    Currently no propagation effect is considered.

    Attributes:
        channel_num: number of RF channels in the environment.
        channel_state: one set per channel holding the agents that joined
            that channel during the current (unfinished) time step.
        history: archived ``channel_state`` lists, oldest first, one entry
            per completed time step.
    """

    def __init__(self, channel_num):
        """channel_num: number of RF channels in the environment"""
        self.channel_num = channel_num
        self.channel_state = self._empty_state()
        self.history = []

    def _empty_state(self):
        """Return a fresh per-channel occupancy list with no agents in it."""
        return [set() for _ in range(self.channel_num)]

    def _last_state(self):
        """Return the most recently archived state.

        Before the first call to ``one_time_step`` nothing has been archived,
        so an all-empty state is returned instead of failing on an empty
        history.
        """
        if self.history:
            return self.history[-1]
        return self._empty_state()

    def one_time_step(self):
        """Archive the current channel occupancy and start an empty step."""
        self.history.append(self.channel_state)
        # The channel count comes from the instance; the constructor argument
        # is not in scope here.
        self.channel_state = self._empty_state()

    def join(self, channel_index, agent):
        """Register ``agent`` on channel ``channel_index`` for this step."""
        self.channel_state[channel_index].add(agent)

    def query(self, channel_index):
        """Return the set of agents that were on ``channel_index`` during the
        last completed time step (empty if no step has completed yet)."""
        return self._last_state()[channel_index]

    def report(self):
        """Return the number of agents per channel during the last completed
        time step (all zeros if no step has completed yet)."""
        return [len(occupants) for occupants in self._last_state()]

    def get_reward(self):
        """Return the reward granted to an agent; placeholder, always 0."""
        # TODO Give success (1/task), conflict(-1/task)
        # The instructor is the environment (receiver),
        # which can evaluate how good the agent is doing by checksum.
        # The reward message is passed over signaling channel.
        # Since the frequent receiver to agent interaction wastes bandwidth,
        # the agent can only get back reward after its tasks are all finished
        # or the maximum time step is reached.
        # A numeric neutral value keeps ``reward += env.get_reward()`` valid
        # until the real scoring is implemented.
        return 0

In [None]:
# Scratch setup: an environment with 10 channels and an agent constructed
# with task_num=10 and channel_num=10.
e, a = Environment(10), Agent(10, 10)

In [None]:
# Put the agent on channel 1 for the current time step.
e.join(channel_index=1, agent=a)

In [None]:
# Check that the agent is registered on channel 1. A membership test is used
# instead of set.pop() so that the check does not remove the agent from the
# channel and the cell can be re-run.
a in e.channel_state[1]

In [None]:
class Agent(object):
    """Transmitter that sends its tasks over the channels it has established.

    Attributes:
        task_num: number of tasks still to be transmitted.
        channel_num: number of channels, as passed to the constructor.
        channels: set of indices of the channels this agent currently holds.
        part_state: list of observations collected by ``observe``.
        reward: running score; each operation adds its gain or cost to it.
    """

    def __init__(self, task_num, channel_num):
        """task_num: number of task to be transmitted
        channel_num: number of RF channels in the environment"""
        self.task_num = task_num
        self.channel_num = channel_num
        # A set of channel indices: expand/shrink add and remove single
        # indices and test membership, which an array of zeros cannot do.
        self.channels = set()
        self.part_state = []
        self.reward = 0

    def one_time_step(self, env):
        """Advance the agent by one time step, charging the per-step cost."""
        if self.task_num <= 0:
            # TODO Calculate the final reward
            final_reward = env.get_reward()
            # The environment's reward may not be implemented yet and come
            # back as None; only numeric rewards are accumulated.
            if final_reward is not None:
                self.reward += final_reward
        # TODO choose operations (transmit, expand, shrink,
        # observe, rest, communicate) based on part_state
        # Since we want to finish tasks as soon as possible
        self.reward -= 1

    def rest(self):
        """Do nothing"""
        return

    def transmit(self, env=None):
        """Transmit using the established channels.

        Sends one task on each held channel (in ascending index order) for as
        long as tasks remain, joining the channel in ``env`` for each one.

        env: the environment to transmit into.
        Raises ValueError when ``env`` is not given.
        """
        if env is None:
            raise ValueError("transmit() requires the environment to join")
        for index in sorted(self.channels):
            if self.task_num <= 0:
                # Nothing left to send; leave the remaining channels idle.
                break
            env.join(index, self)
            self.task_num -= 1

    def expand(self, env, channel_index):
        """Establish a new channel by coordinate with receiver.
        channel_index: the index of channel to occupy"""
        if channel_index not in self.channels:
            self.channels.add(channel_index)
            # Since expand operation need signaling bandwidth to coordinate
            self.reward -= 1

    def shrink(self, env, channel_index):
        """Pull down a channel by coordinate with receiver.
        channel_index: the index of channel to release"""
        if channel_index in self.channels:
            self.channels.remove(channel_index)
            # Shrink operation need signaling bandwidth to coordinate (-1)
            # Shrink operation save resource in agent and receiver (+2)
            self.reward += 1

    def observe(self, env):
        """Observe the channel usage"""
        self.part_state.append(env.report())
        # Since observe operation need energy to detect occupancy
        # TODO find the channel with highest availability
        self.reward -= 0.2

    def identify(self, env, channel_index):
        """Find the agent occupying the channel by identifying the signal"""
        # Since identify signal need energy
        self.reward -= 1
        return env.query(channel_index)

    def communicate(self):
        """Charge the cost of coordinating with another group (stub)."""
        # Since communicate operation need signaling bandwidth to coordinate two group
        # TODO How to communicate efficiently, since the cost is high
        # TODO Provide how many tasks left and self.reward
        # TODO Provide a score or loss (above is one possible metric)
        # TODO Need to add constraint over score to avoid malicious deception
        # TODO Possible measures: (1) admission review when joining the
        # network, (2) continuous monitoring of join time and of the number
        # of subsequent transmissions
        self.reward -= 2

    def report(self):
        """Return the remaining task count and the set of held channels."""
        return self.task_num, self.channels

    def reward(self, env):
        """Placeholder for fetching the reward from the environment."""
        # NOTE(review): __init__ assigns the instance attribute ``reward``,
        # which shadows this method on every instance, so it is only
        # reachable as ``Agent.reward(agent, env)``. Consider renaming one
        # of the two.
        # TODO Try to get back reward from environment (receiver)
        return