In [6]:
from abc import ABCMeta, abstractmethod
import torch

# MC Tree
### MC_note
##### attributes
- state, type of list ???
- edges, type of list
- N, type of int
    - number of times, the note been explored
- id, type of string
    - identify number

##### method
- add_edge()
- add_edges()
- is_leaf()
    - return bool
- get_N()
- get_state()

### MC_edge
##### attributes
- action, type of string
- in_node, type of MC_note
- out_node, type of MC_note
- N, type of int
- id, type of string
- Q, type of double
- U, action bonus
- W, type of double
- P, type of double

##### methods
- get_in_node()
- get_out_node()
- get_state()
    - return (Q, W, N, P)
- get_value()
- recalculate_value()
- set_q(q)

### MC_tree
##### attributes
- root, type of mc_node
- path, type of list
- tree, dictionary
- cpuct

##### methods
- add_node()
- back_fill()
- expansion()
- selection()
- set_root()
- simulation()

In [28]:
class MC_node():
    def __init__(self, state, N = 0):
        self.state = state
        self.id = self.state.get_id()
        self.N = N
        self.edges = []
    def add_edge(e):
        self.edges.append(e)
    def add_edges(es):
        self.edges.extend(es)
    def is_leaf(self):
        if len(self.edges) == 0:
            return True
        else:
            return False
    def get_id(self):
        return self.id
    def get_N(self):
        return self.N
    def get_state(self):
        return self.state

class MC_edge():
    def __init__(self, action, in_node, out_node, priori):
        self.action = action
        self.in_node = in_node
        self.out_node = out_node
        self.id = in_node.get_id() + '-' + out_node.get_id()
        self.N = 0
        self.W = 0
        self.Q = 0
        self.U = priori
        self.P = priori # 获胜的概率，由网络得到
        self.value = self.Q + self.U
    def get_in_node(self):
        return self.in_node
    def get_out_node(self):
        return self.out_node
    def get_action(self):
        return self.action
    def get_state(self):
        return (self.Q, self.U, self.W, self.N, self.P)
    def get_value(self):
        # return the value of the edge, the one here is just for test --------------------
        # should add an new attribute value for the class edge???? -----------------------
        return self.value
    def set_Q(q):
        self.Q = q

class MC_tree():
    def __init__(self, root_state, cpuct, logger = None):
        self.root = MC_node(root_state)
        self.path = []
        self.tree = {}
        self.cpuct = cpuct
        self.add_node(self.root)
    def add_node(self, node):
        if node.get_id() not in self.tree.keys():
            self.tree[node.get_id()] = node
            return 1
        else:
            print("node exist")
            return 0
    def back_fill(self, value):
        # update all the node and edge in the path
        #   - N add 1 for each edge
        #   - N add 1 for each Node 
        #   - recalculate the Q and W value of edge according to the new N value.
        for edge in self.path:
            edge.N += 1
            edge.in_node.N += 1
            edge.W = e.W + value
            edge.Q = edge.W/edge.N
            edge.U = (self.cpuct * math.sqrt(edge.in_node.N) * edge.priori)/ edge.N
            edge.value = edge.Q + edge.U
    def expansion(self, leaf_state, actions, states, values):
        # values here is just the output of the p_net, 
        # still not sure what kind of infos should be setted to the edge here ----------------
        # TODO
        # get the leaf node
        for action, state, value in zip(actions, states, values):
            out_node = MC_node(state)
            edge = MC_edge(action, leaf, out_node, value)
            leaf.add_edge(edge)
    def selection(self, current_state):
        # move to the leaf and save the path  
        self.path = []
        current_node = MC_node(current_state)
        if current_node.get_id() not in self.tree.keys():
            return current_node, self.path
        else:
            while not current_node.is_leaf():
                tmp_edge = max(current_node.edges, key= lambda x: x.get_value())
                current_node = tmp_edge.get_out_node()
                self.path.append(tmp_edge)
            return current_node, self.path
    def set_root(self, node):
        self.root = node
        path = []

# Agent
### Agent
##### attributes
- game
- root
- mct


##### methods
- evaluate_leaf_state()
- get_suggestion()
- simulation()
- take_action()
- train_network()

In [44]:
#应该区分root和current state
class Agent():
    def __init__(self, game, cpuct, logger = None):
        self.game = game
        self.root_state = game.get_current_state()
        self.mct = MC_tree(self.root_state, cpuct)
        #self.p_net = p_net
    def episode(self):
        leaf,_ = self.mct.selection(self.root_state)
        # test if the state of leaf staying in the done position
        #  - yes, get the reward and fill back
        #  - no, do the expansion of the leaf
        if self.game.is_done(leaf.get_state()):
            end_reword == self.game.get_end_reword(state)
        else:
            available_actions, states, values = self.evaluate_leaf_state(leaf.get_state())
            self.mct.expansion(leaf, available_actions, states, values)
            end_reword = self.simulation(leaf.get_state())
        self.mct.backfill(end_reword)
    def evaluate_leaf_state(self, state_leaf):
        #state_leaf = leaf.get_state()
        available_actions = self.game.get_available_actions(state_leaf)
        states = []
        values = []
        for action in available_actions:
            new_state = self.simulate_action(state_leaf, action)
            value_new_state = self.game.get_reward(new_state)
            states.append(new_state)
            values.append(value_new_state)
        return available_actions, states, values
    def get_suggestion():
        leaf, path = self.mct.selection(self.root_state)
        path = [e.get_action() for e in path]
        if self.game.is_done(leaf.get_state()):
            return path
        else:
            current_state = leaf.get_state()
            while not self.game.is_done(current_state):
                actions, states, values = evaluate_leaf_state(current_state)
                action, current_state, _ = max(zip(actions, states, values), key = lambda x: x[2])
                path.append(action)
            return path
    def simulate_action(self, cuu_state, action):
        return self.game.simulate_action(cuu_state, action)
    def simulation(self, tmp_state):
        while not self.game.is_done(tmp_state):
            _, states, values = evaluate_leaf_state(tmp_state)
            tmp_state, _ = max(zip(states, values), key = lambda x: x[1])
        return self.game.get_end_reword()
    def take_action(self, action):
        # return the state after taking the action.
        self.game.take_action()
        self.root = self.game.get_current_state()
    def train_network():
        # could the episode data used to train the model again??? ---> no
        # shoud i add another model to the to predict the result and the simulation step just use random choose
        # it can enfast the process and verringt the error
        # then i can use the mcst to generate some data, and calculate the real answer. 
        # then used these data to train the model again.
        pass

# Game
### Game(abstract)
##### abstractmethods
- get_available_actions(self, state)
- get_current_state(self)
- get_end_reword(self)
- is_done(self, state)
- restore_game(self)
- take_action(self, action)
### TS
##### attributes
- lens
- thread
- value_network
- restore_game
##### methods:
- get_available_actions(self, state)
- get_current_state(self)
- get_end_reword(self)
- is_done(self, state)
- restore_game(self)
- take_action(self, action)

In [65]:
class Game(metaclass = ABCMeta):
    @abstractmethod
    def get_available_actions(self, state):
        """
        input:
          state, can be any kind of type
        output:
          actions: list of action, action should be string
        """
        pass
    def get_current_state(self):
        pass
    @abstractmethod
    def is_done(self, state):
        """
        input:
          state
        output:
          out: if the game is done return the result, else return 0
        """
        pass
    @abstractmethod
    def restore_game(self):
        pass
    @abstractmethod
    def simulate_action(self):
        pass
    @abstractmethod
    def take_action(self):
        pass

class TS(Game):
    def __init__(self, lens, thread, value_network):
        """
        input:
          lens, type of int
              total number of parts
          thread, type of float
              value used to decide, weather the seq is good or not
          value_network, type of pkl
              neural network, that used to output the finalpunkt.
        """
        super(TS, self).__init__()
        self.actions = ['1', '2', '3', '4', '5', '6']
        self.lens = lens
        self.thread = thread
        self.value_network = value_network   
        self.restore_game()
        self.setup_lookup()
    def get_available_actions(self, state):
        return self.actions
    def get_current_state(self):
        return self.current_state
    def get_original_state(self):
        return State([])
    def get_reward(self, state, is_done = False):
        inp_nn = self._transform_state_to_input(state)
        out_nn = self.value_network(state)
        score = self._transform_output_to_value(out_nn)
        if not is_done:
            return score
        if score <= thread:
            return 1
        else:
            return -1
    def _transform_state_to_input(self, state):
            #???
            out = []
            for action in state.state:
                out.append(self.lookup[action])
            return torch.FloatTensor(out).unsqueeze(0)
    def _transform_ouput_to_value(self, output):
            #???
            return output.mean()
    def is_done(self, state):
        if len(state) == self.lens:
            # done
            return True
        else:
            # not jet
            return False
    def restore_game(self):
        self.current_state = self.get_original_state()
    def simulate_action(self, cuu_state, action):
        cuu_state.take_action(action)
        return cuu_state
    def setup_lookup(self):
        self.lookup = {'1': [1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                       '2': [0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                       '3': [0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
                       '4': [0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                       '5': [0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
                       '6': [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]}
    def take_action(self, action):
        # change state after taking the action
        self.current_state.take_action(action)
                
class State():
    def __init__(self, state):
        self.state = state
        self.id = str(self.state)
    def __len__(self):
        return len(self.state)
    def get_id(self):
        return self.id
    def take_action(self, action):
        self.state.append(action)
        self.id = str(self.state)

## test

In [66]:
model = torch.load('../models/rnn_predict.pkl')
game = TS(50, 4.0, model)

In [67]:
agent = Agent(game, 0.6)

In [68]:
agent.episode()

TypeError: transpose(): argument 'input' (position 1) must be Tensor, not State

In [10]:
# bild the model, loss and data class
class TS_rnn(torch.nn.Module):
    """
    scores for each piece
    input:
        tensor size of (batch_size, seq_len, num_dim)
    output:
        tensor size of (batch_size, seq_len)
    """
    def __init__(self):
        super(TS_rnn, self).__init__()
        #change the structure of the network
        num_inp = 8
        num_hidden = 64
        self.rnn = torch.nn.LSTM(input_size = num_inp, hidden_size = num_hidden, num_layers = 2)
        self.mlp = torch.nn.Sequential(
                torch.nn.Linear(num_hidden, 16),
                torch.nn.Dropout(),
                torch.nn.ReLU(),
                torch.nn.Linear(16, 1)
                )

    def forward(self, inp):
        # input of the rnn (seq_len, batch, input_size)
        data_in = torch.transpose(inp, 0, 1)
        # run rnn, it has two output
        out_rnn, _ = self.rnn(data_in)
        out_rnn = torch.transpose(out_rnn, 0, 1) # (batch_size, seq_len, num_dim)
        # rnn the mlp
        batch_size, seq_len, num_dim = out_rnn.shape
        out = []
        for i in range(seq_len):
            tmp = self.mlp(out_rnn[:, i,:])
            out.append(tmp)
        # now out is list of (batch_size, 1), combine the items in the list to get the output with size (batch_size, seq_len)
        out = torch.cat(out, 1)
        return out.squeeze()

class TS_rnn2(torch.nn.Module):
    """
    scores only for the whole task
    input:
        tensor size of (batch_size, seq_len, num_dim)
    output:
        tensor size of (batch_size)
    """
    def __init__(self):
        super(TS_rnn2, self).__init__()
        #change the structure of the network
        num_inp = 8
        num_hidden = 64
        self.rnn = torch.nn.LSTM(input_size = num_inp, hidden_size = num_hidden, num_layers = 2)
        self.mlp = torch.nn.Sequential(
                torch.nn.Linear(num_hidden, 64),
                torch.nn.Dropout(),
                torch.nn.ReLU(),
                torch.nn.Linear(64, 1)
                )

    def forward(self, inp):
        # input of the rnn (seq_len, batch, input_size)
        data_in = torch.transpose(inp, 0, 1)
        # run rnn, it has two output
        out_rnn, _ = self.rnn(data_in)
        out_rnn = torch.transpose(out_rnn, 0, 1) # (batch_size, seq_len, num_dim)
        # only use the last output
        out_rnn = out_rnn[:, -1, :].squeeze()
        # rnn the mlp
        out = self.mlp(out_rnn)
        return out.squeeze()
    
class PDLoss(torch.nn.Module):
    def __init__(self, p = 2):
        super(PDLoss, self).__init__()
        self.pd = torch.nn.PairwiseDistance(p)

    def forward(self, o, t):
        # out: (batch_size, 1)
        out = self.pd(o, t)
        return out.mean()

class Data:
    """
    data class for TS_rnn
    """
    def __init__(self, x, y):
        self.data = {}
        self.data['train_x'] = self.add_file(x)
        self.data['train_y'] = self.add_file(y)[:, :, -1] # use the first metric tempately
        assert(len(self.data['train_x']) == len(self.data['train_y']))
        self.len = len(self.data['train_x'])

    def add_file(self, path):
        return torch.from_numpy(np.load(path))

    def add_scores(self, path):
        return torch.FloatTensor([float(li.rstrip('\n')) for li in open(path)])

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        return (self.data['train_x'][index],
                self.data['train_y'][index])

class Data2:
    """
    data class for TS_rnn2
    """
    def __init__(self, x, y):
        self.data = {}
        self.data['train_x'] = self.add_file(x)
        self.data['train_y'] = self.add_file(y)[:, :, -1] # use the first metric tempately
        self.data['train_y'] = torch.mean(self.data['train_y'], 1)
        assert(len(self.data['train_x']) == len(self.data['train_y']))
        self.len = len(self.data['train_x'])

    def add_file(self, path):
        return torch.from_numpy(np.load(path))

    def add_scores(self, path):
        return torch.FloatTensor([float(li.rstrip('\n')) for li in open(path)])

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        return (self.data['train_x'][index],
                self.data['train_y'][index])

In [11]:
# write the test function
def test_model(dl_test, model, loss):
    model.eval()
    test_loss = 0
    counter = 0
    for batch_idx, dat in enumerate(dl_test):
        counter += 1
        # codes to be changed
        inp, target = dat
        out = model(inp)
        lo = loss(out, target.squeeze())
        test_loss += lo.data
    return test_loss/counter