In [2]:
import math
import json
from collections import OrderedDict

import numpy as np
import torch
import torch.utils.data
import torch.nn.functional as F
import model_utils.configure as conf
"""
0: sos/eos
1: no connection
2: connection
3: CONV1X1
4: CONV3X3
5: MAXPOOL3X3
6: OUTPUT
7: INPUT
"""
        
class ControllerDataset(torch.utils.data.Dataset):
    """Dataset of architecture records read from a JSON-lines file.

    Every non-blank line of ``file_path`` must be a JSON object with the keys
    ``module_adjacency``, ``module_operations`` and ``sequence``.

    Args:
        file_path: path of the JSON-lines file to load eagerly into memory.

    Raises:
        ValueError: from ``__getitem__`` when a record contains an operation
            label that has no token id.
    """

    def __init__(self, file_path):
        super(ControllerDataset, self).__init__()

        self.adjacency_matrices = []
        self.operations = []
        self.sequences = []

        with open(file_path, "r") as f:
            # Iterate the handle directly; readlines() would build a throwaway list.
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines carry no record and json.loads('') would raise.
                    continue

                jo = json.loads(line, object_pairs_hook=OrderedDict)

                self.adjacency_matrices.append(jo['module_adjacency'])
                self.operations.append(jo['module_operations'])
                self.sequences.append(jo['sequence'])

    def __getitem__(self, index):
        """Return record ``index`` as a dict of LongTensors.

        Keys: ``'matrix'`` (adjacency matrix), ``'operations'`` (one token id
        per operation label) and ``'sequence'``.
        """
        # Built at call time: the label constants are module-level names that
        # only need to exist once items are actually requested.
        op_to_id = {
            CONV1X1: 3,
            CONV3X3: 4,
            MAXPOOL3X3: 5,
            OUTPUT: 6,
            INPUT: 7,
        }

        ops = []
        for op in self.operations[index]:
            try:
                ops.append(op_to_id[op])
            except (KeyError, TypeError):
                # Skipping the label would leave fewer operation ids than
                # matrix rows, silently shifting every later node.
                raise ValueError(
                    "unknown operation {!r} in record {}".format(op, index))

        return {
            'matrix': torch.LongTensor(self.adjacency_matrices[index]),
            'operations': torch.LongTensor(ops),
            'sequence': torch.LongTensor(self.sequences[index]),
        }

    def __len__(self):
        """Number of records loaded from the file."""
        return len(self.sequences)

In [8]:
def collate_fn(samples):
    """Collate per-graph samples into one flat, padded batch.

    The nodes of every graph are renumbered into one shared index space:
    node ``i`` of a graph becomes ``offset + i``, where ``offset`` is the
    number of nodes in the graphs before it. Neighbour lists are padded with
    the index one past the last real node; a single ``0`` operation id is
    appended so that this padding index points at a valid entry.

    Args:
        samples: iterable of dicts with ``'matrix'`` (square adjacency
            tensor; only entries above the diagonal are read),
            ``'operations'`` (operation ids) and ``'sequence'`` (1-D
            LongTensor), as returned by ``ControllerDataset.__getitem__``.

    Returns:
        dict with
        ``'num_nodes'``  LongTensor ``[batch]``, nodes per graph;
        ``'fw_adjs'``    LongTensor ``[total_nodes, degree_max_size]``,
                         successors of each node (global indices);
        ``'bw_adjs'``    LongTensor ``[total_nodes, degree_max_size]``,
                         predecessors of each node (global indices);
        ``'operations'`` LongTensor with every sample's operation ids
                         followed by the padding entry ``0``;
        ``'sequence'``   LongTensor ``[batch, seq_max_length]``, each
                         sequence right-padded with ``0``.

    Raises:
        ValueError: if a node has more than ``conf.degree_max_size``
            successors or predecessors, or a sequence is longer than the
            maximum derived from ``conf.graph_size``.
    """
    degree_max_size = conf.degree_max_size
    graph_size = conf.graph_size
    seq_max_length = int((graph_size + 2) * (graph_size - 1) / 2)

    g_fw_adjs = []
    g_bw_adjs = []
    g_operations = []
    g_sequence = []
    g_num_nodes = []

    g_idx_base = 0  # global index of the current graph's first node
    for sample in samples:
        matrix = sample['matrix']
        num_nodes = matrix.shape[0]
        g_num_nodes.append(num_nodes)

        fw_adjs = [[] for _ in range(num_nodes)]
        bw_adjs = [[] for _ in range(num_nodes)]
        # Only the strict upper triangle is inspected: edges go from a lower
        # to a higher node index.
        for row in range(num_nodes):
            for col in range(row + 1, num_nodes):
                if matrix[row][col]:
                    fw_adjs[row].append(g_idx_base + col)
                    bw_adjs[col].append(g_idx_base + row)
        g_fw_adjs.extend(fw_adjs)
        g_bw_adjs.extend(bw_adjs)

        # Store plain ints rather than 0-dim tensors.
        g_operations.extend(int(op) for op in sample['operations'])

        sequence = sample['sequence']
        pad_len = seq_max_length - len(sequence)
        if pad_len < 0:
            raise ValueError(
                "sequence of length {} exceeds the maximum of {}".format(
                    len(sequence), seq_max_length))
        g_sequence.append(torch.cat([sequence, torch.LongTensor([0] * pad_len)]))

        g_idx_base += num_nodes

    # After the loop g_idx_base equals the total node count, i.e. the first
    # index that belongs to no real node; it is used as the padding neighbour.
    pad_idx = g_idx_base
    for adj in g_fw_adjs + g_bw_adjs:
        if len(adj) > degree_max_size:
            raise ValueError(
                "node degree {} exceeds degree_max_size {}".format(
                    len(adj), degree_max_size))
        adj.extend([pad_idx] * (degree_max_size - len(adj)))

    # Entry addressed by pad_idx.
    g_operations.append(0)

    return {
            'num_nodes' : torch.LongTensor(g_num_nodes),
            'fw_adjs': torch.LongTensor(g_fw_adjs),
            'bw_adjs': torch.LongTensor(g_bw_adjs),
            'operations': torch.LongTensor(g_operations),
            'sequence': torch.stack(g_sequence)
            }

In [47]:
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
import tensorflow as tf
from layers import Layer, Dense
from inits import glorot, zeros
"""

class tMeanAggregator(nn.Module):
    """Aggregates via mean followed by matmul and non-linearity.

    Args:
        input_dim: feature width of the node's own vector.
        output_dim: width of each projection (self and neighbour).
        neigh_input_dim: feature width of neighbour vectors; defaults to
            ``input_dim``.
        dropout: dropout probability applied to both inputs when
            ``mode == "train"``.
        withBias: add a learnable bias to the combined output.
        act: activation applied to the result.
        concat: concatenate the self and neighbour projections (output width
            ``2 * output_dim``) instead of summing them (``output_dim``).
        mode: ``"train"`` enables dropout; any other value disables it.
        **kwargs: forwarded to ``nn.Module.__init__``.

    Attributes:
        output_dim: the per-projection width passed in, not doubled when
            ``concat`` is set.
    """

    def __init__(self, input_dim, output_dim, neigh_input_dim=None,
            dropout=0, withBias=True, act=F.relu,
            concat=False, mode="train", **kwargs):
        super(tMeanAggregator, self).__init__(**kwargs)

        self.dropout = dropout
        self.withBias = withBias
        self.act = act
        self.concat = concat
        self.mode = mode

        if neigh_input_dim is None:
            neigh_input_dim = input_dim

        self.input_dim = input_dim
        self.output_dim = output_dim

        # Registered as parameters so they are optimised, follow .to(device)
        # and appear in state_dict().
        self.neigh_weights = nn.Parameter(
            nn.init.xavier_uniform_(torch.empty(neigh_input_dim, output_dim)))
        self.self_weights = nn.Parameter(
            nn.init.xavier_uniform_(torch.empty(input_dim, output_dim)))

        if self.withBias:
            # The bias must match the width forward() actually produces.
            bias_dim = 2 * output_dim if concat else output_dim
            self.bias = nn.Parameter(torch.zeros(bias_dim))

    def forward(self, self_vecs, neigh_vecs, neigh_len=0):
        """Combine each node's vector with the mean of its neighbours.

        Args:
            self_vecs: node features; multiplied by ``self_weights``, so the
                last dimension must be ``input_dim``.
            neigh_vecs: neighbour features, averaged over dimension 1 before
                being multiplied by ``neigh_weights``.
            neigh_len: unused.

        Returns:
            ``act`` applied to the summed or concatenated projections, plus
            the bias when enabled.
        """
        if self.mode == "train":
            neigh_vecs = F.dropout(neigh_vecs, self.dropout)
            self_vecs = F.dropout(self_vecs, self.dropout)

        # reduce_mean performs better than mean_pool
        neigh_means = torch.mean(neigh_vecs, dim=1)

        from_neighs = torch.matmul(neigh_means, self.neigh_weights)
        from_self = torch.matmul(self_vecs, self.self_weights)

        if self.concat:
            output = torch.cat([from_self, from_neighs], dim=1)
        else:
            output = torch.add(from_self, from_neighs)

        if self.withBias:
            output = output + self.bias

        return self.act(output)

In [2]:
import os
import sys
import glob
import time
import copy
import logging
import random
import numpy as np
import torch
import torch.nn as nn
import torch.utils
import torch.nn.functional as F
import model_utils.configure as conf
from model import Graph2Seq
import utils

import math
import json
from collections import OrderedDict

import numpy as np
import torch
import torch.utils.data
import torch.nn.functional as F

# Operation labels as they appear in the dataset's 'module_operations' lists.
INPUT, CONV1X1, CONV3X3, MAXPOOL3X3, OUTPUT = (
    'input',
    'conv1x1-bn-relu',
    'conv3x3-bn-relu',
    'maxpool3x3',
    'output',
)

mode = "train"

# Seed the three random number generators with the configured seed.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(conf.seed)
del _seed_fn

logging.info("conf = %s", conf)

# All three length settings share the value (nodes + 2) * (nodes - 1) // 2.
conf.source_length = (conf.nodes + 2) * (conf.nodes - 1) // 2
conf.encoder_length = conf.source_length
conf.decoder_length = conf.source_length
epochs = conf.epochs


#model = Graph2Seq(mode=mode, conf=conf)

# load data
dataset = utils.ControllerDataset(conf.data_file_path)

In [3]:
import logging
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Start-of-sequence and end-of-sequence markers share token id 0
# (listed as "0: sos/eos" in the token vocabulary note).
SOS_ID = 0
EOS_ID = 0

class Attention(nn.Module):
    """Dot-product attention over a sequence of source hidden states.

    The query is projected to ``source_dim``, scored against every source
    state, and the resulting context is concatenated with the original query
    and squashed through a ``tanh`` output projection.

    Args:
        input_dim: feature width of the query vectors.
        source_dim: feature width of the source states; defaults to
            ``input_dim``.
        output_dim: feature width of the result; defaults to ``input_dim``.
        bias: whether the two linear projections use a bias term.
    """

    def __init__(self, input_dim, source_dim=None, output_dim=None, bias=False):
        super(Attention, self).__init__()
        if source_dim is None:
            source_dim = input_dim
        if output_dim is None:
            output_dim = input_dim
        self.input_dim = input_dim
        self.source_dim = source_dim
        self.output_dim = output_dim
        self.input_proj = nn.Linear(input_dim, source_dim, bias=bias)
        self.output_proj = nn.Linear(input_dim + source_dim, output_dim, bias=bias)

    def forward(self, input, source_hids, mask=None):
        """Attend from ``input`` over ``source_hids``.

        Args:
            input: queries, ``(batch, tgt_len, input_dim)``.
            source_hids: source states, ``(batch, src_len, source_dim)``.
            mask: optional boolean tensor broadcastable to
                ``(batch, tgt_len, src_len)``; ``True`` entries are excluded
                from attention. A row that is masked everywhere produces NaN
                weights, because its softmax is taken over ``-inf`` only.

        Returns:
            ``(output, attn)`` with ``output`` of shape
            ``(batch, tgt_len, output_dim)`` and the attention weights
            ``attn`` of shape ``(batch, tgt_len, src_len)``.
        """
        # (batch, tgt_len, input_dim) -> (batch, tgt_len, source_dim)
        query = self.input_proj(input)

        # (batch, tgt_len, source_dim) x (batch, source_dim, src_len)
        #   -> (batch, tgt_len, src_len)
        scores = torch.bmm(query, source_hids.transpose(1, 2))
        if mask is not None:
            # Out-of-place on the tensor itself: writing through `.data`
            # would modify values behind autograd's back.
            scores = scores.masked_fill(mask, -float('inf'))
        attn = F.softmax(scores, dim=-1)

        # (batch, tgt_len, src_len) x (batch, src_len, source_dim)
        #   -> (batch, tgt_len, source_dim)
        mix = torch.bmm(attn, source_hids)

        # concat -> (batch, tgt_len, source_dim + input_dim)
        combined = torch.cat((mix, input), dim=2)
        # nn.Linear acts on the last dimension, so no flattening is needed.
        output = torch.tanh(self.output_proj(combined))

        return output, attn


class Decoder(nn.Module):
    """LSTM decoder with attention over encoder states.

    In ``"train"`` mode the decoder is teacher-forced with the target
    sequence and returns log-probabilities. In ``"test"`` mode it decodes
    greedily, feeding each chosen token back in as the next input.

    Args:
        mode: ``"train"`` or ``"test"``; selects the branch of ``forward``.
        hidden_dim: width of the embeddings, the LSTM state and the
            attention output.
        embedding_vocab_size: number of rows of the input embedding table.
        decoder_vocab_size: number of output classes.
        dropout: dropout probability for the embedded inputs (and between
            LSTM layers).
        length: sequence length used to derive ``self.n`` and the operation
            positions in ``self.offsets``.
        layers: number of LSTM layers.
    """

    def __init__(self,
                mode,
                hidden_dim,
                embedding_vocab_size,
                decoder_vocab_size,
                dropout,
                length,
                layers
                ):
        super(Decoder, self).__init__()
        self.mode = mode
        self.hidden_dim = hidden_dim
        self.length = length
        self.embedding_vocab_size = embedding_vocab_size
        self.decoder_vocab_size = decoder_vocab_size
        self.layers = layers
        self.rnn = nn.LSTM(self.hidden_dim, self.hidden_dim, self.layers, batch_first=True, dropout=dropout)
        self.init_input = None
        self.embedding = nn.Embedding(self.embedding_vocab_size, self.hidden_dim)
        self.dropout = dropout
        self.attention = Attention(self.hidden_dim)
        self.out = nn.Linear(self.hidden_dim, self.decoder_vocab_size)
        self.n = int(math.floor(math.sqrt((self.length + 1) * 2)))
        # Step indices at which an operation token (rather than a connection
        # token) is decoded.
        self.offsets = [(i + 3) * i // 2 - 1 for i in range(self.n)]

    def _decode_step(self, x, hidden, encoder_hidden):
        """Run the LSTM and the attention, each followed by a scaled residual add."""
        residual = x
        x, hidden = self.rnn(x, hidden)
        x = (residual + x) * math.sqrt(0.5)
        residual = x
        x, _ = self.attention(x, encoder_hidden)
        x = (residual + x) * math.sqrt(0.5)
        return x, hidden

    def _select_symbol(self, step, output):
        """Greedily pick the token for ``step`` from the scores ``output``."""
        if step in self.offsets:
            last_op_step = (self.n + 2) * (self.n - 1) // 2 - 1
            if step != last_op_step:
                # Operation slot: best of token ids 3, 4, 5.
                return output[:, 3:6].topk(1)[1] + 3
            # Final operation slot: best of token ids 6 and above.
            return output[:, 6:].topk(1)[1] + 6
        # Connection slot: best of token ids 1, 2.
        return output[:, 1:3].topk(1)[1] + 1

    def forward(self, x, num_nodes, initial_states=None, encoder_hidden=None, targets=None):
        """Decode a token sequence.

        Args:
            x: initial input of each batch element, described by the
                original author as ``[batch_size, graph_embedding_dim]``.
                Only its first dimension (the batch size) is read, and only
                in test mode.
            num_nodes: node count per graph; in test mode its maximum is the
                number of decoding steps.
            initial_states: ``(h_0, c_0)`` for the LSTM, each of shape
                ``(num_layers, batch, hidden_dim)``.
            encoder_hidden: encoder states attended over at every step.
            targets: ``(batch, target_length)`` token ids; required in train
                mode, ignored in test mode.

        Returns:
            Train mode: ``(log_probs, None)`` with ``log_probs`` of shape
            ``(batch, target_length, decoder_vocab_size)``.
            Test mode: ``(None, decoded_ids)`` with ``decoded_ids`` of shape
            ``(batch, steps + 1)``; column 0 is the start token ``0``.

        Raises:
            ValueError: if ``mode`` is neither ``"train"`` nor ``"test"``,
                or ``targets`` is missing in train mode.
        """
        # Both modes start from the start token and then consume the
        # previous step's token.
        # Train: the previous token is the ground-truth target (teacher
        # forcing); the output is compared with the targets for the loss.
        # Test: the previous token is the model's own prediction.
        device = self.embedding.weight.device

        if self.mode == "train":
            if targets is None:
                raise ValueError("targets are required in train mode")
            batch_size = targets.size(0)
            target_length = targets.size(1)

            # Decoder input = start token + targets shifted right by one, so
            # position t is predicted from tokens before t and there is one
            # prediction per target position.
            # NOTE(review): the earlier code concatenated all of `targets`
            # (target_length + 1 inputs) — confirm the shift is intended.
            sos = torch.zeros((batch_size, 1), dtype=torch.long, device=device)
            decoder_input = torch.cat([sos, targets[:, :-1]], dim=1)

            x = self.embedding(decoder_input)
            x = F.dropout(x, self.dropout, training=self.training)
            x, _ = self._decode_step(x, initial_states, encoder_hidden)

            # nn.Linear acts on the last dimension, keeping (batch, time)
            # intact; reshaping with view() here would mix up the axes.
            predicted_softmax = F.log_softmax(self.out(x), dim=-1)
            assert predicted_softmax.size(1) == target_length

            return predicted_softmax, None

        # x : list of graph embeddings
        elif self.mode == "test":
            batch_size = x.size(0)
            # NOTE(review): steps follow the largest node count, as before;
            # confirm whether self.length was intended instead.
            length = int(max(num_nodes))
            decoder_hidden = initial_states

            decoded_ids = torch.zeros((batch_size, 1), dtype=torch.long, device=device)

            for i in range(length):
                x = self.embedding(decoded_ids[:, i:i+1])
                x = F.dropout(x, self.dropout, training=self.training)
                x, decoder_hidden = self._decode_step(x, decoder_hidden, encoder_hidden)
                output = self.out(x.squeeze(1))
                symbol = self._select_symbol(i, output)
                decoded_ids = torch.cat((decoded_ids, symbol), dim=-1)

            return None, decoded_ids

        raise ValueError("unknown mode {!r}; expected 'train' or 'test'".format(self.mode))

In [5]:
# Uses graph_embedding, encoder_hidden, num_nodes and sequence, which are
# assigned by the cell that runs the encoder on one batch.

# Add a leading dimension to the first graph-embedding entry and use the
# same tensor for both elements of the LSTM's initial-state tuple.
initial_states = torch.unsqueeze(graph_embedding[0], 0)
print("initial_states size : {}".format(initial_states.size()))
initial_states = (initial_states,) * 2

de = Decoder(
    mode="test",
    hidden_dim=conf.hidden_layer_dim * 4,
    embedding_vocab_size=9,
    decoder_vocab_size=7,
    dropout=conf.dropout,
    length=27,
    layers=1,
)

# graph_embedding
de(
    graph_embedding[0],
    num_nodes,
    initial_states=initial_states,
    encoder_hidden=encoder_hidden,
    targets=sequence,
)

initial_states size : torch.Size([1, 2, 64])


NameError: name 'decoder_input' is not defined

In [4]:
import model_utils.configure as conf
from importlib import reload
from encoder import Encoder

# Batches are assembled by utils.collate_fn.
queue = torch.utils.data.DataLoader(dataset, batch_size=conf.batch_size, shuffle=True, pin_memory=False, collate_fn=utils.collate_fn)

# Stay empty lists if the loader yields no batch.
encoder_hidden = []
graph_embedding = []
en = Encoder("train", conf.vocab_size, conf.hidden_layer_dim, "bi", 6, True, conf.dropout, conf.learning_rate)

# Only the first batch is encoded: the loop exits unconditionally after one
# pass, so no step counter is needed.
for sample in queue:
    fw_adjs = sample['fw_adjs']
    bw_adjs = sample['bw_adjs']
    operations = sample['operations']
    num_nodes = sample['num_nodes']
    sequence = sample['sequence']

    encoder_hidden, graph_embedding = en(fw_adjs, bw_adjs, operations, num_nodes)
    break
            

hop: 0, after aggregate: fw_hidden_size: torch.Size([14, 32])
hop: 1, after aggregate: fw_hidden_size: torch.Size([14, 32])
hop: 2, after aggregate: fw_hidden_size: torch.Size([14, 32])
hop: 3, after aggregate: fw_hidden_size: torch.Size([14, 32])
hop: 4, after aggregate: fw_hidden_size: torch.Size([14, 32])
hop: 5, after aggregate: fw_hidden_size: torch.Size([14, 32])


5
