In [1]:
import torch
from torch import nn
import numpy as np
from torch.autograd import Variable
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import Sampler
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, PackedSequence
import cPickle as pickle
import pandas as pd
from random import shuffle
from torch import optim
from torch.nn import functional as F 
from tqdm import tqdm,tqdm_notebook
from torch.nn.parameter import Parameter

from tensorboardX import SummaryWriter
import time

use_cuda = torch.cuda.is_available()
torch.cuda.device(0)

<torch.cuda.device at 0x7f7468710d90>

In [2]:
names = ["airplane","bench","bowl","cone","desk","flower_pot",
         "keyboard","mantel","person","radio","sofa","table",
         "tv_stand","xbox","bathtub","bookshelf","car","cup",
         "door","glass_box","lamp","monitor","piano","range_hood",
         "stairs","tent","vase","bed","bottle","chair","curtain",
         "dresser","guitar","laptop","night_stand","plant",
         "sink","stool","toilet","wardrobe"]

# Data

In [3]:
class MeshSampler(Sampler):
    def __init__(self,dataset,batch_size=32):
        self.epoch_size = len(dataset)
        self.batch_size = batch_size
        self.sample_ind = range(len(dataset))
    def __len__(self):
        return (self.epoch_size + self.batch_size - 1) // self.batch_size
    def __iter__(self):
        order = range(int(self.epoch_size/self.batch_size))
        shuffle(order)
        for i in order:
            start = i * self.batch_size
            end = min(start + self.batch_size, self.epoch_size)
            yield self.sample_ind[start:end]
from numpy import cross, eye, dot
from scipy.linalg import expm, norm

def renorm(data):
    print data.shape
    med = (np.max(data,axis=0) + np.min(data,axis=0))/2
    data -= med
    print med,np.max(np.linalg.norm(data,axis=1))
    data /= np.max(np.linalg.norm(data,axis=1))
    return data

def M(axis, theta):
    return expm(cross(eye(3), axis/norm(axis)*theta))
def augment(faces,theta,rotate):
    theta = theta*30*np.pi/180.
    if rotate and theta > 0:
        R = M([0,0,1],theta)
        faces = np.dot(faces,R)
    return faces
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
class MeshData(Dataset):
    def __init__(self,root_directory, class_we_want, max_faces = 8000, max_vertices=5000):
        print "loading..."
        self.root = root_directory
        self.df = pd.read_pickle("sizes.pkl")
        print "done"
        self.df = self.df[(self.df.sizef<max_faces) &
                          (self.df.sizev<max_vertices) &
                          (self.df.cls_name==class_we_want) & 
                            (self.df.fname.str.contains("_0_"))]#.sort_values("sizef",ascending=False)
        self.max_faces = np.max(self.df.sizef.values)
        self.max_vertexes = np.max(self.df.sizev.values)
    def __len__(self):
        return len(self.df)*12
    def __getitem__(self, idx):
        i = idx//12
        theta = idx%12
        fname = self.df.iloc[i]['fname']
        end = fname.split("/")[-1]
        ftype = "_".join(fname.split("/")[-1].split("_")[:-4])
        fno = fname.split("_")[-1][:-4]
        #fname = "{}{}_{}_0.pkl".format(self.root,ftype,fno)
        data = pickle.load(open(fname,'rb'))
        faces = data['faces']
        vertices = data['vertices']
        faces = faces.reshape(-1,3)
        min_dist = np.min(np.linalg.norm(faces-vertices[0],axis=1))
        assert min_dist < 0.0001, "{}".format(min_dist)
        vertices = augment(vertices,theta,True)
        faces = augment(faces,theta,True)
        min_dist = np.min(np.linalg.norm(faces-vertices[0],axis=1))
        assert min_dist < 0.0001, "{}".format(min_dist)
        faces = faces.reshape(-1,9)
        assert len(faces) <= self.max_faces
        assert len(vertices) <= self.max_vertexes
        return torch.from_numpy(vertices).float(),torch.from_numpy(faces).float()
trainer = MeshData("../CleanedModels/pickle/","chair",3000,3000)

def fill_with_zeros(x,l):
    new_x = torch.zeros(l)
    new_x[:x.size(0)]=x
    return new_x
    
def pad_packer(x):
    faces,cls_idx = zip(*x)
    cls_idx = torch.cat(cls_idx)
    max_size = faces[0].size()
    lengths = [len(i) for i in faces]
    faces = [fill_with_zeros(i,max_size) for i in faces]
    faces = Variable(torch.stack(faces,0))
    return faces,cls_idx,lengths
train_loader = DataLoader(trainer,shuffle=True)

loading...
done


In [4]:
def find_nearest(x,y):
    now = time.time()
    x=x.view(3,1,3)
    yd = torch.stack([y]*3).squeeze()
    dist = yd-x
    dist = torch.sum(dist.pow_(2),dim=2)
    #dist[i,j] = distance from ith input to jth possibility
    #print dist[i,j],exdist
    math_time = time.time() - now
    k = 3
    final_ps = [None,None,None]
    
    now = time.time()
    mindist,idxs = torch.topk(dist,k,dim=1,largest=False)
    cnt = torch.arange(0,mindist.numel()).view_as(mindist).view(-1).cpu().numpy()
    idxs,mindist=idxs.view(-1).cpu().data.numpy(),mindist.view(-1).cpu().data.numpy()
    order = mindist.argsort()[::-1]
    d = mindist[order]
    distances = torch.FloatTensor([0,0,0])
    pts = cnt[order]
    lows = idxs[order]
    order_time = time.time() - now
    
    now = time.time()
    for i in range(len(cnt)):
        pt = int(pts[i])//k
        lowest = lows[i]
        non_nones = [p for p in final_ps if p is not None]
        if final_ps[pt] is not None:
            continue
        if lowest in non_nones:
            continue
        if final_ps[pt] is None:
            distances[pt] = float(d[i])
            final_ps[pt] = lowest
        if None not in final_ps:
            break
    #print final_ps
    for i in range(3):
        final_ps[i]=y[final_ps[i]]
    sort_time = time.time() - now
    
    now = time.time()
    final_ps = torch.stack(final_ps)
    
    distances = torch.sqrt(torch.sum(torch.pow(final_ps-x.squeeze(),2),dim=1))
    var_time = time.time() - now
    
    #print "math: {}, order: {}, sorting: {}, var: {}".format(math_time,order_time,sort_time,var_time)
    
    return final_ps,torch.mean(distances)


In [5]:
import itertools
#print [i for i in itertools.permutations([0,1,2], 3)]
def one_dim_order_invariant(x,y):
    # x = [3 x 3]
    # y = [3 x 3]
    #print x.size(),y.size()
    d = 10
    for p in itertools.permutations([0,1,2], 3):
        td = 0
        temp = torch.zeros_like(x)
        for i,c in enumerate(p):
            temp[i] = x[c]-y[i]
        #print torch.norm(temp,2,0,True).size()
        td = torch.sum(torch.norm(temp,2,0))
        #print td
        if td < d:
            #print "td: {}".format(td)
            d = td
    assert d < 10
    return d
    
def two_dim_order_invariant(x,y):
    '''find the closest face in y to a given face x'''
    # x = [3 x 3]
    # y = [n x 3 x 3]
    x,y = x.view(3,3),y.view(-1,3,3)
    xx = torch.stack([x]*6)
    #print xx[0]
    for i,p in enumerate(itertools.permutations([0,1,2], 3)):
        for j,c in enumerate(p):
            xx[i,j] = x[c]
    
    yy = y.unsqueeze(1).expand(-1,6,-1,-1)
    zz = yy-xx
    pp = torch.sum(torch.norm(zz,2,-1),-1)
    pp,_ = torch.min(pp,1)
    d,idx = torch.min(pp,0)
    best_face = y[idx]
    return best_face.view(1,9),d

#two_dim_order_invariant(Variable(torch.rand(3,3)),Variable(torch.rand(200,9)))

(Variable containing:
  0.4082  0.4704  0.0607  0.4130  0.5701  0.5928  0.2203  0.6950  0.7860
 [torch.FloatTensor of size 1x9], Variable containing:
  0.7038
 [torch.FloatTensor of size 1])

# Model

In [31]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('LSTM') != -1:
        nn.init.xavier_normal(m.weight_ih_l0.data)
        nn.init.xavier_normal(m.weight_hh_l0.data)
        nn.init.constant(m.bias_ih_l0.data,0.001)
        nn.init.constant(m.bias_hh_l0.data,0.001)
    elif classname.find('Linear') != -1:
        nn.init.xavier_normal(m.weight.data)
        nn.init.constant(m.bias.data,0.001)

class VertexTranslationEncoder(nn.Module):
    def __init__(self,enc_lstm_units,in_dims=3,bi=True):
        super(VertexTranslationEncoder, self).__init__()
        self.hidden_units = enc_lstm_units
        self.lstm = nn.LSTM(3, self.hidden_units,bidirectional=bi)
    def forward(self,vertexes,hidden):
        vertexes = vertexes.view(1,1,-1)
        return self.lstm(vertexes,hidden)

class VertexTranslationDecoderWithAttention(nn.Module):
    def __init__(self,dec_lstm_units,max_length=1000,out_dims=9,bi=True):
        super(VertexTranslationDecoderWithAttention, self).__init__()
        self.dec_hidden_size = dec_lstm_units
        self.output_size = out_dims
        self.max_length = max_length
        self.dec_attn = nn.Linear(self.dec_hidden_size + self.output_size, self.max_length)
        self.dec_attn_combine = nn.Linear(self.dec_hidden_size + self.output_size, self.dec_hidden_size)
        self.dec_lstm = nn.LSTM(self.dec_hidden_size, self.dec_hidden_size)
        self.out = nn.Linear(self.dec_hidden_size, self.output_size)
        self.attn_time = 0
        self.lstm_time = 0
    def forward(self, input, hidden, encoder_outputs):
        input = input.view(1, 1, -1)
        #print input.size(),hidden[0].size()
        x = torch.cat((input[0], hidden[0][0]), 1)
        #print x.size()
        attn_weights = F.softmax(self.dec_attn(x),dim=1)
        assert torch.sum(attn_weights).data[0] < 1.1, "{}".format(torch.sum(attn_weights).data[0])
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))
        

        output = torch.cat((input[0], attn_applied[0]), 1)
        output = self.dec_attn_combine(output).unsqueeze(0)
        output, hidden = self.dec_lstm(output, hidden)
        output = self.out(output[0])
        return output, hidden, attn_weights

In [46]:

def train_once(input_variable,target_variable,encoder,decoder,e_o,d_o,max_length=1000):
    input_length = input_variable.size()[0]
    target_length = target_variable.size()[0]
    e_o.zero_grad()
    d_o.zero_grad()
    encoder_hidden = None

    encoder_outputs = Variable(torch.zeros(max_length, decoder.dec_hidden_size))
    encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs
    loss = 0
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_variable[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0][0]
    decoder_input = Variable(torch.FloatTensor([[0,0,0,0,0,0,0,0,0]]))
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    decoder_hidden = [e.view(1,1,-1) for e in encoder_hidden]
    choices = input_variable.squeeze()
    vert_loss = 0
    face_loss = 0
    #two types of losses:
    # loss of generating vertex not from the list
    # loss of generating face not from list
    snap_time = 0
    decode_time = 0
    face_time = 0
    raw_faces = []
    snapped_faces = []
    fixed_faces = []
    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        snapped_face,d = find_nearest(decoder_output,choices)
        vert_loss += d
        tg, d = two_dim_order_invariant(snapped_face,target_variable)
        face_loss += d
        fixed_faces.append(tg)
        snapped_faces.append(snapped_face)
        raw_faces.append(decoder_output)
        decoder_input = tg  # Teacher forcing
        #print "decode: {}, snap: {}, face: {}".format(decode_time,snap_time,face_time)
    loss = face_loss+vert_loss
    loss.backward()
    snapped_faces = torch.cat(snapped_faces)
    fixed_faces = torch.cat(fixed_faces)
    raw_faces = torch.cat(raw_faces)
    print fixed_faces.size(),target_variable.size()
    # truth to predicted loss
    return
    if global_counter % genevery == 0:
        write_off(fixed_faces.data.cpu().numpy(),"genobjs/fixed_{}.off".format(global_counter))
        write_off(snapped_faces.data.cpu().numpy(),"genobjs/snapped_{}.off".format(global_counter))
        write_off(raw_faces.data.cpu().numpy(),"genobjs/raw_{}.off".format(global_counter))
    clip = 0.25
    torch.nn.utils.clip_grad_norm(encoder.parameters(), clip)
    torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)
#     e_o.step()
#     d_o.step()
    return face_loss,vert_loss
train_once(vertices,faces,encoder,decoder,encoder_optimizer,decoder_optimizer,max_length)

torch.Size([2148, 9]) torch.Size([2148, 1, 9])


In [33]:
h_dims = 200
max_length = max(trainer.max_faces,trainer.max_vertexes)
encoder = VertexTranslationEncoder(h_dims)
decoder = VertexTranslationDecoderWithAttention(h_dims*2,max_length=max_length)
if use_cuda:
    encoder,decoder = encoder.cuda(),decoder.cuda()

encoder_optimizer = optim.Adam(encoder.parameters(),lr=0.0001)
decoder_optimizer = optim.Adam(decoder.parameters(),lr=0.0001)

In [35]:
# tensorboard things
writer = SummaryWriter()
global_counter = 0 

#vandf.load_state_dict(torch.load("facer.tar"))

In [43]:
genevery = 10
import time
for i in range(30):
    for idx,(vertices,faces) in enumerate(train_loader):
        vertices,faces = vertices.view(-1,1,3),faces.view(-1,1,9)
        vertices,faces = Variable(vertices),Variable(faces)
        target_length = faces.size(0)
        if use_cuda:
            faces=faces.cuda()
            vertices=vertices.cuda()
        #print faces.size()
        face_loss,vert_loss = train_once(vertices,faces,encoder,decoder,
                                         encoder_optimizer,decoder_optimizer,max_length)
        writer.add_scalars("Losses",{"snapping":vert_loss.data[0]/target_length,
                                     "faceloss":face_loss.data[0]/target_length},global_counter)
        global_counter+=1

7 3543
9 3543
186 3543
6 234
5 234
126 234
6 1800
10 1800
146 1800


KeyboardInterrupt: 

In [37]:
torch.save(encoder,"encoder.th")
torch.save(decoder,"decoder.th")

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


In [38]:
def write_off(data,fname):
    data = data.reshape(-1,3)
    points = set()
    for d in data:
        points.add(tuple(d))
    vertexes = list(points)
    print len(vertexes),len(data)
    faces = []
    for d in data:
        faces.append(vertexes.index(tuple(d)))
    faces = np.array(faces)
    faces = faces.reshape(-1,3)
    with open(fname,'w') as openfile:
        openfile.write("OFF\n")
        openfile.write("{} {} 0\n".format(len(vertexes),len(faces)))
        for v in vertexes:
            openfile.write("{} {} {}\n".format(v[0],v[1],v[2]))
        for f in faces:
            openfile.write("3 {} {} {}\n".format(f[0],f[1],f[2]))
