In [28]:
import torch
import torchvision.models as models
from torch.autograd import Variable

inputs = torch.randn(2, 3, 224, 224)
vin1 = Variable(inputs, requires_grad=True)
vin2 = Variable(inputs[0].unsqueeze(0), requires_grad=True)
vin3 = Variable(inputs[1].unsqueeze(0), requires_grad=True)
labels = torch.LongTensor([1,0])
lab1 = Variable(labels)
lab2 = Variable(torch.LongTensor([1]))
lab3 = Variable(torch.LongTensor([0]))

net = models.resnet152(pretrained=True).eval()
loss = torch.nn.CrossEntropyLoss(size_average=False)
out1 = net(vin1)
l1 = loss(out1, lab1)
l1.backward()

v1g = vin1.grad.clone()
v1fcg = net.fc.weight.grad.clone()
v1conv1g = net.conv1.weight.grad.clone()

for p in net.parameters():
    void = p.grad.data.zero_()

out2 = net(vin2)
l2 = loss(out2, lab2)
l2.backward()
out3 = net(vin3)
l3 = loss(out3, lab3)
l3.backward()

v2g = vin2.grad.clone()
v2fcg = net.fc.weight.grad.clone()
v2conv1g = net.conv1.weight.grad.clone()
v3g = vin3.grad.clone()

In [29]:
print(l1)
print(l2)
print(l3)

def cmp(v1, v2):
    return (torch.abs(v1 - v2) > 1e-3).long().sum().data[0]
print(cmp(out1[0], out2[0]))
print(cmp(out1[1], out3[0]))
print(cmp(v1g[1], v3g[0]))
print(cmp(v1fcg, v2fcg))
print(cmp(v1conv1g, v2conv1g))

Variable containing:
 18.5222
[torch.FloatTensor of size 1]

Variable containing:
 8.8922
[torch.FloatTensor of size 1]

Variable containing:
 9.6300
[torch.FloatTensor of size 1]

0
0
0
0
0


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.autograd import Variable
import random
import time
from utils import *
from model.custom_modules import TripletLoss

In [2]:
num_classes = 1000
epochs = 1
mini_batch_size = 64
micro_batch_size = 1
alpha = 1.0
image_size = (3, 300, 400)

In [3]:
dataset = [(torch.randn(*image_size), random.randrange(num_classes)) for _ in range(200)]

In [4]:
class WeirdNet(nn.Module):
    def __init__(self, net, k):
        super(WeirdNet, self).__init__()
        self.net = net
        self.k = k
        self.conv_final = nn.Conv2d(2048, 1000, (1, 1))
    
    def forward_single(self, x):
        x = self.net.conv1(x)
        x = self.net.bn1(x)
        x = self.net.relu(x)
        x = self.net.maxpool(x)
        x = self.net.layer1(x)
        x = self.net.layer2(x)
        x = self.net.layer3(x)
        
        feat = self.net.layer4(x)
        x = self.net.avgpool(feat)
        x = self.conv_final(x)
        
        c_maxv, _ = x.max(1)
        c_maxv = c_maxv.view(-1)
        k = min(c_maxv.size(0), self.k)
        _, flat_idx = c_maxv.topk(k)

        def feature_idx(flat_idx):
            cls_idx = flat_idx // x.size(3), flat_idx % x.size(3)
            return (cls_idx[0], cls_idx[0] + 7,
                    cls_idx[1], cls_idx[1] + 7)
        top_idx = [feature_idx(int(i)) for i in flat_idx.data]
        # needed for output
        tmp = c_maxv.data.clone().resize_(x.size(0), 2048)
        acc = Variable(tmp.fill_(0))
        tmp = c_maxv.data.clone().resize_(x.size(0), x.size(1), self.k)
        cls_out = Variable(tmp.fill_(0))

        i = 0
        for x1, x2, y1, y2 in top_idx:
            cls_out[:, :, i] = x[:, :, x1, y1]
            i += 1
            region = feat[:, :, x1, y1]
            acc = acc + region
        return acc, cls_out

#         k = min(self.k, x.size(2) * x.size(3))
#         tmp = Variable(feat.data.clone().resize_(feat.size(0), feat.size(1)).fill_(2))
#         cls_out = Variable(x.data.clone().resize_(x.size(0), x.size(1), k).fill_(0))
#         for i in range(k):
#             tmp = tmp + feat[:, :, random.randrange(feat.size(2)), random.randrange(feat.size(3))]
#             cls_out[:, :, i] = x[:, :, random.randrange(x.size(2)), random.randrange(x.size(3))]
#         return tmp, cls_out
    
    def forward(self, x1, x2, x3):
        return self.forward_single(x1), self.forward_single(x2), self.forward_single(x3)

In [5]:
net = models.resnet152(pretrained=True)
for layer in [net.conv1, net.bn1, net.layer1, net.layer2, net.layer3]:
    for p in layer.parameters():
        p.requires_grad = False
weird_net = WeirdNet(net, 4).cuda().train()
criterion1 = TripletLoss(0.1)
criterion2 = nn.CrossEntropyLoss()
optimizer = optim.SGD((p for p in weird_net.parameters() if p.requires_grad), lr=1e-3)

In [6]:
def micro_batch(last, i, is_final, batch):
    t1_in = torch.Tensor(len(batch), *image_size).cuda()
    t2_in = torch.Tensor(len(batch), *image_size).cuda()
    t3_in = torch.Tensor(len(batch), *image_size).cuda()
    lab1_in = torch.Tensor(len(batch)).cuda()
    lab2_in = torch.LongTensor(len(batch)).cuda()
    for j, (im, lab) in enumerate(batch):
        t1_in[j] = im
        im2 = random.choice(dataset)[0]
        t2_in[j] = im2
        im3 = random.choice(dataset)[0]
        t3_in[j] = im3
        lab1_in[j] = random.choice([1, -1])
        lab2_in[j] = lab
    t1_in = Variable(t1_in)
    t2_in = Variable(t2_in)
    t3_in = Variable(t3_in)
    lab1_in = Variable(lab1_in)
    lab2_in = Variable(lab2_in)
    out = weird_net(t1_in, t2_in, t3_in)
    loss = criterion1(out[0][0], out[1][0], out[2][0])
    cls_out = out[0][1]
    loss2 = criterion2(cls_out[:, :, 0], lab2_in)
    k = cls_out.size(2)
    for i in range(1, k):
        loss2 += criterion2(cls_out[:, :, i], lab2_in)
    loss2 /= k
    loss += alpha * loss2
    loss.backward()
    print('Epoch: {0}, Batch: {1}, Micro idx: {2}'.format(ep, last, j))
    time.sleep(4)
    return last

def mini_batch(last, i, is_final, batch):
    optimizer.zero_grad()
    fold_batches(micro_batch, last, batch, micro_batch_size, cut_end=True)
    optimizer.step()
    return last + 1

In [None]:
def create_epoch(epoch, dataset, test_set):
    return dataset, {}, {}

def create_batch(batch, n):
    t1_in = torch.Tensor(len(batch), *image_size).cuda()
    t2_in = torch.Tensor(len(batch), *image_size).cuda()
    t3_in = torch.Tensor(len(batch), *image_size).cuda()
    lab1_in = torch.Tensor(len(batch)).cuda()
    lab2_in = torch.LongTensor(len(batch)).cuda()
    for j, (im, lab) in enumerate(batch):
        t1_in[j] = im
        im2 = random.choice(dataset)[0]
        t2_in[j] = im2
        im3 = random.choice(dataset)[0]
        t3_in[j] = im3
        lab1_in[j] = random.choice([1, -1])
        lab2_in[j] = lab
    return [t1_in, t2_in, t3_in], [lab1_in, lab2_in]

def create_loss(out, labels_in):
    # out is a tuple of 3 tuples, each for the descriptor
    # and a tensor with all classification results for the highest
    # classification values. the first loss is a simple loss on the
    # descriptors. the second loss is a classification loss for
    # each sub-region of the input. we simply sum-aggregate here
    loss = criterion1(out[0][0], out[1][0], out[2][0])
    cls_out = out[0][1]
    loss2 = criterion2(cls_out[:, :, 0], labels_in[1])
    k = cls_out.size(2)
    for i in range(1, k):
        loss2 += criterion2(cls_out[:, :, i], labels_in[1])
    loss2 /= k
    return loss, loss2

def micro_batch_gen(last, i, is_final, batch):
    prev_loss, mini_batch_size = last
    n = len(batch)
    tensors_in, labels_in = create_batch(batch, n, **batch_args)
    tensors_out = weird_net(*(Variable(t) for t in tensors_in))
    loss, loss2 = create_loss(tensors_out, [Variable(l) for l in labels_in])
    loss_micro = loss * n / mini_batch_size
    val = loss.data[0]
    if loss2 is not None:
        loss2_micro = loss2 * n / mini_batch_size
        val += alpha * (loss2.data[0])
        loss_micro += alpha * loss2_micro
    loss_micro.backward()
    print('Epoch: {0}, Micro batch idx: {1}'.format(epoch, i))
    time.sleep(4)
    return prev_loss + val, mini_batch_size

def mini_batch_gen(last, i, is_final, batch):
    batch_count, score, running_loss = last
    optimizer.zero_grad()
    loss, _ = fold_batches(micro_batch_gen, (0.0, len(batch)), batch, micro_batch_size)
    optimizer.step()
    return batch_count + 1, score, running_loss

for epoch in range(epochs):
    dataset, batch_args, stats_args = create_epoch(epoch, dataset, [])

    init = 0, 0, 0.0  # batch count, score, running loss
    _, best_score, _ = fold_batches(mini_batch_gen, init, dataset, mini_batch_size, cut_end=True)

Epoch: 0, Micro batch idx: 0
Epoch: 0, Micro batch idx: 1
Epoch: 0, Micro batch idx: 2
Epoch: 0, Micro batch idx: 3
Epoch: 0, Micro batch idx: 4
Epoch: 0, Micro batch idx: 5
Epoch: 0, Micro batch idx: 6
Epoch: 0, Micro batch idx: 7
Epoch: 0, Micro batch idx: 8
Epoch: 0, Micro batch idx: 9
Epoch: 0, Micro batch idx: 10
Epoch: 0, Micro batch idx: 11


In [None]:
for ep in range(epochs):
    random.shuffle(dataset)
    fold_batches(mini_batch, 0, dataset, mini_batch_size, cut_end=True)

Epoch: 0, Batch: 0, Micro idx: 0
Epoch: 0, Batch: 0, Micro idx: 0
Epoch: 0, Batch: 0, Micro idx: 0
Epoch: 0, Batch: 0, Micro idx: 0
Epoch: 0, Batch: 0, Micro idx: 0
Epoch: 0, Batch: 0, Micro idx: 0
Epoch: 0, Batch: 0, Micro idx: 0


In [1]:
import torch
from train_siam_main import main
from test_params import P
import traceback

with torch.cuda.device(P.cuda_device):
    try:
        main()
    except:
        P.log_detail(None, traceback.format_exc())
        raise

Loading and transforming train/test sets.
Starting descriptor training
#pos (without order, with duplicates):15258
(0, 0.3049474395811558)
(1, 0.38165345788002014)
(2, 0.3234449662268162)
(3, 0.34544791281223297)
(4, 0.318661592900753)
(5, 0.30059607699513435)
(6, 0.37856121733784676)
(7, 0.2738928832113743)
(8, 0.3072989135980606)
(9, 0.2927310988306999)
(10, 0.3443635180592537)
(11, 0.36313677206635475)
(12, 0.2759423814713955)
(13, 0.4019693583250046)
(14, 0.30114175379276276)
(15, 0.28684981912374496)
(16, 0.35947710275650024)
(17, 0.3253849931061268)
(18, 0.3229123614728451)
(19, 0.3405882641673088)
(20, 0.2589573413133621)
(21, 0.32149915024638176)
(22, 0.3255244642496109)
(23, 0.44003306329250336)
(24, 0.3810008689761162)
(25, 0.26335681043565273)
(26, 0.40452470630407333)
(27, 0.36283138021826744)
(28, 0.3409733884036541)
(29, 0.34127793461084366)
(30, 0.3740284889936447)
(31, 0.2719874605536461)
(32, 0.3513811342418194)
(33, 0.36447054147720337)
(34, 0.34081560373306274)
(35, 

KeyboardInterrupt: 