In [6]:
import torch
import torch.nn as nn
import numpy as np
from  scipy.ndimage import zoom as imzoom
import sys
import os

from PIL import Image
from matplotlib import mlab
import matplotlib.pyplot as plt
import numpy as np
from pytorch_sift import SIFTNet
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
import torch.optim as optim
from tqdm import tqdm

USE_CUDA = False

LOG_DIR = 'log_snaps'
BASE_LR = 0.00000001
from SpatialTransformer2D import SpatialTransformer2d
from HardNet import HardNet
hardnet = HardNet()
checkpoint = torch.load('HardNetLib.pth')
hardnet.load_state_dict(checkpoint['state_dict'])

class SparseImgRepresenter(nn.Module):
    def __init__(self, 
             detector_net = None,
             descriptor_net = None,    
             use_cuda = False):
        super(SparseImgRepresenter, self).__init__()
        self.detector = detector_net;
        self.descriptor = descriptor_net;
        return
    def forward(self, input_img, skip_desc = False):
        aff_norm_patches, LAFs = self.detector(input_img)
        if not skip_desc:
            descs = self.descriptor(aff_norm_patches);
            return aff_norm_patches, LAFs, descs
        return aff_norm_patches, LAFs

detnet = nn.Sequential(
                nn.Conv2d(1, 16, kernel_size=3, padding = 1),
                nn.ReLU(),
                nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
                nn.ReLU(),
                nn.Conv2d(16, 32, kernel_size=3, stride=2,padding=1),
                nn.ReLU()
            )
ConvST_net = SpatialTransformer2d( num_input_channels = 1,
                 num_ouput_channels = 32,
                 feature_net = None,
                 out_patch_size = 16,
                 out_stride = 16,
                 min_zoom = 0.9,
                 max_zoom = 1.1,
                 min_tilt = 0.9,
                 max_tilt = 1.1,
                 max_rot = 0.1,
                 max_shift = 0.5,
                 mrSize = 1.0, use_cuda = USE_CUDA)
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_normal(m.weight.data)

#SIRNet = SparseImgRepresenter(detector_net = ConvST_net, descriptor_net = hardnet)
#aff_norm_patches, LAFs, descs = SIRNet(var_image_reshape)

def distance_matrix_vector(anchor, positive):
    """Given batch of anchor descriptors and positive descriptors calculate distance matrix"""

    d1_sq = torch.sum(anchor * anchor, dim=1)
    d2_sq = torch.sum(positive * positive, dim=1)
    eps = 1e-6
    return torch.sqrt(torch.abs((d1_sq.expand(positive.size(0), anchor.size(0)) +
                       torch.t(d2_sq.expand(anchor.size(0), positive.size(0)))
                      - 2.0 * torch.bmm(positive.unsqueeze(0), torch.t(anchor).unsqueeze(0)).squeeze(0))+eps))
def LAFs_to_H_frames(aff_pts, use_cuda = False):
    H3_x = torch.Tensor([0, 0, 1 ]).unsqueeze(0).unsqueeze(0).expand_as(aff_pts[:,0:1,:]);
    H3_x = torch.autograd.Variable(H3_x)
    if use_cuda:
        H3_x = H3_x.cuda()
    return torch.cat([aff_pts, H3_x], dim = 1)
def reproject_to_canonical_Frob_batched(LHF1_inv, LHF2, batch_size = 2, use_cuda = False):
    out = torch.autograd.Variable(torch.zeros((LHF1_inv.size(0), LHF2.size(0))))
    eye1 = torch.autograd.Variable(torch.eye(3), requires_grad = False)
    if use_cuda:
        out = out.cuda()
        eye1 = eye1.cuda()
    len1 = LHF1_inv.size(0)
    len2 = LHF2.size(0)
    n_batches = int(np.floor(len1 / batch_size) + 1);
    for b_idx in range(n_batches):
        #print b_idx
        start = b_idx * batch_size;
        fin = min((b_idx+1) * batch_size, len1)
        current_bs = fin - start
        if current_bs == 0:
            break
        resh1 = LHF1_inv[start:fin, :, :].unsqueeze(0).expand(len2,current_bs, 3, 3)
        resh1 = resh1.contiguous().view(-1,3,3);
        should_be_eyes = torch.bmm(resh1,
                                   LHF2.unsqueeze(1).expand(len2,current_bs, 3,3).contiguous().view(-1,3,3))
        out[start:fin, :] = torch.sum((should_be_eyes - eye1.unsqueeze(0).expand_as(should_be_eyes))**2, dim=1).sum(dim = 1).view(current_bs, len2)
    return out

def get_GT_correspondence_indexes(aff_pts1,aff_pts2, H1to2, dist_threshold = 4, use_cuda = False):
    LHF2 = LAFs_to_H_frames(aff_pts2, use_cuda = use_cuda)
    LHF2_reprojected_to_1 = torch.bmm(H1to2.expand_as(LHF2), LHF2);
    LHF2_reprojected_to_1 = LHF2_reprojected_to_1 / LHF2_reprojected_to_1[:,2:,2:].expand_as(LHF2_reprojected_to_1);
    just_centers1 = aff_pts1[:,:,2];
    just_centers2_repr_to_1 = LHF2_reprojected_to_1[:,0:2,2];
    dist  = distance_matrix_vector(just_centers2_repr_to_1, just_centers1)
    min_dist, idxs_in_2 = torch.min(dist,1)
    plain_indxs_in1 = torch.autograd.Variable(torch.arange(0, idxs_in_2.size(0)),requires_grad = False)
    if use_cuda:
        plain_indxs_in1 = plain_indxs_in1.cuda()
    mask =  min_dist <= dist_threshold
    return min_dist[mask], plain_indxs_in1[mask], idxs_in_2[mask]

def get_GT_correspondence_indexes_Fro(aff_pts1,aff_pts2, H1to2, dist_threshold = 4, use_cuda = False):
    LHF2 = LAFs_to_H_frames(aff_pts2, use_cuda = use_cuda)
    LHF2_reprojected_to_1 = torch.bmm(H1to2.expand_as(LHF2), LHF2);
    LHF2_reprojected_to_1 = LHF2_reprojected_to_1 / LHF2_reprojected_to_1[:,2:,2:].expand_as(LHF2_reprojected_to_1);
    LHF1 = LAFs_to_H_frames(aff_pts1, use_cuda = False)
    
    LHF1_inv = torch.autograd.Variable(torch.zeros(LHF1.size()))
    if use_cuda:
        LHF1_inv = LHF1_inv.cuda()
    for i in range(len(LHF1_inv)):
        LHF1_inv[i,:,:] = LHF1[i,:,:].inverse()
    frob_norm_dist = reproject_to_canonical_Frob_batched(LHF1_inv, LHF2_reprojected_to_1, batch_size = 2, use_cuda = use_cuda)
    #just_centers1 = aff_pts1[:,:,2];
    #just_centers2_repr_to_1 = LHF2_reprojected_to_1[:,0:2,2];
    #dist  = distance_matrix_vector(just_centers2_repr_to_1, just_centers1)
    min_dist, idxs_in_2 = torch.min(frob_norm_dist,1)
    plain_indxs_in1 = torch.autograd.Variable(torch.arange(0, idxs_in_2.size(0)))
    if use_cuda:
        plain_indxs_in1 = plain_indxs_in1.cuda()
    mask =  min_dist <= dist_threshold
    return min_dist[mask], plain_indxs_in1[mask], idxs_in_2[mask]

def adjust_learning_rate(optimizer):
    """Updates the learning rate given the learning rate decay.
    The routine has been implemented according to the original Lua SGD optimizer
    """
    n_triplets = 116*5.
    n_epochs = 10.
    for group in optimizer.param_groups:
        if 'step' not in group:
            group['step'] = 0.
        else:
            group['step'] += 1.
        group['lr'] =  BASE_LR #*  .0 - float(group['step']) * float(1.0) / (n_triplets * float(n_epochs)))
    return

def create_optimizer(model, new_lr, wd):
    # setup optimizer
    optimizer = optim.SGD(model.parameters(), lr=new_lr,
                          momentum=0.5, dampening=0.5,
                          weight_decay=wd)
    return optimizer

def create_loaders(load_random_triplets = False):

    kwargs = {'num_workers': 2, 'pin_memory': True} if True else {}

    transform = transforms.Compose([
            transforms.ToTensor()])
    #        transforms.Normalize((args.mean_image,), (args.std_image,))])

    train_loader = torch.utils.data.DataLoader(
            dset.HPatchesSeq('/home/old-ufo/dev/LearnedDetector/dataset/', 'a',
                             train=True, transform=None, 
                             download=True), batch_size = 1,
        shuffle = False, **kwargs)

    test_loader = torch.utils.data.DataLoader(
            dset.HPatchesSeq('/home/old-ufo/dev/LearnedDetector/dataset/', 'a',
                             train=False, transform=None, 
                             download=True), batch_size = 1,
        shuffle = False, **kwargs)

    return train_loader, test_loader

train_loader, test_loader = create_loaders()


# Found cached data /home/old-ufo/dev/LearnedDetector/dataset/hpatches-sequences-release_a_train.pt
# Found cached data /home/old-ufo/dev/LearnedDetector/dataset/hpatches-sequences-release_a_test.pt


In [2]:
anc = torch.from_numpy(np.random.random((125,10)))
pos = torch.from_numpy(np.random.random((34,10)))
dm = distance_matrix_vector(anc,pos)
print dm
min_dist, idxs_in_2 = torch.min(dm,1)
print min_dist.shape, idxs_in_2.shape, min_dist
print np.sum(np.isnan(dm.numpy()))


 0.9670  1.3720  1.3227  ...   1.3188  1.0672  1.1342
 1.1052  1.0927  1.0343  ...   1.2392  0.9956  1.0502
 1.0429  1.4750  1.2408  ...   1.0685  0.9707  1.2900
          ...             ⋱             ...          
 0.9236  1.2108  1.0485  ...   1.0474  1.2338  1.0589
 1.2955  1.7815  1.6110  ...   1.3607  1.2053  1.4766
 1.1045  1.1967  1.3532  ...   1.1709  1.3722  1.5626
[torch.DoubleTensor of size 34x125]

torch.Size([34]) torch.Size([34]) 
 0.6738
 0.6672
 0.5441
 0.6653
 0.6209
 0.5004
 0.7529
 0.5671
 0.5135
 0.8085
 0.5102
 0.7387
 0.7693
 0.4912
 0.7294
 0.7124
 0.6740
 0.6596
 0.5502
 0.6298
 0.7058
 0.6127
 0.6672
 0.6665
 0.6601
 0.6159
 0.7656
 0.7316
 0.5576
 0.4161
 0.6304
 0.5716
 0.5736
 0.6415
[torch.DoubleTensor of size 34]

0


In [3]:

SIRNet = SparseImgRepresenter(detector_net = ConvST_net, descriptor_net = SIFTNet(patch_size = 16, do_cuda = USE_CUDA))
SIRNet.detector.apply(weights_init)

model = SIRNet
if USE_CUDA:
    model = model.cuda()

optimizer1 = create_optimizer(model, BASE_LR, 5e-5)


In [7]:

def train(train_loader, model, optimizer, epoch, cuda = True):
    # switch to train mode
    model.train()
    log_interval = 1
    spatial_only = True
    pbar = enumerate(train_loader)
    for batch_idx, data in pbar:
        print 'Batch idx', batch_idx
        #print model.detector.shift_net[0].weight.data.cpu().numpy()
        img1, img2, H  = data
        #if np.abs(np.sum(H.numpy()) - 3.0) > 0.01:
        #    continue
        H = H.squeeze(0)
        img1 = img1.float().squeeze(0)
        img1 = img1 - img1.mean()
        img1 = img1 / 50.#(img1.std() + 1e-8)
        img2 = img2.float().squeeze(0)
        img2 = img2 - img2.mean()
        img2 = img2 / 50.#(img2.std() + 1e-8)
        if cuda:
            img1, img2, H = img1.cuda(), img2.cuda(), H.cuda()
        img1, img2, H = Variable(img1), Variable(img2), Variable(H)
        aff_norm_patches1, LAFs1 = model(img1, skip_desc = True)
        aff_norm_patches2, LAFs2 = model(img2, skip_desc = True)
        fro_dists, idxs_in1, idxs_in2 = get_GT_correspondence_indexes_Fro(LAFs1, LAFs2, H, dist_threshold = 1.0, use_cuda = cuda);
        if  len(fro_dists.size()) == 0:
            optimizer.zero_grad()
            print 'skip'
            continue
        loss = fro_dists.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #adjust_learning_rate(optimizer)
        print epoch,batch_idx, loss.data.cpu().numpy()[0]

    torch.save({'epoch': epoch + 1, 'state_dict': model.state_dict()},
               '{}/checkpoint_{}.pth'.format(LOG_DIR, epoch))


In [8]:

start = 0
end = 10
for epoch in range(start, end):
    print 'epoch', epoch
    if USE_CUDA:
        model = model.cuda()
    train(train_loader, model, optimizer1, epoch, cuda = USE_CUDA)


epoch 0
Batch idx 0
0 0 0.0464067
Batch idx 1
0 1 0.0472269
Batch idx 2
0 2 0.0476021
Batch idx 3
0 3 0.0489703
Batch idx 4
0 4 0.0498334
Batch idx 5


Process Process-4:
Process Process-3:
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self.run()
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
  File "/usr/local/lib/python2.7/dist-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python2.7/dist-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/usr/lib/python2.7/multiprocessing/queues.py", line 376, in get
    r = index_queue.get()
    racquire()
  File "/usr/lib/python2.7/multiprocessing/queues.py", line 378, in get
KeyboardInterrupt
    return recv()
  File 

KeyboardInterrupt: 

In [4]:
cuda = False
model.train()
log_interval = 1
spatial_only = True
pbar = enumerate(train_loader)
for batch_idx, data in pbar:
    #print model.detector.shift_net[0].weight.data.cpu().numpy()
    img1, img2, H  = data
    #if np.abs(np.sum(H.numpy()) - 3.0) > 0.01:
    #    continue
    H = H.squeeze(0)
    img1 = img1.float().squeeze(0)
    img1 = img1 - img1.mean()
    img1 = img1 / 50.#(img1.std() + 1e-8)
    img2 = img2.float().squeeze(0)
    img2 = img2 - img2.mean()
    img2 = img2 / 50.#(img2.std() + 1e-8)
    if cuda:
        img1, img2, H = img1.cuda(), img2.cuda(), H.cuda()
    img1, img2, H = Variable(img1), Variable(img2), Variable(H)
    aff_norm_patches1, LAFs1 = model(img1, skip_desc = True)
    aff_norm_patches2, LAFs2 = model(img2, skip_desc = True)
    break

In [5]:
LHF2 = LAFs_to_H_frames(LAFs2, use_cuda = False)
LHF2_reprojected_to_1 = torch.bmm(H.expand_as(LHF2), LHF2);
LHF2_reprojected_to_1 = LHF2_reprojected_to_1 / LHF2_reprojected_to_1[:,2:,2:].expand_as(LHF2_reprojected_to_1);
LHF1 = LAFs_to_H_frames(LAFs1, use_cuda = False)

In [6]:
LHF1_inv = torch.autograd.Variable(torch.zeros(LHF1.size()))
if False:
    LHF1_inv = LHF1_inv.cuda()
for i in range(len(LHF1_inv)):
    LHF1_inv[i,:,:] = LHF1[i,:,:].inverse()
#should_be_eye = torch.bmm(LHF1_inv, LHF2_reprojected_to_1)

In [28]:
should_be_eye = torch.bmm(LHF1_inv.expand(), LHF2_reprojected_to_1)
print should_be_eye.shape

torch.Size([1024, 3, 3])


In [29]:
print should_be_eye[0,:,:]

Variable containing:
 0.9988  0.0000 -0.0022
-0.0000  0.9993 -0.0073
 0.0000  0.0000  1.0000
[torch.FloatTensor of size 3x3]



In [11]:
A2_1 =  LHF2_reprojected_to_1[0,:,:]
A1 =  LHF1[0,:,:]


In [23]:
should_be_eye = torch.matmul(A1.inverse(), A2_1)
frob_norm_err = torch.nn.MSELoss()(should_be_eye, torch.autograd.Variable(torch.eye(3)))

In [24]:
frob_norm_err

Variable containing:
1.00000e-06 *
  6.6001
[torch.FloatTensor of size 1]

In [None]:
just_centers1 = LAFs1[:,:,2];
just_centers2_repr_to_1 = LHF2_reprojected_to_1[:,0:2,2];
print 'min, max AP1', just_centers1.data.cpu().numpy().min(), just_centers1.data.cpu().numpy().max()
print 'min, max AP2', just_centers2_repr_to_1.data.cpu().numpy().min(), just_centers2_repr_to_1.data.cpu().numpy().max()


In [12]:

dist  = distance_matrix_vector(just_centers2_repr_to_1, just_centers1)


In [24]:
print just_centers2_repr_to_1[59,:]
print just_centers1[59,:]


Variable containing:
 440.2994
  24.0781
[torch.FloatTensor of size 2]

Variable containing:
 440.2048
  24.1159
[torch.FloatTensor of size 2]



In [49]:
def distance_matrix_vector(anchor, positive):
    """Given batch of anchor descriptors and positive descriptors calculate distance matrix"""

    d1_sq = torch.sum(anchor * anchor, dim=1)
    d2_sq = torch.sum(positive * positive, dim=1)
    eps = 1e-6
    return torch.sqrt(torch.abs((d1_sq.expand(positive.size(0), anchor.size(0)) +
                       torch.t(d2_sq.expand(anchor.size(0), positive.size(0)))
                      - 2.0 * torch.bmm(positive.unsqueeze(0), torch.t(anchor).unsqueeze(0)).squeeze(0))+eps))
print distance_matrix_vector(just_centers2_repr_to_1[59:60,:],just_centers1[59:60,:])

Variable containing:
 0.1768
[torch.FloatTensor of size 1x1]



In [15]:
dist_np = dist.data.cpu().numpy()

In [21]:
print np.where(np.isnan(dist_np))

(array([ 59, 215, 277, 285, 364, 398, 407, 472, 496, 747, 903, 910, 955,
       960, 964]), array([ 59, 215, 277, 285, 364, 398, 407, 472, 496, 747, 903, 910, 955,
       960, 964]))


In [7]:
print model.detector
#print np.sum(np.isnan(model.detector.shift_net[0].weight.data.cpu().numpy()))
print model.detector.shift_net[0].weight.data.cpu().numpy()

SpatialTransformer2d (
  (spatial_transformer_feature_net): Sequential (
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): ReLU ()
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (3): ReLU ()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (5): ReLU ()
  )
  (psi_net): Sequential (
    (0): Conv2d(32, 1, kernel_size=(4, 4), stride=(4, 4))
    (1): Tanh ()
  )
  (theta_net): Sequential (
    (0): Conv2d(32, 1, kernel_size=(4, 4), stride=(4, 4))
    (1): Tanh ()
  )
  (shift_net): Sequential (
    (0): Conv2d(32, 2, kernel_size=(4, 4), stride=(4, 4))
    (1): Tanh ()
  )
  (iso_scale_net): Sequential (
    (0): Conv2d(32, 1, kernel_size=(4, 4), stride=(4, 4))
    (1): Tanh ()
  )
  (horizontal_tilt_net): Sequential (
    (0): Conv2d(32, 1, kernel_size=(4, 4), stride=(4, 4))
    (1): Tanh ()
  )
)
[[[[ nan  nan  nan  nan]
   [ nan  nan  nan  nan]
   [ nan  nan