In [None]:
%matplotlib inline

import sys, os
sys.path.insert(0, '../../pytorch-segmentation-detection/vision/')
sys.path.append('../../pytorch-segmentation-detection/')

# Use the second GPU (device index 1); change this if you want to use the first one
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

from PIL import Image
from matplotlib import pyplot as plt

import torch
from torchvision import transforms
from torch.autograd import Variable
import pytorch_segmentation_detection.models.resnet_dilated as resnet_dilated

import numpy as np
import glob

import sys; sys.path.append('../dataset')
sys.path.append('../correspondence_tools')
import correspondence_plotter
from spartan_dataset_masked import SpartanDataset


# Dimensionality of the descriptors; it selects the checkpoint directory below
# and is later passed to the network constructor as its number of outputs.
descriptor_dimensionality = 3

# All checkpoints of this training run, in sorted filename order.
nets = glob.glob(
    "trained_models/train_only_10_drill_long_" + str(descriptor_dimensionality) + "d/dense_resnet*.pth"
)
nets.sort()

print("Networks:")
for net in nets:
    # Joined with a single space so the output matches a two-item print statement.
    print(" ".join(["   - ", net]))

# Two frames of one test scene, used as image "a" and image "b" below.
lf = SpartanDataset(mode="test")
scene = "06_drill_long_downsampled"
img_a_index, img_b_index = "000000", "001337"

img_a_rgb, img_a_depth, img_a_pose = lf.get_specific_rgbd_with_pose(scene, img_a_index)
img_b_rgb, img_b_depth, img_b_pose = lf.get_specific_rgbd_with_pose(scene, img_b_index)

In [None]:
# Preprocessing applied to every image before it is fed to a network:
# a pipeline consisting of a single torchvision ToTensor step.
valid_transform = transforms.Compose([transforms.ToTensor()])

# One-slot cache holding the most recently loaded network, keyed by
# (checkpoint path, descriptor dimensionality).
_forward_on_img_cache = {}


def forward_on_img(net, img):
    """Run the descriptor network stored at ``net`` on a single image.

    The network is a ``Resnet34_8s`` with ``descriptor_dimensionality`` output
    channels, put in eval mode on the GPU. Building it and reading its weights
    from disk used to happen on every call; the most recently used network is
    now kept in a one-slot cache so back-to-back calls with the same checkpoint
    reuse it. Only one network is retained at a time to bound GPU memory.

    NOTE: the cache is keyed on the checkpoint path, so a checkpoint file that
    is overwritten on disk is only re-read after a different checkpoint has
    been used or after this cell has been re-run.

    Args:
        net: path of a checkpoint readable by ``torch.load`` whose content is a
            state dict for ``Resnet34_8s``.
        img: the image to process; anything ``valid_transform`` accepts.

    Returns:
        The network output as a numpy array on the CPU, channels last, with
        all singleton dimensions squeezed out.
    """
    cache_key = (net, descriptor_dimensionality)
    fcn = _forward_on_img_cache.get(cache_key)
    if fcn is None:
        # Drop the previously cached network first so its memory can be
        # released before the new one is built.
        _forward_on_img_cache.clear()
        fcn = resnet_dilated.Resnet34_8s(num_classes=descriptor_dimensionality)
        fcn.load_state_dict(torch.load(net))
        fcn.cuda()
        fcn.eval()
        _forward_on_img_cache[cache_key] = fcn

    img = valid_transform(img)
    img = img.unsqueeze(0)  # add the batch dimension
    img = Variable(img.cuda())
    res = fcn(img)
    res = res.squeeze(0)  # drop the batch dimension again
    res = res.permute(1, 2, 0)  # channels first -> channels last
    return res.data.cpu().numpy().squeeze()

# Descriptor images of both views, computed with the first checkpoint of the sorted list.
res_a, res_b = (
    forward_on_img(nets[0], img_a_rgb),
    forward_on_img(nets[0], img_b_rgb),
)

In [None]:
def normalize(res):
    """Linearly rescale ``res`` so that its minimum maps to 0 and its maximum to 1.

    Args:
        res: array-like of numbers. Non-floating input is converted to
            float64 first so the division below is never an integer division.

    Returns:
        Array of the same shape as ``res``. A constant input (maximum equal to
        minimum) yields all zeros instead of dividing by zero and producing NaN.
    """
    res = np.asarray(res)
    if not np.issubdtype(res.dtype, np.floating):
        res = res.astype(np.float64)
    shifted = res - np.min(res)
    peak = np.max(shifted)
    if peak == 0:
        # Constant input: there is no range to stretch.
        return np.zeros_like(shifted)
    return shifted / peak

# 3x2 grid: RGB images on top, depth images in the middle, normalized
# descriptor images at the bottom (image a on the left, image b on the right).
# The guard restricts plotting to 1- or 3-dimensional descriptors (presumably
# because only those can be handed to imshow directly).
if descriptor_dimensionality in (1, 3):
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))
    axes = axes.ravel()
    images = [
        img_a_rgb, img_b_rgb,
        img_a_depth, img_b_depth,
        normalize(res_a), normalize(res_b),
    ]
    for ax, img in zip(axes, images):
        ax.imshow(img)
    plt.show()

## What does the evolution of the descriptors look like?

In [None]:
%%javascript
// NOTE(review): presumably raises the notebook's output auto-scroll threshold so
// that the long outputs of the following cells are not put in a scroll box -- confirm.
IPython.OutputArea.auto_scroll_threshold = 9999;

In [None]:
# One figure per checkpoint, in sorted order, showing the normalized
# descriptor image of image a (left) and image b (right).
if descriptor_dimensionality in (1, 3):
    for index, this_net in enumerate(nets):
        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
        if index == 0:
            # Only the very first figure carries the column titles.
            axes[0].set_title("img a")
            axes[1].set_title("img b")
        for ax, img in zip(axes, (img_a_rgb, img_b_rgb)):
            ax.imshow(normalize(forward_on_img(this_net, img)))
        # Label each figure with the checkpoint it was produced by.
        axes[0].set_ylabel(this_net)
        plt.show()

## How about overlaying these images?



In [None]:
# From here on use the last checkpoint of the sorted list.
this_net = nets[-1]
if descriptor_dimensionality in (1, 3):
    fig, axes = plt.subplots(1, ncols=2, figsize=(15, 5))
    for ax, img in zip(axes, (img_a_rgb, img_b_rgb)):
        # Descriptor image first, then the RGB image half transparent on top.
        ax.imshow(normalize(forward_on_img(this_net, img)))
        ax.imshow(img, alpha=0.5)
    axes[0].set_title("img a")
    axes[1].set_title("img b")
    axes[0].set_ylabel(this_net)
    plt.show()

## What if we plot the heatmap of correspondences between two images?

In [None]:
import correspondence_finder as cf
# Look for pixel correspondences between the two views from depth and pose.
uv_a, uv_b = cf.batch_find_pixel_correspondences(
    img_a_depth, img_a_pose,
    img_b_depth, img_b_pose,
    num_attempts=100)

# Example values noted by the original author:
#   uv_a = (390,390),(171,171)
#   uv_b = (495,495),(322,322)

# The first element of uv_a is the width coordinate (640 max), the second
# element is the height coordinate (480 max). Swap them so that the sample
# is ordered like an array index: 480, 640.
sample_from_a = (uv_a[1][0], uv_a[0][0])

# The descriptor image has shape (480, 640, 3); indexing it with the
# (row, column) tuple yields the descriptor of the sampled pixel.
res_a = forward_on_img(this_net, img_a_rgb)
descriptor_at_pixel = res_a[sample_from_a]
print(descriptor_at_pixel)

In [None]:
res_b = forward_on_img(this_net, img_b_rgb)

# Squared descriptor distance between every pixel of image b and the sampled
# descriptor of image a, computed pixel by pixel. Multiplying by 0.0 turns the
# zero array into a float array whatever the dtype of the depth image is.
norm_diffs = np.zeros_like(img_b_depth)*0.0
for i, row in enumerate(norm_diffs):
    for j in range(len(row)):
        norm_diffs[i][j] = np.linalg.norm(res_b[i][j] - descriptor_at_pixel)**2

print(norm_diffs)

# Full value range.
plt.imshow(norm_diffs, cmap=plt.cm.BuPu_r)
plt.colorbar()
plt.show()

# Same data with the colour scale clipped at 0.5.
plt.imshow(norm_diffs, cmap=plt.cm.BuPu_r, vmax=0.5)
plt.colorbar()
plt.show()

# Vectorized version of the double loop above. The original cell computed it
# but then plotted norm_diffs a second time, so this result was never shown.
norm_diffs_numpy = np.sum(np.square(res_b - descriptor_at_pixel), axis=2)
plt.imshow(norm_diffs_numpy, cmap=plt.cm.BuPu_r, vmax=0.5)
plt.colorbar()
plt.show()

In [None]:
from matplotlib.patches import Circle
# Left: image a with the sampled pixel marked by a green circle.
# Right: the distance heatmap with image b faintly overlaid.
fig, axes = plt.subplots(1, ncols=2, figsize=(15, 5))
axes[0].set_title("img a")
axes[1].set_title("img b")

# sample_from_a is ordered like an array index, so it is reversed before being
# used as the circle centre.
circ = Circle(
    sample_from_a[::-1],
    radius=10,
    facecolor='g',
    edgecolor='white',
    fill=True,
    linewidth=2.0,
    linestyle='solid',
)
axes[0].add_patch(circ)
axes[0].imshow(img_a_rgb, alpha=0.8)

axes[1].imshow(norm_diffs, cmap=plt.cm.BuPu_r)
axes[1].imshow(img_b_rgb, alpha=0.2)

axes[0].set_ylabel(this_net)
plt.show()

## Threshold the heatmap, compute the best predicted match (purple circle), and "ground truth" best match (green)

In [None]:
def make_correspondence_heatmap(pixel_a, pixel_b, img_a_rgb, img_b_rgb, res_a, res_b):
    """Plot how well the descriptor at ``pixel_a`` of image a matches image b.

    The left panel shows image a with ``pixel_a`` marked in green. The right
    panel shows the squared descriptor distance between that pixel and every
    pixel of image b (colour scale clipped at 2) with image b faintly
    overlaid, ``pixel_b`` marked in green and the pixel with the smallest
    distance marked in purple.

    Changes from the earlier version: the heatmap is sized from ``res_b``
    rather than from the global ``img_b_depth``, the distances are computed
    with one vectorized expression instead of a per-pixel Python loop, and the
    best match is decoded with ``np.unravel_index`` instead of assuming an
    image width of 640.

    Args:
        pixel_a: index of the query pixel in ``res_a``, first and second array
            index in that order.
        pixel_b: index of the reference match in image b, same ordering.
        img_a_rgb: image a, as accepted by ``imshow``.
        img_b_rgb: image b, as accepted by ``imshow``.
        res_a: descriptor array of image a.
        res_b: descriptor array of image b.

    The figure is labelled with the notebook-level ``this_net``.
    """
    descriptor_at_pixel = res_a[pixel_a[0], pixel_a[1]]

    # Squared descriptor distance for every pixel of image b.
    diffs = res_b - descriptor_at_pixel
    if diffs.ndim == 3:
        norm_diffs = np.sum(np.square(diffs), axis=2)
    else:
        # No descriptor axis (e.g. it was squeezed away): the squared
        # difference already is the squared distance.
        norm_diffs = np.square(diffs)

    fig, axes = plt.subplots(1, ncols=2)
    fig.set_figheight(5)
    fig.set_figwidth(15)
    axes[0].set_title("img a")
    axes[1].set_title("img b")

    # Pixels are given in array-index order; reverse them for the circle centre.
    circ = Circle(pixel_a[::-1], radius=10, facecolor='g', edgecolor='white',
                  fill=True, linewidth=2.0, linestyle='solid')
    axes[0].add_patch(circ)
    axes[0].imshow(img_a_rgb, alpha=0.8)

    axes[1].imshow(norm_diffs, cmap=plt.cm.BuPu_r, vmax=2)
    axes[1].imshow(img_b_rgb, alpha=0.2)
    circ = Circle(pixel_b[::-1], radius=10, facecolor='g', edgecolor='white',
                  fill=True, linewidth=2.0, linestyle='solid', alpha=0.8)
    axes[1].add_patch(circ)

    # Best predicted match: the pixel of image b with the smallest distance.
    best_match = np.unravel_index(np.argmin(norm_diffs), norm_diffs.shape)
    circ = Circle(best_match[::-1], radius=10, facecolor='purple', edgecolor='white',
                  fill=True, linewidth=2.0, linestyle='solid', alpha=0.8)
    axes[1].add_patch(circ)

    axes[0].set_ylabel(this_net)
    plt.show()
    
res_a = forward_on_img(this_net, img_a_rgb)
res_b = forward_on_img(this_net, img_b_rgb)

# Visualize at most the first ten correspondences.
for i in range(min(10, len(uv_a[0]))):
    # Swap the two coordinates so the pixels are ordered like array indices.
    sample_from_a = (uv_a[1][i], uv_a[0][i])
    answer_from_b = (uv_b[1][i], uv_b[0][i])
    make_correspondence_heatmap(sample_from_a, answer_from_b, img_a_rgb, img_b_rgb, res_a, res_b)

## How about correspondences from lots of different views, different scenes?

In [None]:
from labelfusion import LabelFusionDataset
# From here on `lf` refers to a LabelFusionDataset instead of the SpartanDataset
# used at the top of the notebook.
lf = LabelFusionDataset()
# Ten rounds: pick a scene, pick two frames of it, find correspondences between
# them, and plot the heatmap for the first correspondence.
for i in range(10):
    scene_directory = lf.get_random_scene_directory()
    img_a_rgb, img_a_depth, img_a_pose = lf.get_random_rgbd_with_pose(scene_directory)
    # NOTE(review): presumably returns a frame of the same scene whose pose differs from img_a_pose -- confirm
    img_b_rgb, img_b_depth, img_b_pose = lf.get_different_rgbd_with_pose(scene_directory, img_a_pose)
    uv_a, uv_b = cf.batch_find_pixel_correspondences(img_a_depth, img_a_pose, 
                                                    img_b_depth, img_b_pose, 
                                                    num_attempts=100)

    # Descriptors of both frames from the currently selected checkpoint.
    res_a = forward_on_img(this_net, img_a_rgb)
    res_b = forward_on_img(this_net, img_b_rgb)

    # Only the first correspondence is used; the two coordinates are swapped
    # so the pixels are ordered like array indices.
    sample_from_a = (uv_a[1][0], uv_a[0][0])
    answer_from_b = (uv_b[1][0], uv_b[0][0])
    make_correspondence_heatmap(sample_from_a, answer_from_b, img_a_rgb, img_b_rgb, res_a, res_b)


## What do the descriptors look like in different scenes?

In [None]:
# Two frames of scene 2017-06-13-01; descriptors are computed right after each
# frame is loaded.
scene = "2017-06-13-01"
img_a_index, img_b_index = "0000000001", "0000001000"

img_a_rgb, img_a_depth, img_a_pose = lf.get_specific_rgbd_with_pose(scene, img_a_index)
res_a = forward_on_img(this_net, img_a_rgb)

img_b_rgb, img_b_depth, img_b_pose = lf.get_specific_rgbd_with_pose(scene, img_b_index)
res_b = forward_on_img(this_net, img_b_rgb)

# 3x2 grid: RGB on top, depth in the middle, normalized descriptors at the bottom.
if descriptor_dimensionality in (1, 3):
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))
    axes = axes.ravel()
    images = [
        img_a_rgb, img_b_rgb,
        img_a_depth, img_b_depth,
        normalize(res_a), normalize(res_b),
    ]
    for ax, img in zip(axes, images):
        ax.imshow(img)
    plt.show()

In [None]:
# Overlay of the currently loaded image pair on its descriptors, using the
# last checkpoint of the sorted list.
this_net = nets[-1]
if descriptor_dimensionality in (1, 3):
    fig, axes = plt.subplots(1, ncols=2, figsize=(15, 5))
    for ax, img in zip(axes, (img_a_rgb, img_b_rgb)):
        # Descriptor image first, then the RGB image half transparent on top.
        ax.imshow(normalize(forward_on_img(this_net, img)))
        ax.imshow(img, alpha=0.5)
    axes[0].set_title("img a")
    axes[1].set_title("img b")
    axes[0].set_ylabel(this_net)
    plt.show()

In [None]:
# Two frames of scene 2017-06-13-15; descriptors are computed right after each
# frame is loaded.
scene = "2017-06-13-15"
img_a_index, img_b_index = "0000000001", "0000001000"

img_a_rgb, img_a_depth, img_a_pose = lf.get_specific_rgbd_with_pose(scene, img_a_index)
res_a = forward_on_img(this_net, img_a_rgb)

img_b_rgb, img_b_depth, img_b_pose = lf.get_specific_rgbd_with_pose(scene, img_b_index)
res_b = forward_on_img(this_net, img_b_rgb)

# 3x2 grid: RGB on top, depth in the middle, normalized descriptors at the bottom.
if descriptor_dimensionality in (1, 3):
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))
    axes = axes.ravel()
    images = [
        img_a_rgb, img_b_rgb,
        img_a_depth, img_b_depth,
        normalize(res_a), normalize(res_b),
    ]
    for ax, img in zip(axes, images):
        ax.imshow(img)
    plt.show()

In [None]:
# Two frames of scene 2017-06-13-16; descriptors are computed right after each
# frame is loaded.
scene = "2017-06-13-16"
img_a_index, img_b_index = "0000000200", "0000001000"

img_a_rgb, img_a_depth, img_a_pose = lf.get_specific_rgbd_with_pose(scene, img_a_index)
res_a = forward_on_img(this_net, img_a_rgb)

img_b_rgb, img_b_depth, img_b_pose = lf.get_specific_rgbd_with_pose(scene, img_b_index)
res_b = forward_on_img(this_net, img_b_rgb)

# 3x2 grid: RGB on top, depth in the middle, normalized descriptors at the bottom.
if descriptor_dimensionality in (1, 3):
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))
    axes = axes.ravel()
    images = [
        img_a_rgb, img_b_rgb,
        img_a_depth, img_b_depth,
        normalize(res_a), normalize(res_b),
    ]
    for ax, img in zip(axes, images):
        ax.imshow(img)
    plt.show()

## What if we try to find a match for the drill, in a different scene?

In [None]:
# Image a: first frame of scene 2017-06-13-12.
scene = "2017-06-13-12"
img_a_index = "0000000001"
img_a_rgb, img_a_depth, img_a_pose = lf.get_specific_rgbd_with_pose(scene, img_a_index)
res_a = forward_on_img(this_net, img_a_rgb)

# Image b: a frame of a different scene, 2017-06-13-01.
scene = "2017-06-13-01"
img_b_index = "0000000800"
img_b_rgb, img_b_depth, img_b_pose = lf.get_specific_rgbd_with_pose(scene, img_b_index)
res_b = forward_on_img(this_net, img_b_rgb)

# These samples are chosen by hand, since there is no ground truth for
# matching between scenes.
# NOTE(review): 4500 is far larger than the 640-pixel image width mentioned
# earlier in this notebook -- confirm whether this is a typo or a deliberate
# way of keeping the "ground truth" marker out of view.
sample_from_a = (200, 350)
answer_from_b = (200, 4500)

make_correspondence_heatmap(sample_from_a, answer_from_b, img_a_rgb, img_b_rgb, res_a, res_b)

In [None]:
# Image a: first frame of scene 2017-06-13-12.
scene = "2017-06-13-12"
img_a_index = "0000000001"
img_a_rgb, img_a_depth, img_a_pose = lf.get_specific_rgbd_with_pose(scene, img_a_index)
res_a = forward_on_img(this_net, img_a_rgb)

# Image b: first frame of a different scene, 2017-06-13-15.
scene = "2017-06-13-15"
img_b_index = "0000000001"
img_b_rgb, img_b_depth, img_b_pose = lf.get_specific_rgbd_with_pose(scene, img_b_index)
res_b = forward_on_img(this_net, img_b_rgb)

# These samples are chosen by hand, since there is no ground truth for
# matching between scenes.
# NOTE(review): 2500 is far larger than the 480-pixel image height mentioned
# earlier in this notebook -- confirm whether this is a typo or a deliberate
# way of keeping the "ground truth" marker out of view.
sample_from_a = (140, 350)
answer_from_b = (2500, 285)

make_correspondence_heatmap(sample_from_a, answer_from_b, img_a_rgb, img_b_rgb, res_a, res_b)

In [None]:
# Image a: first frame of scene 2017-06-13-12.
scene = "2017-06-13-12"
img_a_index = "0000000001"
img_a_rgb, img_a_depth, img_a_pose = lf.get_specific_rgbd_with_pose(scene, img_a_index)
res_a = forward_on_img(this_net, img_a_rgb)

# Image b: a frame of a different scene, 2017-06-13-16.
scene = "2017-06-13-16"
img_b_index = "0000000201"
img_b_rgb, img_b_depth, img_b_pose = lf.get_specific_rgbd_with_pose(scene, img_b_index)
res_b = forward_on_img(this_net, img_b_rgb)

# These samples are chosen by hand, since there is no ground truth for
# matching between scenes.
# NOTE(review): 2500 is far larger than the 480-pixel image height mentioned
# earlier in this notebook -- confirm whether this is a typo or a deliberate
# way of keeping the "ground truth" marker out of view.
sample_from_a = (145, 340)
answer_from_b = (2500, 285)

make_correspondence_heatmap(sample_from_a, answer_from_b, img_a_rgb, img_b_rgb, res_a, res_b)