In [None]:
# Load IPython's autoreload extension; mode 2 reloads imported modules before running each cell.
%load_ext autoreload
%autoreload 2

In [None]:
#export
from nb_002b import *

In [None]:
# Dataset locations, all relative to the local `data` directory.
DATA_PATH = Path('data')
PATH = DATA_PATH.joinpath('cifar10_dog_air')
TRAIN_PATH = PATH.joinpath('train')

In [None]:
# Training data comes from the `train` folder; the `test` folder serves as the validation set.
train_ds = FilesDataset(TRAIN_PATH)
valid_ds = FilesDataset(PATH / 'test')

In [None]:
# Working example: first element of dataset item 1 (presumably the image tensor; confirm against FilesDataset).
x = train_ds[1][0]

Perspective warping is explained in detail here: https://web.archive.org/web/20150222120106/xenia.media.mit.edu/~cwren/interpolator/

Other source: http://www.math.ubc.ca/~cass/graphics/Perspective.pdf

We will apply the transformation:

$$(x,y) \rightarrow \left ( \frac{ax + by + c}{gx + hy + 1}, \frac{dx + ey + f}{gx + hy + 1} \right )$$

to the coordinates, where (a,b,c,d,e,f,g,h) are the 8 coefficients we need to find. To do this, we solve a system of 8 equations obtained by choosing where we want to send four points (each point has two coordinates, so it gives two equations). Usually these will be the four corners of the picture.

In [None]:
#export
def find_coeffs(ori_pts, targ_pts):
    """Solve for the 8 coefficients of the perspective transform sending `targ_pts` onto `ori_pts`.

    The transform is
        (x, y) -> ((a*x + b*y + c) / (g*x + h*y + 1), (d*x + e*y + f) / (g*x + h*y + 1))
    Each (target, original) pair of points gives two linear equations in
    (a, b, c, d, e, f, g, h), so four pairs give an 8x8 linear system.

    Args:
        ori_pts: four [x, y] points that the transform must output.
        targ_pts: four [x, y] points fed to the transform, paired by position with `ori_pts`.

    Returns:
        A 1D FloatTensor holding (a, b, c, d, e, f, g, h).
    """
    matrix = []
    # Two rows per pair of points: the first encodes the x equation, the second the y equation.
    for p1, p2 in zip(targ_pts, ori_pts):
        matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
        matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])

    A = FloatTensor(matrix)
    B = FloatTensor(ori_pts).view(8)
    # The 8 scalars we seek are the solution of AX = B. A is square (8x8), so we invert it
    # directly: going through the normal equations (A^T A)^-1 A^T would square the condition
    # number of the system and lose precision for no benefit.
    return torch.mv(torch.inverse(A), B)

In [None]:
# Four reference points passed to find_coeffs as `ori_pts`.
ori_pts = [
    [-1, -1], [1, -1],
    [-1,  1], [1,  1],
]
# Same points, except the second one: its second coordinate goes from -1 to -0.5.
targ_pts = [
    [-1, -1], [1, -0.5],
    [-1,  1], [1,  1],
]

In [None]:
# Solve for the eight perspective coefficients linking the two sets of points.
coeffs = find_coeffs(ori_pts, targ_pts)

In [None]:
# Display the solved coefficients (a, b, c, d, e, f, g, h).
coeffs

Remember we have to do
$$(x,y) \rightarrow \left ( \frac{ax + by + c}{gx + hy + 1}, \frac{dx + ey + f}{gx + hy + 1} \right )$$
to a lot of (x,y) coordinates that will be in a matrix c. Let's say it's of shape N * 2 to be simpler. If we add ones to the second dimension to make the matrix N * 3, and if we rewrite the coeffs in a matrix
$$\left ( \begin{array}{ccc} a & b & c \\ d & e & f \\ g & h & 1 \end{array} \right )$$
then the matrix product c @ coeffs.t() will be N * 3, and it will be all the
$$(ax + by + c, dx + ey + f, gx + hy + 1).$$
We just need to divide the first two columns by the last one to get the new coordinates.

In [None]:
def add_ones(coords):
    """Flatten `coords` to N * 2 and append a column of ones, giving an N * 3 tensor.

    The ones are created with `coords.new_ones`, so they share the dtype and device of
    `coords`; the concatenation therefore does not depend on the default tensor type.
    """
    coords = coords.view(-1,2)
    ones = coords.new_ones((coords.size(0), 1))
    return torch.cat([coords, ones], 1)

In [None]:
def apply_perspective(coords, coeffs):
    """Apply the perspective transform given by `coeffs` to the (x, y) pairs in `coords`.

    Args:
        coords: tensor that can be viewed as N * 2 rows of (x, y).
        coeffs: 1D tensor with the 8 coefficients (a, b, c, d, e, f, g, h).

    Returns:
        The transformed coordinates, with the same size as the input `coords`.

    Note: a later cell redefines `apply_perspective` without `add_ones`; that later
    definition is the exported one.
    """
    ori_size = coords.size()
    # Collapse all the dims except the last one and add ones: coords becomes N * 3.
    coords = add_ones(coords)
    # Turn the coeffs into a 3*3 matrix with a 1 in the bottom-right corner. The 1 is built
    # from `coeffs` itself so it always matches its dtype and device.
    coeffs = torch.cat([coeffs, coeffs.new_ones(1)]).view(3,3)
    coords = torch.mm(coords, coeffs.t())
    # Divide every row by its third entry (gx + hy + 1) to get the projected x and y.
    coords.mul_(1/coords[:,2].unsqueeze(1))
    return coords[:,:2].view(ori_size)

In [None]:
# 2x3 identity affine matrix, and the sampling grid it generates for a batch holding one image of x's size.
m = torch.eye(2, 3)
coords = F.affine_grid(m.unsqueeze(0), torch.Size([1, *x.size()]))

In [None]:
%timeit res = apply_perspective(coords, coeffs)

In [None]:
# Resample x (with a leading batch dimension added) at the warped grid coordinates.
y = F.grid_sample(x[None], res)

The top right corner should be lowered by one quarter of the image.

In [None]:
# Warped image (first and only item of the batch).
show_image(y[0])

In [None]:
# Original image, for comparison.
show_image(x)

Just checking if not adding ones is faster or slower.

In [None]:
#export
def apply_perspective(coords, coeffs):
    """Apply the perspective transform given by `coeffs` to the (x, y) pairs in `coords`.

    Args:
        coords: tensor that can be viewed as N * 2 rows of (x, y).
        coeffs: 1D tensor with the 8 coefficients (a, b, c, d, e, f, g, h).

    Returns:
        The transformed coordinates, with the same size as the input `coords`.
    """
    ori_size = coords.size()
    # Collapse all the dims except the last one: coords becomes N * 2.
    coords = coords.view(-1,2)
    # Turn the coeffs into a 3*3 matrix with a 1 in the bottom-right corner. The 1 is built
    # from `coeffs` itself so it always matches its dtype and device.
    coeffs = torch.cat([coeffs, coeffs.new_ones(1)]).view(3,3)
    # Same result as appending a column of ones and multiplying by the full matrix:
    # the last column (c, f, 1) is added as a bias to coords @ (first two columns).t().
    coords = torch.addmm(coeffs[:,2], coords, coeffs[:,:2].t())
    # Divide every row by its third entry (gx + hy + 1) to get the projected x and y.
    coords.mul_(1/coords[:,2].unsqueeze(1))
    return coords[:,:2].view(ori_size)

In [None]:
%timeit res = apply_perspective(coords, coeffs)

This version is a bit faster: instead of adding the ones, we compute coords = coords @ (first two columns of the matrix).t() + (last column of the matrix).

In [None]:
# Resample x (with a leading batch dimension added) at the warped grid coordinates.
y = F.grid_sample(x[None], res)

In [None]:
# Warped image (first and only item of the batch).
show_image(y[0])

The first thing we can try is moving all four corners by different amounts.

In [None]:
@reg_transform
def perspective_warp(c, img_size, magnitude:uniform=0) -> TfmType.Coord:
    """Warp the coords `c` with a perspective transform that shifts each of four corners.

    `magnitude` is viewed as a 4 * 2 tensor: row i is added, coordinate by coordinate,
    to the i-th corner of `ori_pts` to build the target points. `img_size` is not used.
    """
    magnitude = magnitude.view(4,2)
    ori_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]
    targ_pts = []
    for corner, shift in zip(ori_pts, magnitude):
        # Move this corner by its own pair of offsets.
        targ_pts.append([coord + delta for coord, delta in zip(corner, shift)])
    coeffs = find_coeffs(ori_pts, targ_pts)
    return apply_perspective(c, coeffs)

In [None]:
# Show 16 perspective warps of x on a 4x4 grid; the transform pipeline is rebuilt for every subplot.
tfms = [perspective_warp_tfm(magnitude=(-0.4,0.4,8))]
_,axes = plt.subplots(4,4, figsize=(12,12))
for ax in axes.flat:
    y = apply_tfms(tfms)(x, padding_mode='zeros')
    show_image(y, ax)

To be a bit less messy, perspective warps are of two types: tilts and skews. A tilt changes the perspective we see the image from (on the left, right, top or bottom), while a skew changes one corner only.

In [None]:
#export
def rand_int(low,high):
    "Return the integer drawn by `random.randint(low, high)`."
    return random.randint(low, high)

In [None]:
#export
@reg_transform
@reg_transform
def tilt(c, img_size, direction:rand_int, magnitude:uniform=0) -> TfmType.Coord:
    """Warp the coords `c` with a perspective transform that moves two corners symmetrically.

    Args:
        c: coordinates passed on to `apply_perspective`.
        img_size: not used.
        direction: which pair of corners moves.
            0: the two corners with x = 1 have their y pushed to -1-magnitude and 1+magnitude.
            1: the two corners with x = -1 have their y pushed to -1-magnitude and 1+magnitude.
            2: the two corners with y = 1 have their x pushed to -1-magnitude and 1+magnitude.
            3: the two corners with y = -1 have their x pushed to -1-magnitude and 1+magnitude.
        magnitude: signed amount by which the selected corners move.

    Raises:
        ValueError: if `direction` is not 0, 1, 2 or 3.
    """
    ori_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]
    if direction == 0:   targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1+magnitude]]
    elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1+magnitude], [1,-1], [1,1]]
    elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1+magnitude,1]]
    elif direction == 3: targ_pts = [[-1-magnitude,-1], [-1,1], [1+magnitude,-1], [1,1]]
    # Fail with a clear message instead of leaving `targ_pts` unbound.
    else: raise ValueError('tilt: `direction` must be 0, 1, 2 or 3, got {!r}'.format(direction))
    coeffs = find_coeffs(ori_pts, targ_pts)
    return apply_perspective(c, coeffs)

In [None]:
#export
@reg_transform
@reg_transform
def skew(c, img_size, direction:rand_int, magnitude:uniform=0) -> TfmType.Coord:
    """Warp the coords `c` with a perspective transform that moves a single corner.

    Args:
        c: coordinates passed on to `apply_perspective`.
        img_size: not used.
        direction: integer from 0 to 7. Corner number `direction // 2` of `ori_pts` is moved;
            even values change its first coordinate and odd values its second one. The
            coordinate goes from -1 to -1-magnitude or from 1 to 1+magnitude.
        magnitude: signed amount by which the selected corner moves.

    Raises:
        ValueError: if `direction` is not an integer from 0 to 7.
    """
    ori_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]
    if direction == 0:   targ_pts = [[-1-magnitude,-1], [-1,1], [1,-1], [1,1]]
    elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1], [1,-1], [1,1]]
    elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1,1]]
    elif direction == 3: targ_pts = [[-1,-1], [-1,1+magnitude], [1,-1], [1,1]]
    elif direction == 4: targ_pts = [[-1,-1], [-1,1], [1+magnitude,-1], [1,1]]
    elif direction == 5: targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1]]
    elif direction == 6: targ_pts = [[-1,-1], [-1,1], [1,-1], [1+magnitude,1]]
    elif direction == 7: targ_pts = [[-1,-1], [-1,1], [1,-1], [1,1+magnitude]]
    # Fail with a clear message instead of leaving `targ_pts` unbound.
    else: raise ValueError('skew: `direction` must be an integer from 0 to 7, got {!r}'.format(direction))
    coeffs = find_coeffs(ori_pts, targ_pts)
    return apply_perspective(c, coeffs)

In [None]:
# Select the example again (first element of dataset item 1) and display its size.
x = train_ds[1][0]
x.size()

The four deterministic tilts, going to the back of the image on the first row, and to the front on the second one.

In [None]:
# First row of plots uses magnitude -0.4, second row +0.4; the columns go through directions 0 to 3.
magns = [-0.4,0.4]
_,axes = plt.subplots(2,4, figsize=(12,6))
for i,ax in enumerate(axes.flat):
    y = apply_affine(m=None,func=partial(tilt, direction=i%4, magnitude=magns[i//4]))(x, padding_mode='zeros')
    show_image(y, ax)

The 8 types of skew, again back or front.

In [None]:
# First two rows of plots use magnitude -0.4, last two rows +0.4; directions go from 0 to 7 in each half.
magns = [-0.4,0.4]
_,axes = plt.subplots(4,4, figsize=(8,8))
for i,ax in enumerate(axes.flatten()):
    y = apply_affine(m=None,func=partial(skew, direction=i%8, magnitude=magns[i//8]))(x, padding_mode='zeros')
    show_image(y, ax)
# Adjust the layout once, after every subplot has been drawn, rather than on each iteration.
plt.tight_layout()

Now with a rectangular image.

In [None]:
# Open an image from the caltech101 folder, convert it with pil2tensor and display it.
img = Image.open(DATA_PATH/'caltech101/airplanes/image_0054.jpg')
x = pil2tensor(img)
show_image(x)

In [None]:
# First row of plots uses magnitude -0.4, second row +0.4; the columns go through directions 0 to 3.
magns = [-0.4,0.4]
_,axes = plt.subplots(2,4, figsize=(12,3))
for i,ax in enumerate(axes.flat):
    y = apply_affine(m=None,func=partial(tilt, direction=i%4, magnitude=magns[i//4]))(x, padding_mode='zeros')
    show_image(y, ax)

In [None]:
# First two rows of plots use magnitude -0.4, last two rows +0.4; directions go from 0 to 7 in each half.
magns = [-0.4,0.4]
_,axes = plt.subplots(4,4, figsize=(12,6))
for i,ax in enumerate(axes.flatten()):
    y = apply_affine(m=None,func=partial(skew, direction=i%8, magnitude=magns[i//8]))(x, padding_mode='zeros')
    show_image(y, ax)
# Adjust the layout once, after every subplot has been drawn, rather than on each iteration.
plt.tight_layout()