# Tutorial: Creating a submission with GPU dependencies (DISK)

This notebook shows you how to build and submit a competition entry that relies on custom dependencies.

First, follow the steps in [this notebook](https://www.kaggle.com/code/eduardtrulls/imc2022-dependencies/edit/run/91840821) and import the resulting "dataset" into this notebook. Your `input` folder should now contain `imc2022-dependencies` (see right pane).

The test set for this competition is hidden, so your solution is scored by submitting the notebook. First, run the notebook with internet access on (right pane) and `dry_run=True`. Then set `dry_run=False`, toggle internet off, and submit the notebook for scoring using the "submit" button on the right pane.

In [1]:
# Debug switch: when True, later cells print the test pairs, plot the inlier
# matches for every pair and dump the submission file. Keep it False for the
# scored (offline) submission run.
dry_run = False

In [2]:
# DISK and its helper packages are installed from the wheels bundled in the
# attached `imc2022-dependencies` dataset. `--no-index` stops pip from querying
# PyPI, so these installs do not need internet access.
!pip install -f /kaggle/input/imc2022-dependencies/wheels --no-index torch_dimcheck
!pip install -f /kaggle/input/imc2022-dependencies/wheels --no-index torch_localize
!pip install -f /kaggle/input/imc2022-dependencies/wheels --no-index unets
!pip install -f /kaggle/input/imc2022-dependencies/wheels --no-index disk

# kornia and kornia_moons are installed from local wheel files under the
# `loftrkornia` input folder, so that dataset must be attached to the notebook too.
!pip install ../input/loftrkornia/LOFTR-kornia/kornia-0.6.4-py2.py3-none-any.whl
!pip install ../input/loftrkornia/LOFTR-kornia/kornia_moons-0.1.9-py3-none-any.whl

Looking in links: /kaggle/input/imc2022-dependencies/wheels
Processing /kaggle/input/imc2022-dependencies/wheels/torch_dimcheck-0.0.1-py3-none-any.whl
Installing collected packages: torch-dimcheck
Successfully installed torch-dimcheck-0.0.1
Looking in links: /kaggle/input/imc2022-dependencies/wheels
Processing /kaggle/input/imc2022-dependencies/wheels/torch_localize-0.1.0-py3-none-any.whl
Installing collected packages: torch-localize
Successfully installed torch-localize-0.1.0
Looking in links: /kaggle/input/imc2022-dependencies/wheels
Processing /kaggle/input/imc2022-dependencies/wheels/unets-0.1.0-py3-none-any.whl
Installing collected packages: unets
Successfully installed unets-0.1.0
Looking in links: /kaggle/input/imc2022-dependencies/wheels
Processing /kaggle/input/imc2022-dependencies/wheels/disk-0.1.0-py3-none-any.whl
Processing /kaggle/input/imc2022-dependencies/wheels/tensorflow-2.8.0-cp37-cp37m-manylinux2010_x86_64.whl
Processing /kaggle/input/imc2022-dependenc

In [3]:
import os
import imageio
import numpy as np
from disk import DISK, Features
import torch
import torch.nn.functional as TorchFunctional
from torch.utils.data import DataLoader
from torch_dimcheck import dimchecked
from tqdm import tqdm
import argparse
from functools import partial
import h5py
import matplotlib.pyplot as plt
from glob import glob
import csv
import cv2
import pydegensac

import kornia
import kornia.feature as kornia_feature
from kornia_moons.feature import *
import gc

# Warn (without failing) when no CUDA device is visible to PyTorch.
if not torch.cuda.is_available():
    print('You may want to enable the GPU switch?')

In [4]:
def FlattenMatrix(M, num_digits=8):
    '''Serialize a matrix into one space-separated string for a CSV field.

    Entries are visited in the order produced by `M.flatten()` and written in
    scientific notation with `num_digits` digits after the decimal point.
    '''

    # Build the per-entry format once, e.g. '{:.8e}' for the default precision.
    entry_format = f'{{:.{num_digits}e}}'
    return ' '.join(map(entry_format.format, M.flatten()))


def BuildCompositeImage(im1, im2, axis=1, margin=0, background=1):
    '''Stack two images of possibly different sizes on one shared canvas.

    The images are placed side by side (axis=1) or one above the other
    (axis=0), separated by `margin` pixels. The smaller image is centred along
    the other dimension.

    Args:
        im1, im2: three-dimensional (height, width, channels) arrays. The
            canvas they are copied into has 3 channels.
        axis: 0 to stack vertically, 1 to stack horizontally.
        margin: gap, in pixels, left between the two images.
        background: 0 for a black canvas, 1 for a white one. Any other value
            is treated as 1.

    Returns:
        A tuple `(composite, (voff1, voff2), (hoff1, hoff2))`: the uint8 canvas
        and the vertical / horizontal offsets at which each image was pasted.

    Raises:
        RuntimeError: if `axis` is neither 0 nor 1.
    '''

    if background not in (0, 1):
        background = 1
    if axis not in (0, 1):
        raise RuntimeError('Axis must be 0 (vertical) or 1 (horizontal)')

    h1, w1, _ = im1.shape
    h2, w2, _ = im2.shape

    if axis == 1:
        canvas_shape = (max(h1, h2), w1 + w2 + margin, 3)
        # Centre the shorter image vertically.
        if h1 > h2:
            voff1, voff2 = 0, (h1 - h2) // 2
        else:
            voff1, voff2 = (h2 - h1) // 2, 0
        hoff1, hoff2 = 0, w1 + margin
    else:
        canvas_shape = (h1 + h2 + margin, max(w1, w2), 3)
        # Centre the narrower image horizontally.
        if w1 > w2:
            hoff1, hoff2 = 0, (w1 - w2) // 2
        else:
            hoff1, hoff2 = (w2 - w1) // 2, 0
        voff1, voff2 = 0, h1 + margin

    # np.full with an explicit dtype keeps the canvas uint8 even when
    # `background` is passed as a float (e.g. 1.0).
    composite = np.full(canvas_shape, 255 * background, dtype=np.uint8)
    composite[voff1:voff1 + h1, hoff1:hoff1 + w1, :] = im1
    composite[voff2:voff2 + h2, hoff2:hoff2 + w2, :] = im2

    return (composite, (voff1, voff2), (hoff1, hoff2))


def DrawMatches(im1, im2, kp1, kp2, matches, axis=1, margin=0, background=0, linewidth=2):
    '''Draw keypoints and matches on a composite of two images.

    Args:
        im1, im2: the two images, as accepted by `BuildCompositeImage`.
        kp1, kp2: keypoints of each image, indexable as `kp[i]` -> (x, y).
            The two sets may contain different numbers of keypoints.
        matches: iterable of `(idx_a, idx_b)` pairs indexing `kp1` / `kp2`.
        axis, margin, background: forwarded to `BuildCompositeImage`.
        linewidth: thickness, in pixels, of the line joining matched keypoints.

    Returns:
        The composite image with small markers on every keypoint, and larger
        markers plus a connecting line for every match.
    '''

    composite, v_offset, h_offset = BuildCompositeImage(im1, im2, axis, margin, background)

    def _to_canvas(coord, side):
        # Shift an (x, y) keypoint by the offset at which image `side` was pasted.
        return (int(coord[0] + h_offset[side]), int(coord[1] + v_offset[side]))

    # Draw all keypoints. Each set is iterated on its own: zipping kp1 with kp2
    # would silently skip the surplus keypoints of the longer set.
    for side, keypoints in enumerate((kp1, kp2)):
        for coord in keypoints:
            composite = cv2.drawMarker(composite, _to_canvas(coord, side), color=(255, 0, 0), markerType=cv2.MARKER_CROSS, markerSize=5, thickness=1)

    # Draw matches, and highlight keypoints used in matches.
    for idx_a, idx_b in matches:
        point_a = _to_canvas(kp1[idx_a], 0)
        point_b = _to_canvas(kp2[idx_b], 1)
        composite = cv2.drawMarker(composite, point_a, color=(0, 0, 255), markerType=cv2.MARKER_CROSS, markerSize=12, thickness=1)
        composite = cv2.drawMarker(composite, point_b, color=(0, 0, 255), markerType=cv2.MARKER_CROSS, markerSize=12, thickness=1)
        composite = cv2.line(composite, point_a, point_b, color=(0, 0, 255), thickness=linewidth)

    return composite

In [5]:
# Read the pairs file.

src = '/kaggle/input/image-matching-challenge-2022/'

with open(f'{src}/test.csv') as f:
    reader = csv.reader(f, delimiter=',')
    # Skip header.
    next(reader, None)
    # Every remaining row is kept as-is (a list of strings).
    test_samples = list(reader)

if dry_run:
    for sample in test_samples:
        print(sample)

In [6]:
# Pre-compute features. Code hastily copy-pasted with minor changes from:
# https://github.com/cvlab-epfl/disk/blob/master/detect.py

# Start from a clean slate: delete any `features` folder left by a previous run.
!rm -rf features

class Image:
    '''A (C, H, W) bitmap bundled with its file name and the (H, W) shape it
    had before any resizing, so coordinates can be mapped back later.'''

    def __init__(self, bitmap: ['C', 'H', 'W'], fname: str, orig_shape=None):
        self.bitmap = bitmap
        self.fname = fname
        # With no explicit original shape, the bitmap's own (H, W) is used.
        self.orig_shape = self.bitmap.shape[1:] if orig_shape is None else orig_shape

    def resize_to(self, shape):
        '''Return a new Image scaled (aspect ratio kept) and then zero-padded
        at the bottom/right to exactly `shape` = (rows, cols).'''
        scaled_bitmap = self._interpolate(self.bitmap, shape)
        padded_bitmap = self._pad(scaled_bitmap, shape)
        return Image(padded_bitmap, self.fname, orig_shape=self.bitmap.shape[1:])

    @dimchecked
    def to_image_coord(self, xys: [2, 'N']) -> ([2, 'N'], ['N']):
        '''Map (x, y) coordinates from the current bitmap back to the original
        image, and flag which of them fall inside the original image.'''
        scale, _ = self._compute_interpolation_size(self.bitmap.shape[1:])
        rescaled = xys / scale

        rows, cols = self.orig_shape
        xs, ys = rescaled

        # Points that landed in the padded area map outside the original image.
        inside = (0 <= xs) & (xs < cols) & (0 <= ys) & (ys < rows)

        return rescaled, inside

    def _compute_interpolation_size(self, shape):
        '''Return `(f, new_size)`: the scale factor that fits `orig_shape`
        inside `shape` while keeping the aspect ratio, and the resulting
        (rows, cols) size.'''
        row_ratio = self.orig_shape[0] / shape[0]
        col_ratio = self.orig_shape[1] / shape[1]

        f = 1 / max(row_ratio, col_ratio)

        # The dimension with the larger ratio fills the target exactly; the
        # other one is scaled by the same factor and truncated to an int.
        if row_ratio > col_ratio:
            return f, (shape[0], int(f * self.orig_shape[1]))
        return f, (int(f * self.orig_shape[0]), shape[1])

    @dimchecked
    def _interpolate(self, image: ['C', 'H', 'W'], shape) -> ['C', 'h', 'w']:
        '''Bilinearly resize `image` to the aspect-preserving size for `shape`.'''
        _, target_size = self._compute_interpolation_size(shape)
        batched = image.unsqueeze(0)  # interpolate() expects a batch dimension
        resized = TorchFunctional.interpolate(
            batched,
            size=target_size,
            mode='bilinear',
            align_corners=False,
        )
        return resized.squeeze(0)

    @dimchecked
    def _pad(self, image: ['C', 'H', 'W'], shape) -> ['C', 'h', 'w']:
        '''Zero-pad `image` at the bottom and right up to `shape` = (rows, cols).'''
        extra_rows = shape[0] - image.shape[1]
        extra_cols = shape[1] - image.shape[2]

        if extra_rows < 0 or extra_cols < 0:
            raise ValueError("Attempting to pad by negative value")

        # F.pad takes the padding for the last dimension first: (left, right, top, bottom).
        return TorchFunctional.pad(image, (0, extra_cols, 0, extra_rows))

    
class SceneDataset:
    '''Dataset over the images found directly inside one folder.

    Each item is an `Image` holding a float32 bitmap with 3 channels in
    (C, H, W) layout (pixel values divided by 255), named after the file
    without its extension. When `crop_size` is given, the image is resized to it.

    NOTE: the extension filter reads the module-level `args.image_extension`.
    '''

    def __init__(self, image_path, crop_size=(None, None)):
        self.image_path = image_path
        self.crop_size  = crop_size
        # Sorted so the processing order does not depend on the order in which
        # the filesystem happens to list the folder.
        self.names = sorted(p for p in os.listdir(image_path)
                            if p.endswith(args.image_extension))

    def __len__(self):
        return len(self.names)

    def __getitem__(self, ix):
        name   = self.names[ix]
        path   = os.path.join(self.image_path, name)
        img    = np.ascontiguousarray(imageio.imread(path))
        tensor = torch.from_numpy(img).to(torch.float32)

        # Normalise every layout to exactly three channels.
        if tensor.dim() == 2: # some images may be grayscale
            tensor = tensor.unsqueeze(-1)
        if tensor.shape[-1] in (2, 4): # gray+alpha / RGBA: drop the alpha channel
            tensor = tensor[..., :-1]
        if tensor.shape[-1] == 1:
            tensor = tensor.expand(-1, -1, 3)

        bitmap              = tensor.permute(2, 0, 1) / 255.
        extensionless_fname = os.path.splitext(name)[0]

        image = Image(bitmap, extensionless_fname)

        if self.crop_size != (None, None):
            image = image.resize_to(self.crop_size)

        return image

    @staticmethod
    def collate_fn(images):
        '''Stack the bitmaps into one batch tensor and keep the `Image`
        objects alongside it. Returns `(bitmaps, images)`.'''
        bitmaps = torch.stack([im.bitmap for im in images], dim=0)

        return bitmaps, images


def extract(dataset, save_path):
    '''Run the feature extractor over `dataset` and store the results as HDF5.

    For every image, keypoints are mapped back to original-image coordinates,
    those falling outside the original image are dropped, and the remainder is
    sorted by detection score (highest first). Results are written under
    `save_path` to `keypoints.h5` and `descriptors.h5` (plus `scores.h5` when
    `args.detection_scores` is set), one dataset per image name.

    Relies on the module-level `args`, `model`, `DEV` and `CPU`.

    Args:
        dataset: a dataset exposing `collate_fn` and yielding `Image` objects.
        save_path: output directory; created if it does not exist.

    Returns:
        None. The output is the set of files written to `save_path`.

    Raises:
        RuntimeError: re-raised with an explanatory message when the model
            reports a 'U-Net failed' error; any other RuntimeError propagates.
    '''
    from contextlib import ExitStack  # local import keeps this cell self-contained

    dataloader = DataLoader(
        dataset,
        batch_size=1,
        pin_memory=True,
        collate_fn=dataset.collate_fn,
        num_workers=4,
    )

    # Named differently from the enclosing function to avoid shadowing it.
    if args.mode == 'nms':
        extract_features = partial(
            model.features,
            kind='nms',
            window_size=args.window,
            cutoff=0.,
            n=args.n
        )
    else:
        extract_features = partial(model.features, kind='rng')

    os.makedirs(save_path, exist_ok=True)

    # ExitStack guarantees every opened HDF5 file is flushed and closed, even
    # if extraction fails part-way through.
    with ExitStack() as stack:
        keypoint_h5   = stack.enter_context(h5py.File(os.path.join(save_path, 'keypoints.h5'), 'w'))
        descriptor_h5 = stack.enter_context(h5py.File(os.path.join(save_path, 'descriptors.h5'), 'w'))
        score_h5 = None
        if args.detection_scores:
            score_h5 = stack.enter_context(h5py.File(os.path.join(save_path, 'scores.h5'), 'w'))

        pbar = tqdm(dataloader)
        for bitmaps, images in pbar:
            bitmaps = bitmaps.to(DEV, non_blocking=True)

            with torch.no_grad():
                try:
                    batched_features = extract_features(bitmaps)
                except RuntimeError as e:
                    if 'U-Net failed' in str(e):
                        msg = ('Please use input size which is multiple of 16 (or '
                               'adjust the --height and --width flags to let this '
                               'script rescale it automatically). This is because '
                               'we internally use a U-Net with 4 downsampling '
                               'steps, each by a factor of 2, therefore 2^4=16.')

                        raise RuntimeError(msg) from e
                    else:
                        raise

            for features, image in zip(batched_features.flat, images):
                features = features.to(CPU)

                kps_crop_space = features.kp.T
                kps_img_space, mask = image.to_image_coord(kps_crop_space)

                # Keep only the keypoints that lie inside the original image.
                keypoints   = kps_img_space.numpy().T[mask]
                descriptors = features.desc.numpy()[mask]
                scores      = features.kp_logp.numpy()[mask]

                # Highest score first.
                order = np.argsort(scores)[::-1]

                keypoints   = keypoints[order]
                descriptors = descriptors[order]
                scores      = scores[order]

                assert descriptors.shape[1] == args.desc_dim
                assert keypoints.shape[1] == 2

                if args.f16:
                    descriptors = descriptors.astype(np.float16)

                keypoint_h5.create_dataset(image.fname, data=keypoints)
                descriptor_h5.create_dataset(image.fname, data=descriptors)

                if score_h5 is not None:
                    score_h5.create_dataset(image.fname, data=scores)

                pbar.set_postfix(n=keypoints.shape[0])
    
    
# Command-line style configuration inherited from DISK's detect.py. Nothing is
# read from the real command line: the values are parsed from the hard-coded
# string at the bottom of this block.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--height', default=None, type=int,
    help='rescaled height (px). If unspecified, image is not resized in height dimension'
)
parser.add_argument(
    '--width', default=None, type=int,
    help='rescaled width (px). If unspecified, image is not resized in width dimension'
)
parser.add_argument(
    '--image-extension', default='jpg', type=str,
    help='This script will process all files which match `image-path/*.{--image-extension}`'
)
parser.add_argument(
    '--f16', action='store_true',
    help='Store descriptors in fp16 (half precision) format'
)
parser.add_argument('--window', type=int, default=5, help='NMS window size')
parser.add_argument(
    '--n', type=int, default=None,
    help='Maximum number of features to extract. If unspecified, the number is not limited'
)
parser.add_argument(
    '--desc-dim', type=int, default=128,
    help='descriptor dimension. Needs to match the checkpoint value.'
)
parser.add_argument(
    '--mode', choices=['nms', 'rng'], default='nms',
    help=('Whether to extract features using the non-maxima suppression mode or '
          'through training-time grid sampling technique')
)
# `--model-path` matches the hyphenated style of the other flags; the original
# `--model_path` spelling is kept as an alias. Both store to `args.model_path`.
parser.add_argument(
    '--model-path', '--model_path', type=str,
    default='/kaggle/input/imc2022-dependencies/pretrained/disk-depth.pth',
    help="Path to the model's .pth save file"
)
parser.add_argument('--detection-scores', action='store_true')


# Hacky copy-paste: parameters go here.
args = parser.parse_args('--n 1000 --window 9 --height 768 --width 1024 --image-extension png'.split())
print(args)

# Run the network on the GPU when one is available; results are moved back to
# the CPU before being written out.
DEV = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
CPU = torch.device('cpu')
state_dict = torch.load(args.model_path, map_location='cpu')

# For compatibility with older model saves.
if 'extractor' in state_dict:
    weights = state_dict['extractor']
elif 'disk' in state_dict:
    weights = state_dict['disk']
else:
    raise KeyError('Incompatible weight file!')

# Build the network with the parsed descriptor size (instead of a hard-coded
# 128) so it always agrees with the `args.desc_dim` check done in extract().
model = DISK(window=args.window, desc_dim=args.desc_dim)
model.load_state_dict(weights)
model = model.to(DEV)

# Extract features for every image in every folder. Folders are visited in
# sorted order so the log is reproducible; extract() writes its results to
# disk and returns nothing.
for dataset_folder in sorted(glob('/kaggle/input/image-matching-challenge-2022/test_images/*')):
    batch_id = os.path.basename(dataset_folder)
    print(f'Processing "{dataset_folder}"')
    dataset = SceneDataset(dataset_folder, crop_size=(args.height, args.width))
    extract(dataset, f'features/{batch_id}')

Namespace(desc_dim=128, detection_scores=False, f16=False, height=768, image_extension='png', mode='nms', model_path='/kaggle/input/imc2022-dependencies/pretrained/disk-depth.pth', n=1000, width=1024, window=9)


  cpuset_checked))


Processing "/kaggle/input/image-matching-challenge-2022/test_images/1cf87530"


100%|██████████| 2/2 [00:05<00:00,  2.80s/it, n=1000]


Processing "/kaggle/input/image-matching-challenge-2022/test_images/6ceaefff"


100%|██████████| 2/2 [00:00<00:00,  2.47it/s, n=1000]


Processing "/kaggle/input/image-matching-challenge-2022/test_images/d91db836"


100%|██████████| 2/2 [00:00<00:00,  2.83it/s, n=1000]


In [7]:
F_dict = {}
for i, row in enumerate(test_samples):
    sample_id, batch_id, image_1_id, image_2_id = row

    with h5py.File(f'features/{batch_id}/keypoints.h5', 'r') as kp_dict, h5py.File(f'features/{batch_id}/descriptors.h5', 'r') as desc_dict:
        # Compute matches.
        dists, idxs = kornia_feature.match_snn(torch.from_numpy(desc_dict[image_1_id][()]), torch.from_numpy(desc_dict[image_2_id][()]), 0.9)
        match_idxs = idxs.detach().cpu().numpy()

      
        if len(match_idxs) > 8:
            F, inliers = pydegensac.findFundamentalMatrix(kp_dict[image_1_id][()][match_idxs[:, 0]],
                                                kp_dict[image_2_id][()][match_idxs[:, 1]],
                                                0.95,
                                                0.99,
                                                2000)
        
            inliers = inliers > 0
            assert F.shape == (3, 3), 'Malformed F?'
            F_dict[sample_id] = F
        else:
            F_dict[sample_id] = np.zeros((3, 3))
            continue
        gc.collect()

        if dry_run:
            image_1 = cv2.cvtColor(cv2.imread(f'{src}/test_images/{batch_id}/{image_1_id}.png'), cv2.COLOR_BGR2RGB)
            image_2 = cv2.cvtColor(cv2.imread(f'{src}/test_images/{batch_id}/{image_2_id}.png'), cv2.COLOR_BGR2RGB)
            matches_after_ransac = np.array([match for match, is_inlier in zip(match_idxs, inliers) if is_inlier])

            im_inliers = DrawMatches(image_1, image_2, kp_dict[image_1_id][()], kp_dict[image_2_id][()], matches_after_ransac)
            fig = plt.figure(figsize=(15, 15))
            plt.title(f'{image_1_id}-{image_2_id}')
            plt.imshow(im_inliers)
            plt.axis('off')
            plt.show()

with open('submission.csv', 'w') as f:
    f.write('sample_id,fundamental_matrix\n')
    for sample_id, F in F_dict.items():
        f.write(f'{sample_id},{FlattenMatrix(F)}\n')

if dry_run:
    !cat submission.csv