### Installs and Imports

In [1]:
# Copy the bundled Weighted-Boxes-Fusion package from the input directory into the
# working directory, then install it without resolving dependencies (pip stdout is discarded).
!cp '../input/weightedboxesfusion' . -r
!pip install --no-deps './weightedboxesfusion' > /dev/null

In [2]:
import os
import ast
import numba
import re
import gc
import cv2
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from shutil import copy
from os.path import join, exists
from numba import jit
from typing import List, Union, Tuple
from tqdm import tqdm
from collections import OrderedDict
import torch
import torch.nn as nn
from torch.optim import Adam
from torch import LongTensor as LongTensor
from torch import FloatTensor as FloatTensor
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torchvision
from torchvision.models import resnet
from torchvision.models._utils import IntermediateLayerGetter
from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator, RPNHead, RegionProposalNetwork
from torchvision.models.detection.roi_heads import RoIHeads
from torchvision.models.detection.transform import GeneralizedRCNNTransform
from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
from torchvision.ops import MultiScaleRoIAlign
from torchvision.ops.misc import FrozenBatchNorm2d
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from sklearn.cluster import AgglomerativeClustering
from ensemble_boxes import *

# Seed NumPy's and PyTorch's global random generators with a fixed value.
np.random.seed(123)
torch.manual_seed(123)

<torch._C.Generator at 0x7fba275e8e30>

In [3]:
N_CLUSTERS = 4  # number of clusters requested from AgglomerativeClustering; one checkpoint is saved per cluster
PL_ROUNDS = 1  # how many times train_round() is invoked
PL_EPOCHS = 5  # training epochs per cluster inside train_round()
detection_threshold = 0.5  # NOTE(review): assigned but not read by any code visible in this notebook
image_size = 1024  # side length passed to run_wbf() and used to clip predicted boxes

In [4]:
# Input locations (Kaggle dataset mounts).
dataset_path = '/kaggle/input/global-wheat-detection'
original_train_path = '/kaggle/input/original-train/original_train'
# Alternative checkpoint, currently disabled:
# weights_path = '/kaggle/input/1024x1024-1/weights_e149.pth'
weights_path = '/kaggle/input/manual-weights-7/e152.pth'
train_path = join(dataset_path, 'train')
test_path = join(dataset_path, 'test')
# Training annotations; each row gets the path <original_train_path>/<source>/<image_id>.jpeg.
train_df = pd.read_csv(join(original_train_path, 'train.csv'))
train_df['image_path'] = [join(original_train_path, s, i + '.jpeg') for i, s in train_df[['image_id', 'source']].values]
# The sample submission supplies the list of test image ids.
test_df = pd.read_csv(join(dataset_path, 'sample_submission.csv'))

In [5]:
# Split validation from training
# Take 50 examples from each source (disabled: the sampling code below is commented out)
# valid_ids = []
# for source in pd.unique(train_df['source']):
#     if source == 'test': continue
#     source_ids = train_df[train_df['source'] == source]['image_id'].values
#     valid_ids.extend(list(np.random.choice(source_ids, 50, replace=False)))

# valid_ids = [p.split('/')[-1].split('.')[0] for p in glob('/kaggle/input/global-wheat-detection/test/*')[::10]]
# print(valid_ids)
# print(len(valid_ids))

In [6]:
# Map every image id to the file it is loaded from.
# Training rows already carry an 'image_path'; the first row seen for an id wins.
# Built from de-duplicated columns instead of a row-by-row iterrows() scan.
_first_train_rows = train_df.drop_duplicates(subset='image_id', keep='first')
id_to_path = dict(zip(_first_train_rows['image_id'], _first_train_rows['image_path']))

# Test images live under test_path as '<image_id>.jpg'; ids that already have a
# training path keep it.
for image_id in pd.unique(test_df['image_id']):
    id_to_path.setdefault(image_id, join(test_path, image_id + '.jpg'))

In [7]:
# Show which columns the training annotations provide.
print(list(train_df.columns))

['image_id', 'width', 'height', 'source', 'x', 'y', 'w', 'h', 'x1', 'y1', 'image_id_orig', 'image_path']


### Dataset functions

In [8]:
class WheatTrainDataset(Dataset):
    """Detection dataset yielding ``(image, target, image_id)`` triples.

    ``dataframe`` holds one row per box with an ``image_id`` column and either
    ``x, y, x1, y1`` corner columns or ``x, y, w, h`` columns. Image files are
    resolved through the module-level ``id_to_path`` mapping.

    ``target`` is a dict with ``boxes`` ([x, y, x1, y1] per row), ``labels``
    (all ones: there is a single class) and ``image_id`` (the dataset index).
    """

    # Thresholds applied by filter_boxes().
    MIN_BOX_LENGTH = 13
    MIN_BOX_AREA = 400
    MAX_BOX_AREA = 145360
    MAX_LENGTH_RATIO = 18

    def __init__(self, dataframe, transforms=None, test=False):
        """Store the annotations and resolve one image path per unique image id.

        Args:
            dataframe: box annotations, one row per box.
            transforms: optional callable invoked as
                ``transforms(image=..., bboxes=..., labels=...)`` returning a
                dict with ``'image'`` and ``'bboxes'`` entries.
            test: stored on the instance; not read by the methods of this class.
        """
        super().__init__()
        self.df = dataframe
        self.image_ids = pd.unique(dataframe['image_id'])
        self.image_paths = [id_to_path[image_id] for image_id in self.image_ids]
        self.length = len(self.image_ids)
        self.transforms = transforms
        self.test = test

    def __getitem__(self, index: int):
        """Return ``(image, target, image_id)`` for ``index``.

        When transforms are set and no box survives the transform plus
        ``filter_boxes``, a different, randomly chosen sample is returned.
        """
        image_id = self.image_ids[index]
        image = self.load_image(self.image_paths[index])
        boxes = self.load_boxes(image_id)

        # there is only one class
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)
        target = {'boxes': boxes, 'labels': labels, 'image_id': torch.tensor([index])}

        if self.transforms:
            sample = self.transforms(image=image, bboxes=target['boxes'], labels=labels)
            image = sample['image']
            boxes = self.filter_boxes(sample['bboxes'])
            if not len(boxes):
                # Nothing left to learn from in this crop: draw another sample.
                return self.__getitem__(np.random.randint(self.length))
            target['boxes'] = torch.tensor(np.asarray(boxes, dtype=np.float32))
            # Boxes may have been dropped by the transform and by filter_boxes(),
            # so the labels are rebuilt to match the surviving boxes one-to-one.
            target['labels'] = torch.ones((len(boxes),), dtype=torch.int64)

        return image, target, image_id

    def __len__(self) -> int:
        return self.length

    def load_image(self, image_path):
        """Read an image as float32 RGB scaled to [0, 1].

        Raises:
            FileNotFoundError: if OpenCV cannot read ``image_path``.
        """
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

    def load_boxes(self, image_id):
        """Return the boxes of ``image_id`` as an array of [x, y, x1, y1] rows."""
        records = self.df[self.df['image_id'] == image_id]
        if 'x1' in records.columns and 'y1' in records.columns:
            return records[['x', 'y', 'x1', 'y1']].values

        # Only width/height are available: convert to corner coordinates.
        boxes = records[['x', 'y', 'w', 'h']].values.copy()
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        return boxes

    def filter_boxes(self, boxes):
        """Keep only boxes whose side lengths, area and aspect ratio are in range.

        Args:
            boxes: iterable of ``(x, y, x1, y1)`` boxes.

        Returns:
            list of the boxes that pass every check, in their original order.
        """
        min_length_ratio = 1.0 / self.MAX_LENGTH_RATIO

        boxes_out = []
        for box in boxes:
            x, y, x1, y1 = box
            w = x1 - x
            h = y1 - y
            # The side-length check runs first, so h is non-zero for the ratio below.
            if w < self.MIN_BOX_LENGTH or h < self.MIN_BOX_LENGTH:
                continue
            area = w * h
            if area < self.MIN_BOX_AREA or area > self.MAX_BOX_AREA:
                continue
            length_ratio = w / h
            if length_ratio < min_length_ratio or length_ratio > self.MAX_LENGTH_RATIO:
                continue
            boxes_out.append(box)

        return boxes_out

    def get_sample_weights(self):
        """Return one sampling weight per image id, in ``self.image_ids`` order.

        The weight depends on the ``width``/``height`` recorded for the image
        (8 for width 3072, 5 for height 2048, 2.5 for width 2048, else 1) and is
        multiplied by 10 when ``source`` is ``'test'``.
        """
        # One metadata row per image, looked up once instead of re-filtering the
        # whole dataframe for every image id.
        per_image = (
            self.df.drop_duplicates(subset='image_id', keep='first')
            .set_index('image_id')
            .loc[self.image_ids, ['width', 'height', 'source']]
        )

        weights = []
        for w, h, source in per_image.itertuples(index=False, name=None):
            if w == 3072:
                # 2x3
                weight = 8
            elif h == 2048:
                # 2x2
                weight = 5
            elif w == 2048:
                # 1x2
                weight = 2.5
            else:
                # 1x1
                weight = 1
            if source == 'test':
                weight *= 10
            weights.append(weight)
        return np.array(weights)

    
class WheatTestDataset(Dataset):
    """Inference dataset yielding ``(image or TTA image list, image_id)``.

    Images are read from ``<image_dir>/<image_id>.jpg``, converted to RGB,
    resized to ``image_size`` x ``image_size`` and scaled to float32 in [0, 1].
    """

    def __init__(self, dataframe, image_dir, image_size=1024, onfly=False, tta=True):
        """
        Args:
            dataframe: frame with an ``image_id`` column; each unique id is one sample.
            image_dir: directory containing the ``.jpg`` files.
            image_size: side length the images are resized to.
            onfly: stored on the instance; not read by the methods of this class.
            tta: when true, ``__getitem__`` returns the output of ``apply_tta``
                instead of the single image.
        """
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.image_size = image_size
        self.onfly = onfly
        self.tta = tta

    def load_image(self, image_id):
        """Load, resize and normalise the image belonging to ``image_id``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_path = f'{self.image_dir}/{image_id}.jpg'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (self.image_size, self.image_size))
        image = image.astype(np.float32)
        image /= 255.0
        return image

    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        image = self.load_image(image_id)
        if self.tta:
            return apply_tta(image), image_id
        return image, image_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]


def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples."""
    fields = zip(*batch)
    return tuple(fields)

### Transforms & Ensembling

In [9]:
def run_wbf(predictions, image_size=1024, iou_thr=0.4, skip_box_thr=0.7, weights=None):
    """Fuse the per-augmentation predictions of one image with Weighted Boxes Fusion.

    Args:
        predictions: one dict per TTA view, in the order produced by ``apply_tta``,
            each with ``'boxes'`` and ``'scores'`` tensors.
        image_size: image side length; boxes are divided by ``image_size - 1``
            before fusion and multiplied back afterwards.
        iou_thr: forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: forwarded to ``weighted_boxes_fusion``.
        weights: optional per-view weights forwarded to ``weighted_boxes_fusion``
            (``None`` leaves the weighting to that function's default).

    Returns:
        ``(boxes, scores, labels)`` as returned by ``weighted_boxes_fusion``,
        with ``boxes`` rescaled by ``image_size - 1``.
    """
    scale = image_size - 1

    boxes = [pred['boxes'].data.cpu().numpy() for pred in predictions]
    # Map every view's boxes back into the frame of the un-augmented image.
    boxes = revert_tta(boxes, image_size)
    boxes = [box_set / scale for box_set in boxes]
    scores = [pred['scores'].data.cpu().numpy() for pred in predictions]
    # Single-class problem: every detection gets label 1.
    labels = [np.ones(pred['scores'].shape[0]) for pred in predictions]

    # Forward the caller's weights; they used to be replaced by a hard-coded None.
    boxes, scores, labels = weighted_boxes_fusion(
        boxes, scores, labels, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes * scale
    return boxes, scores, labels

# def ensemble_wbf(boxes, scores, image_size=1024, iou_thr=0.35, skip_box_thr=0.65, weights=None):
#     boxes = [box_set/(image_size-1) for box_set in boxes]
#     labels = [np.ones(len(score)) for score in scores]
#     boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=None, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
#     boxes = boxes*(image_size-1)
#     return boxes, scores, labels

In [10]:
def get_train_transforms():
    """Build the albumentations pipeline used for training samples.

    The pipeline takes a random 1024x1024 crop, optionally (p=0.5) applies
    ``RandomSizedCrop`` with output size 1024x1024, applies one of two colour
    jitters, rarely converts to gray, flips randomly and converts the image to
    a tensor. Boxes are handled in ``pascal_voc`` format with a ``labels`` field.
    """
    colour_jitter = A.OneOf(
        [
            A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.9),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.9),
        ],
        p=0.9,
    )
    steps = [
        A.RandomCrop(1024, 1024),
        A.RandomSizedCrop(min_max_height=(800, 800), height=1024, width=1024, p=0.5),
        colour_jitter,
        A.ToGray(p=0.01),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, bbox_params=box_settings)


def get_valid_transforms():
    """Build the validation pipeline: a random 1024x1024 crop followed by tensor conversion.

    Boxes are handled in ``pascal_voc`` format with a ``labels`` field.
    """
    steps = [
        A.RandomCrop(1024, 1024),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(steps, bbox_params=box_settings)

def apply_tta(image):
    """Apply Test Time Augmentation (TTA).

    Returns a list of 11 views in a fixed order: the original image, vertical
    flip, horizontal flip, both flips, three successive 90-degree rotations,
    the zoomed image and its three successive 90-degree rotations.
    ``revert_tta`` relies on this order.
    """
    def successive_quarter_turns(view):
        once = np.rot90(view)
        twice = np.rot90(once)
        thrice = np.rot90(twice)
        return [once, twice, thrice]

    views = [
        image,
        flip_v_image(image),
        flip_h_image(image),
        flip_h_image(flip_v_image(image)),
    ]
    views.extend(successive_quarter_turns(image))

    zoomed = zoom_image(image)
    views.append(zoomed)
    views.extend(successive_quarter_turns(zoomed))
    return views

def revert_tta(boxes, img_size):
    """Undo TTA in order to ensemble predictions.

    ``boxes`` must hold exactly 11 box arrays in the order produced by
    ``apply_tta``; the result has the same order. The flip helpers modify the
    arrays they are given in place.
    """
    (plain, v_flipped, h_flipped, vh_flipped,
     turned_1, turned_2, turned_3,
     zoomed, zoomed_turned_1, zoomed_turned_2, zoomed_turned_3) = boxes

    restored = [
        plain,
        flip_v_boxes(v_flipped, img_size),
        flip_h_boxes(h_flipped, img_size),
        flip_v_boxes(flip_h_boxes(vh_flipped, img_size), img_size),
    ]

    # A view rotated j times is brought back by rotating its boxes 4 - j times.
    for turned, k in ((turned_1, 3), (turned_2, 2), (turned_3, 1)):
        restored.append(rotate_boxes(turned, img_size, k))

    restored.append(unzoom_boxes(zoomed))
    for turned, k in ((zoomed_turned_1, 3), (zoomed_turned_2, 2), (zoomed_turned_3, 1)):
        restored.append(unzoom_boxes(rotate_boxes(turned, img_size, k)))

    return restored

# def apply_tta(image):
#     """Apply Test Time Augmentation (TTA)"""
#     flipV_sample = flip_v_image(image)
#     return [image, flipV_sample]

# def revert_tta(boxes, img_size):
#     """Undo TTA in order to ensemble predictions"""
#     sample0, flippedV = boxes
#     sample1 = flip_v_boxes(flippedV, img_size)
#     return [sample0, sample1]

def zoom_image(image):
    """Shrink ``image`` to 800x800 and paste it at offset 100 on a zero canvas.

    The canvas has the shape of ``image``; the result is returned as float32.
    """
    canvas = np.zeros_like(image)
    shrunk = cv2.resize(image, (800, 800))
    canvas[100:900, 100:900] = shrunk
    return canvas.astype(np.float32)

def unzoom_boxes(boxes):
    """Map boxes from the zoomed view back: subtract the 100 offset, scale by 1.28."""
    shifted = boxes - 100
    return shifted * 1.28

def flip_v_image(image):
    """Reverse the image along axis 0 (rows)."""
    return np.flipud(image)
    
def flip_h_image(image):
    """Reverse the image along axis 1 (columns)."""
    return np.fliplr(image)

def rotate_boxes(boxes, img_size, k):
    """Apply ``k`` successive quarter-turn coordinate transforms to the boxes.

    Each step maps ``[a, b, c, d]`` to ``[b, img_size - c, d, img_size - a]``.
    With ``k == 0`` the input array is returned unchanged.
    """
    rotated = boxes
    for _step in range(k):
        rotated = np.column_stack([
            rotated[:, 1],
            img_size - rotated[:, 2],
            rotated[:, 3],
            img_size - rotated[:, 0],
        ])
    return rotated

def flip_v_boxes(boxes, img_size):
    """Mirror the y-coordinates of the boxes about ``img_size``.

    The array is modified in place and also returned.
    """
    # The right-hand side is evaluated on a copy before the assignment, so the
    # two y-columns swap roles correctly.
    boxes[:, [1, 3]] = img_size - boxes[:, [3, 1]]
    return boxes
    
def flip_h_boxes(boxes, img_size):
    """Mirror the x-coordinates of the boxes about ``img_size``.

    The array is modified in place and also returned.
    """
    # The right-hand side is evaluated on a copy before the assignment, so the
    # two x-columns swap roles correctly.
    boxes[:, [0, 2]] = img_size - boxes[:, [2, 0]]
    return boxes

def tensor_transform(sample):
    """Run ``sample`` through a pipeline that only converts the image to a tensor.

    Boxes are handled in ``pascal_voc`` format with a ``labels`` field.
    """
    box_settings = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    to_tensor = A.Compose([ToTensorV2(p=1.0)], bbox_params=box_settings)
    return to_tensor(**sample)

def resize_transform(sample, image_size=1024):
    """Resize ``sample`` to ``image_size`` x ``image_size``.

    Boxes are handled in ``pascal_voc`` format with a ``labels`` field.
    """
    box_settings = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    resize = A.Compose(
        [A.Resize(height=image_size, width=image_size, p=1.0)],
        bbox_params=box_settings,
    )
    return resize(**sample)

### Metric Definition

In [11]:
@jit(nopython=True)
def calculate_iou(gt, pr, form='pascal_voc') -> float:
    """Calculates the Intersection over Union.

    Side lengths are measured with a ``+ 1``, so a box whose min and max
    coordinates coincide counts as one unit wide.

    Args:
        gt: (np.ndarray[Union[int, float]]) coordinates of the ground-truth box
        pr: (np.ndarray[Union[int, float]]) coordinates of the predicted box
        form: (str) gt/pred coordinates format
            - pascal_voc: [xmin, ymin, xmax, ymax]
            - coco: [xmin, ymin, w, h]
    Returns:
        (float) Intersection over union; 0.0 is returned as soon as the
        overlap extent along x or y is negative.
    """
    if form == 'coco':
        # Work on copies so the caller's arrays are not modified while
        # converting [xmin, ymin, w, h] into [xmin, ymin, xmax, ymax].
        gt = gt.copy()
        pr = pr.copy()

        gt[2] = gt[0] + gt[2]
        gt[3] = gt[1] + gt[3]
        pr[2] = pr[0] + pr[2]
        pr[3] = pr[1] + pr[3]

    # Calculate overlap area
    dx = min(gt[2], pr[2]) - max(gt[0], pr[0]) + 1

    if dx < 0:
        return 0.0

    dy = min(gt[3], pr[3]) - max(gt[1], pr[1]) + 1

    if dy < 0:
        return 0.0

    overlap_area = dx * dy

    # Calculate union area
    union_area = (
            (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1) +
            (pr[2] - pr[0] + 1) * (pr[3] - pr[1] + 1) -
            overlap_area
    )

    return overlap_area / union_area


@jit(nopython=True)
def find_best_match(gts, pred, pred_idx, threshold = 0.5, form = 'pascal_voc', ious=None) -> int:
    """Returns the index of the 'best match' between the
    ground-truth boxes and the prediction. The 'best match'
    is the highest IoU. IoUs below ``threshold`` are ignored, and
    ground-truth boxes whose first coordinate is negative are skipped
    (they are treated as already matched).

    Args:
        gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes
        pred: (List[Union[int, float]]) Coordinates of the predicted box
        pred_idx: (int) Index of the current predicted box
        threshold: (float) Threshold
        form: (str) Format of the coordinates
        ious: (np.ndarray) len(gts) x len(preds) matrix for storing calculated ious.
              Negative entries mean "not computed yet"; computed values are written back.

    Return:
        (int) Index of the best match GT box (-1 if no match above threshold)
    """
    best_match_iou = -np.inf
    best_match_idx = -1

    for gt_idx in range(len(gts)):

        if gts[gt_idx][0] < 0:
            # Already matched GT-box
            continue

        # Use the cached IoU when a cache is supplied; -1 forces a computation.
        iou = -1 if ious is None else ious[gt_idx][pred_idx]

        if iou < 0:
            iou = calculate_iou(gts[gt_idx], pred, form=form)

            if ious is not None:
                ious[gt_idx][pred_idx] = iou

        if iou < threshold:
            continue

        # Strict comparison: on ties the earliest ground-truth index is kept.
        if iou > best_match_iou:
            best_match_iou = iou
            best_match_idx = gt_idx

    return best_match_idx

@jit(nopython=True)
def calculate_precision(gts, preds, threshold = 0.5, form = 'coco', ious=None) -> float:
    """Calculates precision for GT - prediction pairs at one threshold.

    The value returned is ``tp / (tp + fp + fn)``. ``gts`` is modified in
    place: every matched ground-truth row is overwritten with -1.

    Args:
        gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes
        preds: (List[List[Union[int, float]]]) Coordinates of the predicted boxes,
               sorted by confidence value (descending)
        threshold: (float) Threshold
        form: (str) Format of the coordinates (defaults to 'coco' here, whereas
              find_best_match and calculate_iou default to 'pascal_voc')
        ious: (np.ndarray) len(gts) x len(preds) matrix for storing calculated ious.

    Return:
        (float) Precision
    """
    n = len(preds)
    tp = 0
    fp = 0

    # for pred_idx, pred in enumerate(preds_sorted):
    for pred_idx in range(n):

        best_match_gt_idx = find_best_match(gts, preds[pred_idx], pred_idx,
                                            threshold=threshold, form=form, ious=ious)

        if best_match_gt_idx >= 0:
            # True positive: The predicted box matches a gt box with an IoU above the threshold.
            tp += 1
            # Remove the matched GT box
            gts[best_match_gt_idx] = -1

        else:
            # No match
            # False positive: indicates a predicted box had no associated gt box.
            fp += 1

    # False negative: indicates a gt box had no associated predicted box.
    # Counted as the rows whose coordinate sum is still positive.
    fn = (gts.sum(axis=1) > 0).sum()

    # NOTE(review): the denominator is 0 when there are no predictions and no
    # ground-truth rows with a positive sum.
    return tp / (tp + fp + fn)


@jit(nopython=True)
def calculate_image_precision(gts, preds, thresholds = (0.5, ), form = 'coco') -> float:
    """Calculates image precision.

    The result is the mean of ``calculate_precision`` over all thresholds.

    Args:
        gts: (List[List[Union[int, float]]]) Coordinates of the available ground-truth boxes
        preds: (List[List[Union[int, float]]]) Coordinates of the predicted boxes,
               sorted by confidence value (descending)
        thresholds: (Tuple[float, ...]) IoU thresholds to average over
        form: (str) Format of the coordinates

    Return:
        (float) Precision
    """
    n_threshold = len(thresholds)
    image_precision = 0.0

    # IoU cache shared by all thresholds; -1 marks "not computed yet".
    ious = np.ones((len(gts), len(preds))) * -1
    # ious = None

    for threshold in thresholds:
        # A copy of gts is passed because calculate_precision overwrites matched rows.
        precision_at_threshold = calculate_precision(gts.copy(), preds, threshold=threshold,
                                                     form=form, ious=ious)
        image_precision += precision_at_threshold / n_threshold

    return image_precision

### Network

In [12]:
def get_resnet():
    """Create a ResNet-50 without pretrained weights, using ``FrozenBatchNorm2d``.

    Gradients are disabled for every parameter whose name does not contain
    'layer2', 'layer3' or 'layer4'.
    """
    trainable_tags = ('layer2', 'layer3', 'layer4')
    net = resnet.resnet50(pretrained=False, norm_layer=FrozenBatchNorm2d)
    for param_name, param in net.named_parameters():
        if not any(tag in param_name for tag in trainable_tags):
            param.requires_grad_(False)
    return net

class MyFasterRCNN(nn.Module):
    """Faster R-CNN assembled from torchvision building blocks.

    The pieces are an input transform, a ResNet-50 body (see ``get_resnet``),
    a feature pyramid network, a region proposal network and RoI heads with a
    two-class box predictor. ``forward`` returns
    ``(features, detections, losses)``.
    """

    def __init__(self):
        super(MyFasterRCNN, self).__init__()
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]
        # NOTE(review): the two 1024 arguments are presumably min_size/max_size — confirm against torchvision.
        self.transform = GeneralizedRCNNTransform(1024, 1024, image_mean, image_std)
        self.backbone = get_resnet()
        # Expose the outputs of layer1..layer4 under the names '0'..'3'.
        return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}
        # Both self.backbone and self.body are registered on the module; forward() only calls self.body.
        # NOTE(review): presumably saved checkpoints contain entries for both — verify before removing either.
        self.body = IntermediateLayerGetter(self.backbone, return_layers=return_layers)

        # Feature Pyramid Network
        out_channels = 256
        # Evaluates to [256, 512, 1024, 2048].
        in_channels_list = [256 * (2 ** i) for i in range(4)]
        self.fpn = FeaturePyramidNetwork(
            in_channels_list=in_channels_list,
            out_channels=out_channels,
            extra_blocks=LastLevelMaxPool())

        # Regional Proposal Network
        # One anchor size per entry, each combined with three aspect ratios.
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        head = RPNHead(out_channels, anchor_generator.num_anchors_per_location()[0])
        self.rpn = RegionProposalNetwork(
            anchor_generator=anchor_generator,
            head=head,
            fg_iou_thresh=0.7,
            bg_iou_thresh=0.3,
            batch_size_per_image=256,
            positive_fraction=0.5,
            pre_nms_top_n=dict(training=2000, testing=1000),
            post_nms_top_n=dict(training=2000, testing=1000),
            nms_thresh=0.7)

        # RoI heads
        representation_size = 512
        box_roi_pool = MultiScaleRoIAlign(['0', '1', '2', '3'], 7, 2)
        box_head = TwoMLPHead(out_channels * box_roi_pool.output_size[0] ** 2, representation_size)
        box_predictor = FastRCNNPredictor(representation_size, num_classes=2)
        self.roi_heads = RoIHeads(
            box_roi_pool=box_roi_pool,
            box_head=box_head,
            box_predictor=box_predictor,
            fg_iou_thresh=0.5, bg_iou_thresh=0.5,
            batch_size_per_image=512, positive_fraction=0.25,
            bbox_reg_weights=None,
            score_thresh=0.05,
            nms_thresh=0.5,
            detections_per_img=100)

    def forward(self, images, targets=None):
        """Run the detector.

        Args:
            images: images passed to ``self.transform``.
            targets: optional targets, forwarded to the transform, the RPN and the RoI heads.

        Returns:
            ``(features, detections, losses)``: the body outputs (not the FPN
            outputs), the RoI-head detections, and a dict merging the RoI-head
            and RPN loss dicts.
        """
        images, targets = self.transform(images, targets)
        features = self.body(images.tensors)

        fpn_features = self.fpn(features)
        # Wrap a bare tensor so downstream modules always receive a name -> tensor mapping.
        if isinstance(fpn_features, torch.Tensor): fpn_features = OrderedDict([('0', fpn_features)])
        proposals, proposal_losses = self.rpn(images, fpn_features, targets)
        detections, detector_losses = self.roi_heads(fpn_features, proposals, images.image_sizes, targets)

        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)

        return features, detections, losses
    

class ResNetFeaturizer(nn.Module):
    """ResNet-50 with weights loaded from a fixed checkpoint path, used as a feature extractor.

    ``forward`` returns the backbone's output for the transformed images.
    """

    def __init__(self):
        super().__init__()
        self.transform = GeneralizedRCNNTransform(
            1024, 1024, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        self.backbone = get_resnet()
        pretrained_state = torch.load('/kaggle/input/pretrained-pytorch/resnet50-19c8e357.pth')
        self.backbone.load_state_dict(pretrained_state)

    def forward(self, images, targets=None):
        """Transform ``images`` and return the backbone output for the batch tensor."""
        batch, _ = self.transform(images, targets)
        return self.backbone(batch.tensors)

In [13]:
def get_model(weights_path):
    """Build ``MyFasterRCNN``, load ``weights_path`` and return it on the GPU in eval mode.

    The box predictor is replaced by a fresh two-class ``FastRCNNPredictor``
    before the checkpoint is loaded.
    """
    detector = MyFasterRCNN()
    # 1 class (wheat) + background
    class_count = 2
    predictor_inputs = detector.roi_heads.box_predictor.cls_score.in_features
    detector.roi_heads.box_predictor = FastRCNNPredictor(predictor_inputs, class_count)
    checkpoint = torch.load(weights_path)
    detector.load_state_dict(checkpoint)
    return detector.cuda().eval()

# Global detector instance shared by the pseudo-labelling, training and inference cells.
model = get_model(weights_path)

In [14]:
def show_test_predictions():
    """Plot the fused predictions for the first item of the global ``test_data_loader``.

    The loader must yield TTA image lists (``WheatTestDataset`` with
    ``tta=True``), because ``run_wbf`` expects one prediction per TTA view.
    Returns without plotting when the loader is empty.
    """
    model.eval()
    # Inference only: do not build autograd graphs for the trainable layers.
    with torch.no_grad():
        for images, image_ids in test_data_loader:
            images = [torch.tensor(image).cuda().permute(2,0,1) for image in images[0]]
            features, predictions, loss = model(images)
            break
        else:
            # Empty loader: nothing to show.
            return

    # The first TTA view is the un-augmented image.
    sample = images[0].permute(1,2,0).cpu().numpy()
    boxes, scores, labels = run_wbf(predictions, image_size=1024)
    boxes = boxes.astype(np.int32)

    fig, ax = plt.subplots(1, 1, figsize=(16, 8))

    for box in boxes:
        cv2.rectangle(sample,
                      (box[0], box[1]),
                      (box[2], box[3]),
                      (220, 0, 0), 2)

    ax.set_axis_off()
    ax.imshow(sample)

In [15]:
# Test loader without TTA: each item is a single image, as consumed by cluster_test_data().
test_dataset = WheatTestDataset(test_df, test_path, tta=False)
test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4, drop_last=False, collate_fn=collate_fn)

In [16]:
# show_test_predictions()

In [17]:
def cluster_test_data():
    """Cluster the test images by their ``ResNetFeaturizer`` outputs.

    Reads single images from the global ``test_data_loader`` (batch size 1,
    TTA disabled) and groups them with ``AgglomerativeClustering``.

    Returns:
        dict mapping image id to cluster id. The dict is empty when the loader
        yields nothing. With a single image, that image is put in cluster 0.
        At most ``N_CLUSTERS`` clusters are requested, and never more than
        there are images.
    """
    rn_model = ResNetFeaturizer().cuda().eval()

    feature_rows = []
    image_ids = []
    # Inference only: do not build autograd graphs for the trainable layers.
    with torch.no_grad():
        for images, image_id in tqdm(test_data_loader):
            batch_features = rn_model([torch.tensor(images[0]).cuda().permute(2,0,1)])
            # Keep one 2-D row per image so stacking works for any number of images.
            feature_rows.append(batch_features.cpu().numpy().reshape(1, -1))
            image_ids.append(image_id[0])

    # Release the featurizer's GPU memory before the detector is used again.
    del rn_model
    torch.cuda.empty_cache()

    if not image_ids:
        return {}

    feature_vecs = np.concatenate(feature_rows, axis=0)
    print(feature_vecs.shape)

    if len(image_ids) < 2:
        # Clustering needs at least two samples; a lone image forms cluster 0.
        clusters = np.zeros(len(image_ids), dtype=np.int64)
    else:
        n_clusters = min(N_CLUSTERS, len(image_ids))
        agglo = AgglomerativeClustering(n_clusters=n_clusters)
        clusters = agglo.fit_predict(feature_vecs)

    id_to_cluster = {k : v for k,v in zip(image_ids, clusters)}
    return id_to_cluster

In [18]:
# image id -> cluster id for every test image.
id_to_cluster = cluster_test_data()

100%|██████████| 10/10 [00:01<00:00,  5.51it/s]

(10, 1000)





In [19]:
# for cluster_id in range(N_CLUSTERS):
#     for img_id in cluster_df[cluster_df['cluster_id'] == cluster_id]['image_id'].values:
#         img = test_dataset.load_image(img_id)
#         plt.imshow(img)
#         plt.title('%d' % cluster_id)
#         plt.show()

# test_df = pd.concat([test_df, cluster_df], axis=1)
# test_df.head()

In [20]:
# Rebind the test loader with the dataset's default tta=True: each item is now the list of TTA views.
test_dataset = WheatTestDataset(test_df, test_path)
test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4, drop_last=False, collate_fn=collate_fn)

In [21]:
def get_pseudo_labels(combine_with_train=True):
    """Predict boxes for every test image and return them as annotation rows.

    Uses the global ``model`` on the TTA views from the global
    ``test_data_loader`` and fuses the per-view predictions with ``run_wbf``.

    Args:
        combine_with_train: when true, ``train_df`` is appended to the result.

    Returns:
        DataFrame with the columns of ``train_df`` plus ``cluster_id``; images
        without any fused box contribute no rows.
    """
    columns = ['image_id', 'width', 'height', 'source', 'x', 'y', 'w', 'h', 'x1', 'y1',
               'image_id_orig', 'image_path', 'cluster_id']

    # A previous training round may have left the model in train mode, in which
    # the detector cannot be called without targets.
    model.eval()

    data = []
    # Inference only: do not build autograd graphs for the trainable layers.
    with torch.no_grad():
        for images, image_ids in test_data_loader:
            predictions = []
            for img in images[0]:
                inp = [torch.tensor(img).cuda().permute(2,0,1)]
                features, pred, loss = model(inp)
                predictions.extend(pred)

            boxes, scores, labels = run_wbf(predictions, image_size=image_size)
            boxes = (boxes).astype(np.int32).clip(min=0, max=int(image_size-1))
            image_id = image_ids[0]

            for box in boxes:
                x0, y0, x1, y1 = box
                w = x1 - x0
                h = y1 - y0
                data.append([image_id, 1024, 1024, 'test', x0, y0, w, h, x1, y1, image_id,
                             join(test_path, image_id + '.jpg'), id_to_cluster[image_id]])

    s1_df = pd.DataFrame(data, columns=columns)
    if combine_with_train:
        s1_df = pd.concat([s1_df, train_df])
    return s1_df

In [22]:
def train_round(num, model):
    """Fine-tune one checkpoint per cluster on pseudo-labelled test images.

    For every cluster id the model is reset to the weights at ``weights_path``,
    trained for ``PL_EPOCHS`` epochs on that cluster's pseudo-labels and saved
    as ``cluster_<id>.pth``. The model is returned to eval mode afterwards.

    Args:
        num: round number (not used inside the function).
        model: detector to train; its weights are overwritten.

    Returns:
        DataFrame of pseudo-labels produced at the start of the round.
    """
    s1_df = get_pseudo_labels(combine_with_train=False)

    # Read the base checkpoint once and keep it on the CPU; load_state_dict
    # copies the values into the model's own parameters for every cluster.
    base_state = torch.load(weights_path, map_location='cpu')

    for cluster_id in range(N_CLUSTERS):
        train_s1_df = s1_df[s1_df['cluster_id'] == cluster_id]
        train_dataset = WheatTrainDataset(train_s1_df, get_train_transforms())
        train_data_loader = DataLoader(train_dataset, batch_size=1, num_workers=4, drop_last=False, collate_fn=collate_fn)

        model.load_state_dict(base_state)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)

        for epoch in range(PL_EPOCHS):
            model.train()
            for step, (images, targets, image_ids) in enumerate(train_data_loader):
                # Load images/targets to cuda
                images = [img.cuda() for img in images]
                targets = [{k: v.cuda() for k, v in l.items()} for l in targets]
                # Send images through network
                features, detections, losses = model(images, targets)
                # Optimizer step
                optimizer.zero_grad()
                loss_FS = sum(losses.values())
                loss_FS.backward()
                optimizer.step()

        torch.save(model.state_dict(), 'cluster_%d.pth' % cluster_id)

    # Leave the model ready for inference (e.g. the next round's pseudo-labelling).
    model.eval()
    return s1_df

In [23]:
# Run the pseudo-labelling rounds; s1_df keeps the pseudo-labels of the last round.
for round_num in range(PL_ROUNDS):
    s1_df = train_round(round_num, model)

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


### Make Final Predictions

In [24]:
def format_prediction_string(boxes, scores):
    """Join the detections into one space-separated string.

    Each detection contributes the score with four decimals followed by the
    four values of its box.
    """
    return " ".join(
        "{0:.4f} {1} {2} {3} {4}".format(score, box[0], box[1], box[2], box[3])
        for score, box in zip(scores, boxes)
    )

In [25]:
# Attach each test image's cluster id; raises KeyError for an id that was not clustered.
test_df['cluster_id'] = [id_to_cluster[iid] for iid in test_df['image_id'].values]

In [26]:
# Predict every test image with the checkpoint fine-tuned for its cluster and
# collect one submission row per image.
detection_threshold = 0.5  # NOTE(review): assigned but not read below
image_size = 1024
results = []

for cluster_id in range(N_CLUSTERS):
    model.load_state_dict(torch.load('cluster_%d.pth' % cluster_id))
    model.eval()

    test_df_cluster = test_df[test_df['cluster_id'] == cluster_id]
    test_dataset = WheatTestDataset(test_df_cluster, test_path)
    test_data_loader = DataLoader(test_dataset, batch_size=1, num_workers=4, drop_last=False, collate_fn=collate_fn)

    # Inference only: do not build autograd graphs for the trainable layers.
    with torch.no_grad():
        for images, image_ids in test_data_loader:
            image_id = image_ids[0]

            # One forward pass per TTA view.
            predictions = []
            for img in images[0]:
                inp = [torch.tensor(img).cuda().permute(2,0,1)]
                features, pred, loss = model(inp)
                predictions.extend(pred)

            boxes, scores, labels = run_wbf(predictions, image_size=image_size)
            boxes = boxes.astype(np.int32).clip(min=0, max=int(image_size-1))
            # Convert [x0, y0, x1, y1] to [x0, y0, w, h] for the prediction string.
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

            result = {
                'image_id': image_id,
                'PredictionString': format_prediction_string(boxes, scores)
            }
            results.append(result)

In [27]:
# Assemble the submission table from the collected per-image results and preview it.
sub_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
sub_df.head()

Unnamed: 0,image_id,PredictionString
0,aac893a91,0.9994 558 533 125 188 0.9993 245 86 132 146 0...
1,51f1be19e,0.9993 610 88 152 168 0.9987 71 692 126 216 0....
2,51b3e36ab,0.9999 543 30 248 129 0.9999 688 612 334 130 0...
3,348a992bb,0.9998 733 223 141 88 0.9998 597 444 123 97 0....
4,cc3532ff6,0.9999 768 828 168 165 0.9998 472 404 127 151 ...


In [28]:
# Write the submission file without the index column.
sub_df.to_csv('submission.csv', index=False)