--------------

# Get the size of the largest image in test

In [52]:
from os import listdir
from os.path import isfile, join
# Find the largest height and width among the test images so that every image
# can later be padded to one common shape.
# NOTE(review): `png` is not imported in this cell; it relies on an import
# cell having been executed earlier in the kernel session.
TEST_DIR = '../test/'
onlyfiles = [f for f in listdir(TEST_DIR) if isfile(join(TEST_DIR, f))]
mh, mw = 0, 0
for f in onlyfiles:
    # Only the dimensions are needed here: read() hands the rows back as an
    # iterator, which avoids building the flat pixel array that read_flat()
    # materializes for every file.
    w, h, _rows, _info = png.Reader(filename=join(TEST_DIR, f)).read()
    mh = max(mh, h)
    mw = max(mw, w)
(mh, mw)

(3408, 3320)

Since most of the images differ in size, I would like to try a binning approach that clusters together images of the same height and width, with each cluster getting its own dataloader.

# Create CSV with image height and width

In [1]:
import pandas as pd
import numpy as np
import png
# Record the height and width of every distinct image listed in the
# augmented training CSV; the three lists are turned into a table below.
csv = pd.read_csv('./augmented_train.csv')
images = np.unique(csv['image_id'].values)
img, height, width = [], [], []
for e, i in enumerate(images):
    # Only the dimensions are needed: read() returns the rows as an iterator,
    # which avoids building the flat pixel array that read_flat() materializes
    # for each of the images.
    w, h, _rows, _info = png.Reader(filename=f'../train2/train/{i}.png').read()
    height.append(h)
    width.append(w)
    img.append(i)
    print(e, "of", len(images), end='       \r')
pd.DataFrame({'image_id': img, 'height': height, 'width': width})

59999 of 60000       60000              

Unnamed: 0,image_id,height,width
0,000434271f63a053c4128a0ba6352c7f,416,343
1,000434271f63a053c4128a0ba6352c7f_flipped,416,343
2,000434271f63a053c4128a0ba6352c7f_flipped_inverted,416,343
3,000434271f63a053c4128a0ba6352c7f_inverted,416,343
4,00053190460d56c53cc3e57321387478,416,342
...,...,...,...
59995,fff0f82159f9083f3dd1f8967fc54f6a_inverted,416,341
59996,fff2025e3c1d6970a8a6ee0404ac6940,416,386
59997,fff2025e3c1d6970a8a6ee0404ac6940_flipped,416,386
59998,fff2025e3c1d6970a8a6ee0404ac6940_flipped_inverted,416,386


In [2]:
# Persist the per-image size table (built from the img/height/width lists
# collected above) without the DataFrame index column.
size_columns = {'image_id': img, 'height': height, 'width': width}
pd.DataFrame(size_columns).to_csv('resized_height_width.csv', index=False)

In [49]:
# Column-wise maximum over the (height, width) keys of the size bins.
# NOTE(review): `size_bins` is not assigned in any cell shown above this one;
# presumably it is the result of get_size_bins() from an earlier kernel
# session - confirm before relying on Restart & Run All.
np.array(list(size_bins.keys())).max(axis=0)

array([3408, 3320])

It seems that the approach of binning into size clusters is not going to work since there are a lot of clusters with only one data point.

# Sorting the images into bins based on size

In [35]:
def get_size_bins(file='./height_width.csv'):
    """Group image ids by their (height, width).

    Parameters
    ----------
    file : str
        Path of a CSV whose rows unpack, in order, into
        (image id, height, width).

    Returns
    -------
    dict
        Maps each ``(height, width)`` tuple to the ``np.unique`` array of the
        image ids that have exactly that size.
    """
    table = pd.read_csv(file)

    grouped = {}
    for image_id, height, width in table.values:
        # setdefault creates the bin the first time a size is seen.
        grouped.setdefault((height, width), []).append(image_id)

    # Deduplicate (and thereby sort) the ids inside every bin.
    return {size: np.unique(ids) for size, ids in grouped.items()}

-------------

In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import math
import png
from skimage import exposure
from collections import Counter

import torch
import torch.nn as nn
import torch.optim as optim
import time
from torch.utils.data import Dataset, DataLoader


def read_pixels(filename):
    """Load ``../train/<filename>.png`` and return its pixels as a 2-D array.

    The flat pixel sequence returned by the PNG reader is reshaped to
    ``(height, width)``.
    """
    path = '../train/' + filename + '.png'
    width, height, flat, _info = png.Reader(filename=path).read_flat()
    return np.array(flat).reshape(height, width)


def seed_everything(SEED=42, deterministic=True):
    """Seed every random number generator used by the notebook.

    Parameters
    ----------
    SEED : int
        Seed applied to Python's ``random``, NumPy and PyTorch (CPU and all
        CUDA devices).
    deterministic : bool
        When True (default) cuDNN is asked for deterministic algorithms and
        its auto-tuner is switched off. The previous version enabled
        ``cudnn.benchmark``, which lets cuDNN pick algorithms by timing them
        and can therefore differ from run to run - the opposite of what a
        seeding helper is for. Pass False to get the faster auto-tuned mode.
    """
    import random  # local import: this cell's import list has no `random`

    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic

seed_everything(64)

class ImageDataset(Dataset):
    """Training dataset yielding a zero-padded image and its target vector.

    Each item is ``(image, probs_bboxes)``:

    * ``image`` - float16 tensor of shape ``(1, max_h, max_w)``; the PNG is
      centred on a zero canvas and its values are divided by 255.
    * ``probs_bboxes`` - float16 tensor of length ``15 + 14 * 4`` laid out as
      ``[objectness, 14 class flags, 14 boxes x 4 coordinates]``.

    Parameters
    ----------
    path : sequence of str
        Image ids; each is read from ``../train/<id>.png``.
    csv : pandas.DataFrame
        Annotation table with ``image_id`` and ``class_id`` columns. Columns
        from position 4 onwards are taken as the box coordinates
        (assumes the order x_min, y_min, x_max, y_max - TODO confirm).
    """

    def __init__(self, path, csv):
        self.X = path
        self.csv = csv
        # Common canvas size every image is padded to.
        self.max_h, self.max_w = 3408, 3320

    def __len__(self):
        return len(self.X)

    def __pad__(self, array):
        """Centre ``array`` on a zero canvas of ``(max_h, max_w)``.

        Returns the padded array together with the top and left offsets, which
        are needed to shift box coordinates into the padded frame.
        """
        h, w = array.shape
        pt = (self.max_h - h) // 2  # padding top
        pb = self.max_h - pt - h  # padding bottom
        pl = (self.max_w - w) // 2  # padding left
        pr = self.max_w - pl - w  # padding right
        padded = np.pad(array, pad_width=((pt, pb), (pl, pr)), constant_values=((0, 0), (0, 0)))
        return padded, pt, pl

    def _encode_targets(self, rows, pt, pl):
        """Build the float16 target vector for one image.

        ``rows`` holds the annotation rows of the image; ``pt``/``pl`` are the
        top/left padding offsets returned by ``__pad__``.
        """
        class_ids = rows['class_id'].values

        # Work on a float array: the previous integer array silently truncated
        # the normalized coordinates when they were assigned back into it.
        bboxes = rows.values[:, 4:].astype(np.float64)
        if np.isnan(bboxes).any():
            bboxes[:, [0, 1]] = 0.0
            bboxes[:, [2, 3]] = 1.0
        else:
            bboxes = np.trunc(bboxes)  # snap to whole pixels, as int() did
            # The boxes live in the padded image, so shift by the padding and
            # normalize by the padded size (not the original width/height,
            # which produced values above 1).
            bboxes[:, 0::2] = (bboxes[:, 0::2] + pl) / self.max_w  # x coordinates
            bboxes[:, 1::2] = (bboxes[:, 1::2] + pt) / self.max_h  # y coordinates

        lbl = np.zeros(15)
        output_bboxes = np.zeros(14 * 4)
        if not (class_ids == 14).any():  # class 14 means "no abnormality"
            lbl[0] = 1.  # there is an object of interest
            # Slot 0 is the objectness flag, so class c belongs in slot c + 1.
            # Writing to slot c let class 0 collide with the objectness flag
            # and left slot 14 unused.
            lbl[class_ids + 1] = 1.
            for row, c in enumerate(class_ids):
                output_bboxes[c * 4:(c + 1) * 4] = bboxes[row]

        return np.hstack((lbl, output_bboxes)).astype(np.float16)

    def __getitem__(self, i):
        filename = self.X[i]

        w, h, pixels, _ = png.Reader(filename=f'../train/{filename}.png').read_flat()
        image = np.array(pixels).reshape(h, w)
        image, pt, pl = self.__pad__(image)
        assert(image.shape == (self.max_h, self.max_w))
        image = np.expand_dims(image, axis=0)  # add the channel axis
        image = torch.tensor(image / 255.0, dtype=torch.float16)

        rows = self.csv[self.csv['image_id'] == filename]
        probs_bboxes = torch.tensor(self._encode_targets(rows, pt, pl), dtype=torch.float16)
        return image, probs_bboxes

class model1(nn.Module):
    """
    Fully convolutional baseline model that is not YOLO

    The network is a stack of Conv2d -> LeakyReLU -> BatchNorm2d stages
    followed by an output convolution and a sigmoid. ``forward`` requires the
    result to have a 1x1 spatial size. ``predict`` reads the output channels as
    ``[objectness, 14 class probabilities, 14 boxes x 4 coordinates]``.

    Parameters
    ----------
    layer_config : dict
        Ordered mapping of layer name to a dict with ``in_channel``,
        ``out_channel``, ``kernel_size`` and ``stride``. The keys
        ``'input_conv'`` and ``'output'`` are the first and last layer; every
        other key becomes a hidden stage registered under its own name.
    lr : float
        Stored as ``learning_rate``; the optimizer itself is attached by the
        caller through the ``optimizer`` attribute.
    """

    def __init__(self, layer_config, lr):
        super(model1, self).__init__()
        self.learning_rate = lr

        self.loss = nn.MSELoss(reduction='mean')
        self.LeakyReLU = nn.LeakyReLU(0.001)
        self.sigmoid = nn.Sigmoid()
        # Plain lists give forward() its iteration order; the layers are also
        # registered with add_module so their parameters are tracked.
        self.conv_layers = []
        self.bn_layers = []

        def make_conv(v):
            return nn.Conv2d(v['in_channel'],
                             v['out_channel'],
                             kernel_size=v['kernel_size'],
                             stride=v['stride'])

        for (k, v) in layer_config.items():
            if k == 'input_conv':
                self.input = make_conv(v)
                self.batch_input_norm = nn.BatchNorm2d(v['out_channel'])
            elif k == 'output':
                self.output = make_conv(v)
            else:
                self.conv_layers.append(make_conv(v))
                self.bn_layers.append(nn.BatchNorm2d(v['out_channel']))
                self.add_module(k, self.conv_layers[-1])
                self.add_module(f'batch_norm_{k}', self.bn_layers[-1])

        self.optimizer = None

    def forward(self, X):
        """Return sigmoid activations of shape ``(batch, channels, 1, 1)``."""
        y = self.input(X)
        y = self.LeakyReLU(y)
        y = self.batch_input_norm(y)

        for c, b in zip(self.conv_layers, self.bn_layers):
            y = c(y)
            y = self.LeakyReLU(y)
            y = b(y)

        y = self.output(y)
        y = self.sigmoid(y)
        assert(y.shape[2:] == (1, 1))
        return y

    def backward(self, y_hat, targets):
        """Run one optimization step and return the loss as a Python float.

        For rows whose target objectness (column 0) is 0, only the objectness
        column contributes to the loss; the class and box columns are replaced
        by their targets so their error - and gradient - is zero.
        """
        # flatten(1) keeps the batch axis; squeeze() dropped it for a batch of
        # one and broke the row indexing below.
        y_hat = y_hat.flatten(start_dim=1)
        row_mask = (targets[:, 0] == 0.0)
        # The earlier `y_hat[row_mask][:, 1:] = ...` wrote into a temporary
        # copy and had no effect. torch.where builds the masked prediction
        # without an in-place write on the sigmoid output.
        column_mask = torch.arange(y_hat.shape[1], device=y_hat.device) > 0
        ignore = row_mask.unsqueeze(1) & column_mask
        y_hat = torch.where(ignore, targets.to(y_hat.dtype), y_hat)

        loss = self.loss(y_hat, targets)
        loss_str = loss.item()

        self.optimizer.zero_grad()
        loss.backward()
        # The parameter update is done in float32, then the convolutions are
        # cast back to half precision (BatchNorm layers stay in float32).
        self.float()
        self.optimizer.step()

        self.input.half()
        for i in self.conv_layers:
            i.half()
        self.output.half()

        return loss_str

    def predict(self, X):
        """Turn a batch of images into one prediction string per image.

        * objectness <= 0.4: ``'14 1 0 0 1 1'``
        * 0.4 < objectness <= 0.8: class 14 with the objectness as confidence
        * otherwise: ``'<class> <prob> <x1> <y1> <x2> <y2> '`` for every class
          whose probability exceeds 0.4.
        """
        lo, hi = 0.4, 0.8
        with torch.no_grad():  # inference only: do not build an autograd graph
            Y = self.forward(X)
        # flatten(1) keeps the batch axis for a batch of one; converting to
        # Python floats keeps "tensor(...)" text out of the output strings.
        rows = Y.flatten(start_dim=1).float().tolist()
        output = []
        for r in rows:
            prob = r[0]
            if prob <= lo:
                output.append('14 1 0 0 1 1')
            elif prob <= hi:
                output.append(f'14 {prob} 0 0 1 1')
            else:
                string = ''
                class_probs = r[1:15]
                bboxes = r[15:]
                for i, cp in enumerate(class_probs):
                    if cp > 0.4:
                        x1, y1, x2, y2 = bboxes[i * 4:(i + 1) * 4]
                        string += f'{i} {cp} {x1} {y1} {x2} {y2} '
                # NOTE(review): the string is empty when no class clears the
                # 0.4 threshold - confirm that is acceptable downstream.
                output.append(string)
        return output

    def save(self, state, path='./'):
        """Serialize ``state`` to ``path`` with ``torch.save``."""
        torch.save(state, path)

    def load(self, checkpoint_path):
        """Restore weights, and optimizer state when both sides have one.

        NOTE: ``torch.load`` unpickles the file; only load checkpoints from a
        trusted source.
        """
        checkpoint = torch.load(checkpoint_path)
        self.load_state_dict(checkpoint['state_dict'])
        # An optimizer is attached by the caller and may be absent when the
        # model is only used for inference.
        if self.optimizer is not None and 'optimizer' in checkpoint:
            self.optimizer.load_state_dict(checkpoint['optimizer'])


# Convolution stack as (name, in_channel, out_channel, kernel_size, stride).
# The output layer has 1 objectness + 14 class + 14 * 4 box channels.
_LAYER_SPECS = (
    ('input_conv', 1, 32, 11, 5),
    ('conv_1', 32, 64, 11, 5),
    ('conv_2', 64, 128, 11, 7),
    ('output', 128, 1 + 14 + 14 * 4, 11, 9),
)

layer_config = {
    name: {
        'in_channel': in_channel,
        'out_channel': out_channel,
        'kernel_size': kernel_size,
        'stride': stride,
    }
    for name, in_channel, out_channel, kernel_size, stride in _LAYER_SPECS
}

# Build the network in half precision, keeping the BatchNorm layers in float32.
model = model1(layer_config, 1e-5)
model.half()  # convert to half precision
for layer in model.modules():
    if isinstance(layer, nn.BatchNorm2d):
        layer.float()
# Move the model to the GPU before the optimizer is created, so the optimizer
# is built from the parameters in their final location.
model.cuda()
# NOTE(review): the model is constructed with lr=1e-5 while Adam uses
# lr=1e-6 - confirm which value is intended.
model.optimizer = optim.Adam(model.parameters(), lr=1e-6, weight_decay=1e-3)
print(model)

# The annotation table gets its own name; `train_data` refers only to the
# Dataset built from it.
train_df = pd.read_csv('../augmented_train.csv')
xtrain = np.unique(train_df['image_id'].values)

batch_size = 16
train_data = ImageDataset(xtrain, train_df)
trainloader = DataLoader(train_data,
                         batch_size=batch_size,
                         shuffle=True,
                         pin_memory=True,
                         num_workers=min(batch_size, 12))
iterations = 2
train = False  # flip to True to actually run the training loop
if train:
    for epoc in range(iterations):
        batch_losses = []
        start_time = time.time()
        for i, (x, y) in enumerate(trainloader):
            Y_hat = model(x.cuda())
            batch_losses.append(model.backward(Y_hat, y.cuda()))
            print(f"loss on batch {i}:", np.round(batch_losses[-1], 2), end='        \r')
        losses = np.round(np.mean(batch_losses), 2)
        elapsed = time.time() - start_time
        # Remaining time estimate: this epoch's duration times the epochs left.
        eta = np.round(elapsed * (iterations - epoc - 1) / 3600, 2)
        print("epoch:", (epoc + 1), " | losses:", losses, " | ETA:", eta, "hours")
        # A checkpoint is written after every epoch, overwriting the last one.
        checkpoint = {
            'epoch': epoc,
            'state_dict': model.state_dict(),
            'optimizer': model.optimizer.state_dict()
        }
        model.save(checkpoint, './model_1.pt')

model1(
  (loss): MSELoss()
  (LeakyReLU): LeakyReLU(negative_slope=0.001)
  (sigmoid): Sigmoid()
  (input): Conv2d(1, 32, kernel_size=(11, 11), stride=(5, 5))
  (batch_input_norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_1): Conv2d(32, 64, kernel_size=(11, 11), stride=(5, 5))
  (batch_norm_conv_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_2): Conv2d(64, 128, kernel_size=(11, 11), stride=(7, 7))
  (batch_norm_conv_2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (output): Conv2d(128, 71, kernel_size=(11, 11), stride=(9, 9))
)


In [5]:
from os import listdir
from os.path import isfile, join
from itertools import chain

class TestDataset(Dataset):
    """Inference dataset yielding zero-padded test images without labels.

    ``path`` is a sequence of image ids; item ``i`` is read from
    ``../test/<id>.png``, centred on a ``(max_h, max_w)`` zero canvas, divided
    by 255 and returned as a float16 tensor of shape ``(1, max_h, max_w)``.
    """

    def __init__(self, path):
        self.X = path
        self.max_h, self.max_w = 3408, 3320

    def __len__(self):
        return len(self.X)

    def __pad__(self, array):
        """Centre ``array`` on the canvas; return (padded, top, left)."""
        rows, cols = array.shape
        top = (self.max_h - rows) // 2
        left = (self.max_w - cols) // 2
        # Whatever is left over after the top/left share goes bottom/right.
        bottom = self.max_h - top - rows
        right = self.max_w - left - cols
        padded = np.pad(array,
                        pad_width=((top, bottom), (left, right)),
                        constant_values=((0, 0), (0, 0)))
        return padded, top, left

    def __getitem__(self, i):
        name = self.X[i]

        width, height, flat, _ = png.Reader(filename=f'../test/{name}.png').read_flat()
        padded, _top, _left = self.__pad__(np.array(flat).reshape(height, width))
        assert(padded.shape == (self.max_h, self.max_w))
        with_channel = np.expand_dims(padded, axis=0)

        return torch.tensor(with_channel / 255.0, dtype=torch.float16)

    
# NOTE: despite their names, `xtrain` and `trainloader` hold the TEST image
# ids and loader from here on; the names are kept so that code relying on them
# keeps working.
# The '.png' suffix is removed by slicing: str.strip('.png') strips any of the
# characters '.', 'p', 'n', 'g' from BOTH ends and can eat part of an id.
# Sorting makes the row order of the output file reproducible.
xtrain = sorted(f[:-len('.png')] for f in listdir('../test/')
                if isfile(join('../test/', f)) and f.endswith('.png'))
batch_size = 32
trainloader = DataLoader(TestDataset(xtrain),
                         batch_size=batch_size,
                         shuffle=False,
                         num_workers=min(batch_size, 12))

start_time = time.time()
model.eval()
model.load('./model_1.pt')
predictions = []
with torch.no_grad():  # inference only: no autograd bookkeeping
    for i, x in enumerate(trainloader):
        # Clamp the counter so the last, possibly shorter, batch does not
        # report more images than exist.
        done = min((i + 1) * batch_size, len(xtrain))
        print(f'batch {done} of {len(xtrain)}', end='            \r')
        predictions += model.predict(x.cuda())
print(f'took {time.time() - start_time} seconds')
# shuffle=False keeps the predictions aligned with the ids in `xtrain`.
pd.DataFrame({'image_id': xtrain, 'PredictionString': predictions}).to_csv('../predictions.csv', index=False)

took 649.1487894058228 seconds
