In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F 
import numpy as np
from collections import OrderedDict 
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import pandas as pd
import json
from PIL import Image


In [2]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device)
device

device(type='cuda', index=0)

In [3]:
class TransitionL(nn.Module):
    def __init__(self, in_features, out_features):
        super(TransitionL, self).__init__()
        self.transit = nn.Sequential(
            nn.BatchNorm2d(in_features),
            nn.ReLU(inplace = True),
            nn.Conv2d(in_features, out_features, 1),
            nn.AvgPool2d(kernel_size=2, stride=2))
    def forward(self, x):
        return self.transit(x)
        

In [4]:
test1 = TransitionL(1, 20)
test1(torch.ones([1, 1, 256, 256])).shape

torch.Size([1, 20, 128, 128])

In [5]:
# Hyper params
B = 2 # Num bounding box in one grid cell
S = 7 # Num gridcell
C = 1 # Num classes
IMG_SIZE = 448


In [6]:
class _DenseLayer(nn.Module):
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False):
        super(_DenseLayer, self).__init__()
        self.add_module('norm1', nn.BatchNorm2d(num_input_features)),
        self.add_module('relu1', nn.ReLU(inplace=True)),
        self.add_module('conv1', nn.Conv2d(num_input_features, bn_size *
                                           growth_rate, kernel_size=1, stride=1,
                                           bias=False)),
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),
        self.add_module('relu2', nn.ReLU(inplace=True)),
        self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                                           kernel_size=3, stride=1, padding=1,
                                           bias=False)),
        self.drop_rate = float(drop_rate)
        self.memory_efficient = memory_efficient

    def bn_function(self, inputs):
        "Bottleneck function"
        # type: (List[Tensor]) -> Tensor
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = self.conv1(self.relu1(self.norm1(concated_features)))  # noqa: T484
        return bottleneck_output

    def forward(self, input):  
        if isinstance(input, torch.Tensor):
            prev_features = [input]
        else:
            prev_features = input

        bottleneck_output = self.bn_function(prev_features)
        new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate,
                                     training=self.training)
        return new_features
    
class _DenseBlock(nn.ModuleDict):
    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False):
        super(_DenseBlock, self).__init__()
        for i in range(num_layers):
            layer = _DenseLayer(
                num_input_features + i * growth_rate,
                growth_rate=growth_rate,
                bn_size=bn_size,
                drop_rate=drop_rate,
                memory_efficient=memory_efficient,
            )
            self.add_module('denselayer%d' % (i + 1), layer)

    def forward(self, init_features):
        features = [init_features]
        for name, layer in self.items():
            new_features = layer(features)
            features.append(new_features)
        return torch.cat(features, 1)

In [7]:
class DenseNet(nn.Module):
    def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
                 num_init_features=64, bn_size=4, drop_rate=0, memory_efficient=False):

        super(DenseNet, self).__init__()

        # Convolution and pooling part from table-1
        self.features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2,
                                padding=3, bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_features)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]))

        # Add multiple denseblocks based on config 
        # for densenet-121 config: [6,12,24,16]
        num_features = num_init_features
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(
                num_layers=num_layers,
                num_input_features=num_features,
                bn_size=bn_size,
                growth_rate=growth_rate,
                drop_rate=drop_rate,
                memory_efficient=memory_efficient
            )
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                # add transition layer between denseblocks to 
                # downsample
                trans = TransitionL(num_features,
                                    num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2

        self.lastconv = nn.Conv2d(num_features, num_features, 1, 2)
        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        features = self.features(x)
        features_map = self.lastconv(features)
        return features_map

In [8]:
model = DenseNet()

In [9]:
a = torch.ones([1, 3, 448, 448])

In [10]:
b = model(a)

In [11]:
b.shape

torch.Size([1, 1024, 7, 7])

In [12]:
class YOLOD(nn.Module):
    def __init__(self):
        super(YOLOD, self).__init__()
        self.feature_extractor = DenseNet()
        self.grid = S
        self.num_classes = C

        self.linear_layers = nn.Sequential(
            nn.Linear(S*S*1024, 4096),
            nn.Dropout(p=0.1), 
            nn.LeakyReLU(0.1, inplace=True),
            nn.Linear(4096, self.grid*self.grid*(self.num_classes + B*5))
        )
        
    def forward(self, x):
        features = self.feature_extractor(x)
        flatten = torch.flatten(features)
        print(flatten.size())
        flatten = flatten.view(x.size()[0], -1)
        print(flatten.size())

        linear_vec = self.linear_layers(flatten)
        output = linear_vec.view(-1, self.grid, self.grid, self.num_classes + B*5)
        return output
    

In [13]:
yoloS = YOLOD()

In [14]:
out = yoloS(torch.ones([2, 3, 448, 448]))

torch.Size([100352])
torch.Size([2, 50176])


In [15]:
total = 0
for params in yoloS.named_parameters():
    
    l = params[1].detach().numpy().ravel()
    total += l
print(total)
    

ValueError: operands could not be broadcast together with shapes (9408,) (64,) (9408,) 

In [16]:
preprocess = {"train": transforms.Compose([
    transforms.Resize(448),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),]),
               "test": transforms.Compose([
    transforms.Resize(448),
    transforms.ToTensor(),
])
}

In [2]:
link = "../../../yolo-pytorch/data/global-wheat-detection/train.csv"

image_link = "../../../yolo-pytorch/data/global-wheat-detection/train"

class GlobalWheatData(Dataset):
    def __init__(self, csv_file, image_link, preprocess, img_size = 448, mode = "train"):
        
        super(GlobalWheatData, self).__init__()
        self.file = csv_file
        self.img_size = img_size
        self.wheat_size = 1024
        self.image_link = image_link
        self.mode = mode
        self.preprocess = preprocess
        self.data_x = []
        self.data_y = []
        self.load_data()
    def load_data(self):
        df = pd.read_csv(self.file)
        box_coord = df[["image_id", "bbox"]].groupby("image_id")["bbox"].apply(list).reset_index()
        mapDict = {k:v for k, v in zip(box_coord["image_id"], box_coord["bbox"])}
        N = len(mapDict.keys())
        X = np.zeros((N, self.img_size, self.img_size, 3), dtype='uint8')
        for idx, (id, boxes) in enumerate(mapDict.items()):
            image_name = self.image_link + "/" + id + ".jpg"
            X = Image.open(image_name)
            img_tensor = self.preprocess_img(X)            
            y = np.zeros((S, S, 5*B+ C))
            for i, box in enumerate(boxes):
                box = json.loads(box)
                xmin, ymin, w, h = box[0], box[1], box[2], box[3]
                # convert coord from 1024 image size to 448 image size
                xmin, ymin, w, h = xmin/self.wheat_size * 448, ymin/1024 * 448, w/1024 * 448, h/1024 * 448
                x_center, y_center = (xmin+w)/2, (ymin+h)/2
                x_idx, y_idx = int(x_center/self.img_size * S), int(y_center/self.img_size * S)
                y[x_idx, y_idx] = 1, int(x_center), int(y_center), int(w), int(h), 1, int(x_center), int(y_center), int(w), int(h), 1
            
            self.data_x.append(img_tensor)
            self.data_y.append(y)
            break
    def preprocess_img(self, img):
        if self.mode == "train":
            return_img = self.preprocess[self.mode](img)
        elif self.mode == "test":
            return_img = self.preprocess[self.mode](img)
        else:
            raise Exception("Wrong mode")
        return return_img
    def __getitem__(self, idx):
        X = self.data_x[idx]
        y = self.data_y[idx]
        return X, y
        

                
            
        
        
        
        

In [18]:
handler = GlobalWheatData(link, image_link, preprocess)

In [3]:
test = pd.read_csv(link)
test.head()

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


In [5]:
test[["image_id", "bbox"]].groupby("image_id")["bbox"].apply(list).reset_index()

Unnamed: 0,image_id,bbox
0,00333207f,"[[0, 654, 37, 111], [0, 817, 135, 98], [0, 192..."
1,005b0d8bb,"[[765.0, 879.0, 116.0, 79.0], [84.0, 539.0, 15..."
2,006a994f7,"[[437.0, 988.0, 98.0, 36.0], [309.0, 527.0, 11..."
3,00764ad5d,"[[89.0, 256.0, 113.0, 107.0], [216.0, 282.0, 1..."
4,00b5fefed,"[[709.0, 97.0, 204.0, 105.0], [775.0, 250.0, 1..."
...,...,...
3368,ffb445410,"[[0.0, 534.0, 54.0, 118.0], [0.0, 480.0, 38.0,..."
3369,ffbf75e5b,"[[0, 697, 21, 58], [104, 750, 77, 75], [65, 84..."
3370,ffbfe7cc0,"[[256.0, 0.0, 64.0, 99.0], [390.0, 0.0, 48.0, ..."
3371,ffc870198,"[[447.0, 976.0, 78.0, 48.0], [18.0, 141.0, 218..."


In [19]:
df = handler.load_data()

In [20]:
X, y = handler[0]

In [21]:
pred = yoloS(X.unsqueeze(0))

torch.Size([50176])
torch.Size([1, 50176])


In [None]:
!pwd

In [None]:
target_tensor.shape

In [22]:
y.shape

(7, 7, 11)

In [23]:
y = torch.from_numpy(y).unsqueeze(0)

In [24]:
y.shape

torch.Size([1, 7, 7, 11])

In [25]:
coo_mask = y[..., 0] == 1
noo_mask = y[..., 0] == 0

In [26]:
coo_mask.shape

torch.Size([1, 7, 7])

In [27]:
coo_mask = coo_mask.unsqueeze(-1).expand_as(y)

In [28]:
coo_mask.shape

torch.Size([1, 7, 7, 11])

In [29]:
coo_pred_mask = pred[coo_mask]
coo_pred_mask.shape

torch.Size([176])

In [30]:
coo_pred_mask = coo_pred_mask.view(-1, 11)

In [31]:
noo_target_mask = pred[noo_mask]
noo_target_mask.shape

torch.Size([33, 11])

In [32]:
coo_pred_mask.view(-1, 11)

tensor([[-0.4782, -0.2865, -0.3194,  0.0363,  0.0979,  0.5161, -0.1517, -0.1302,
          0.3840, -0.0491, -0.3200],
        [ 0.2188,  0.1053, -0.1791, -0.1166,  0.3733, -0.5443, -0.0927,  0.1693,
         -0.3498,  0.1636,  0.2443],
        [ 0.0245,  0.1688,  0.2279, -0.3182,  0.0791,  0.3367, -0.2535,  0.2820,
         -0.2257, -0.1069, -0.2360],
        [ 0.3293,  0.1579, -0.2639,  0.1323, -0.2710,  0.4613, -0.1528,  0.3648,
         -0.0500, -0.0994,  0.1221],
        [-0.4594,  0.2573, -0.3525,  0.0715, -0.0285,  0.5301, -0.0372,  0.0631,
         -0.4290, -0.1701, -0.4195],
        [-0.6249, -0.4430, -0.1884,  0.2141,  0.0109, -0.1479, -0.3562, -0.0515,
         -0.5866, -0.0300, -0.0970],
        [ 0.2597, -0.2929,  0.1352,  0.4356, -0.1567, -0.0259, -0.2835, -0.5129,
          0.2261, -0.6344,  0.2946],
        [ 0.0536, -0.6549,  0.0992, -0.0057,  0.3611,  0.1571,  0.3575,  0.4274,
          0.4644,  0.8024, -0.0475],
        [-0.0253, -0.1476,  0.1739, -0.1054,  0.6220,  0

In [33]:
coo_target_mask = y[coo_mask]

In [34]:
coo_target_mask.view(-1, 11)

tensor([[  1.,  20.,  43.,  38.,  25.,   1.,  20.,  43.,  38.,  25.,   1.],
        [  1.,  53.,  80.,  30.,  23.,   1.,  53.,  80.,  30.,  23.,   1.],
        [  1.,   8., 167.,  16.,  48.,   1.,   8., 167.,  16.,  48.,   1.],
        [  1.,  29., 200.,  59.,  42.,   1.,  29., 200.,  59.,  42.,   1.],
        [  1., 126.,  57.,  47.,  23.,   1., 126.,  57.,  47.,  23.,   1.],
        [  1., 103., 120.,  22.,  21.,   1., 103., 120.,  22.,  21.,   1.],
        [  1., 106., 154.,  30.,  31.,   1., 106., 154.,  30.,  31.,   1.],
        [  1., 116., 209.,  27.,  26.,   1., 116., 209.,  27.,  26.,   1.],
        [  1., 177.,  56.,  44.,  35.,   1., 177.,  56.,  44.,  35.,   1.],
        [  1., 139., 104.,  38.,  32.,   1., 139., 104.,  38.,  32.,   1.],
        [  1., 188., 129.,  70.,  38.,   1., 188., 129.,  70.,  38.,   1.],
        [  1., 170., 211.,  76.,  39.,   1., 170., 211.,  76.,  39.,   1.],
        [  1., 202.,  37.,  28.,  22.,   1., 202.,  37.,  28.,  22.,   1.],
        [  1

In [35]:
pred_noo_mask = 

SyntaxError: invalid syntax (<ipython-input-35-05688ed7254c>, line 1)

In [36]:
pred_conf = coo_pred_mask[:, 0]
target_conf = coo_target_mask[:, 0]

IndexError: too many indices for tensor of dimension 1

In [None]:
test = torch.zeros_like(torch.ones(16, 11))


In [None]:
test[..., 0] = 1

In [None]:
test

In [37]:
coo_pred_mask[torch.BoolTensor(test == 1)]

NameError: name 'test' is not defined

In [38]:
coo_pred_mask

tensor([[-0.4782, -0.2865, -0.3194,  0.0363,  0.0979,  0.5161, -0.1517, -0.1302,
          0.3840, -0.0491, -0.3200],
        [ 0.2188,  0.1053, -0.1791, -0.1166,  0.3733, -0.5443, -0.0927,  0.1693,
         -0.3498,  0.1636,  0.2443],
        [ 0.0245,  0.1688,  0.2279, -0.3182,  0.0791,  0.3367, -0.2535,  0.2820,
         -0.2257, -0.1069, -0.2360],
        [ 0.3293,  0.1579, -0.2639,  0.1323, -0.2710,  0.4613, -0.1528,  0.3648,
         -0.0500, -0.0994,  0.1221],
        [-0.4594,  0.2573, -0.3525,  0.0715, -0.0285,  0.5301, -0.0372,  0.0631,
         -0.4290, -0.1701, -0.4195],
        [-0.6249, -0.4430, -0.1884,  0.2141,  0.0109, -0.1479, -0.3562, -0.0515,
         -0.5866, -0.0300, -0.0970],
        [ 0.2597, -0.2929,  0.1352,  0.4356, -0.1567, -0.0259, -0.2835, -0.5129,
          0.2261, -0.6344,  0.2946],
        [ 0.0536, -0.6549,  0.0992, -0.0057,  0.3611,  0.1571,  0.3575,  0.4274,
          0.4644,  0.8024, -0.0475],
        [-0.0253, -0.1476,  0.1739, -0.1054,  0.6220,  0

In [42]:
c = torch.cat((noo_target_mask[..., 0:4], noo_target_mask[..., 5:8]), 1)

In [43]:
b = torch.ones_like(c)

In [46]:
b.shape

torch.Size([33, 7])

In [47]:
c.shape

torch.Size([33, 7])

In [50]:
def a(*s):
    a, b, c = s
    print(a, b ,c)
a(1, 2, 3)

1 2 3
