In [2]:
from __future__ import print_function, division
import copy
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import time
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms, utils
import torchvision
print(torch.__version__)
print(torchvision.__version__)

%matplotlib inline

0.4.1
0.2.1


In [2]:
class DeepLesionDataset(Dataset):
    """DeepLesion dataset."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.data_frame = pd.read_table(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):


        img_name = os.path.join(self.root_dir,
                                os.path.join(
                                    *self.data_frame.iloc[idx, 0].rsplit('_', 1)
                                ))

        image = io.imread(img_name).astype(np.int32)
#         image = np.expand_dims(image, axis=2)
        image = image - 32768
        image = image.astype(np.int32)
        

        
        img_min = image.min()
        img_max = image.max()
        
        low_bound = -500
        high_bound = 500
        
        
        channel_1 = (((image-img_min)/(low_bound-img_min))*255).clip(0, 255)
        channel_2 = (((image-low_bound)/(high_bound-low_bound))*255).clip(0, 255)
        channel_3 = (((image-high_bound)/(img_max-high_bound))*255).clip(0, 255)

        image = np.zeros((3, image.shape[0], image.shape[1]))
        image[0] = channel_1
        image[1] = channel_2
        image[2] = channel_3
        image = image.astype(np.uint8).transpose(1,2,0)

        bounding_box = np.array(self.data_frame.iloc[idx].Bounding_boxes.split(','))
        bounding_box = bounding_box.astype('float').reshape(-1, 2)
        label = self.data_frame.iloc[idx].Coarse_lesion_type.astype(int) - 1




#         bounding_box = [c_x1, c_y1, c_x2, c_y2]

        
      
        c_x =  ((bounding_box[1][0] + bounding_box[0][0])/2) / image.shape[0]
        c_y =  ((bounding_box[1][1] + bounding_box[0][1])/2) / image.shape[1]
        c_hw = (bounding_box[1][0] - bounding_box[0][0]) / image.shape[0]
        c_hh = (bounding_box[1][1] - bounding_box[0][1]) / image.shape[1]
#         c_x1 = bounding_box[0][0] / image.shape[0]
#         c_y1 = bounding_box[0][1] / image.shape[1]
#         c_x2 = bounding_box[1][0] / image.shape[0]
#         c_y2 = bounding_box[1][1] / image.shape[1]

       
        grid_dim = 7
        target = torch.zeros((grid_dim,grid_dim,11))
        
        cell_dim = 1 / grid_dim

        target[int(c_y // cell_dim), int(c_x // cell_dim),0] = c_x
        target[int(c_y // cell_dim), int(c_x // cell_dim),1] = c_y
        target[int(c_y // cell_dim), int(c_x // cell_dim),2] = c_hw
        target[int(c_y // cell_dim), int(c_x // cell_dim),3] = c_hh
        target[int(c_y // cell_dim), int(c_x // cell_dim),4] = 1 # b1_c
        
        target[int(c_y // cell_dim), int(c_x // cell_dim),5] = c_x
        target[int(c_y // cell_dim), int(c_x // cell_dim),6] = c_y
        target[int(c_y // cell_dim), int(c_x // cell_dim),7] = c_hw
        target[int(c_y // cell_dim), int(c_x // cell_dim),8] = c_hh
        target[int(c_y // cell_dim), int(c_x // cell_dim),9] = 1 # b2_c
        target[int(c_y // cell_dim), int(c_x // cell_dim),10] = 1 # c_c
                
        if self.transform:
            image = self.transform(image)
    
    
        return image, target

d = DeepLesionDataset('../sets/train_set_small.tsv', '/media/mark/Data/deeplesion/Images_png',
                     transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                     ])
                     )

d[193]
# for i in d:
#     pass
# print(d[0][0].numpy().transpose(1, 2, 0).shape)
# for row in d[0][0].squeeze().numpy():
#     print(row)
# plt.close('all')
# fig,ax = plt.subplots(1, figsize=(5,5))
# fig = plt.figure(figsize=(5, 5))
# _ = plt.imshow(d[0][0].squeeze().numpy())
# plt.show()
# bb_x = d['bounding_box'][0,0]
# bb_y = d['bounding_box'][0,1]
# bb_w = d['bounding_box'][1,0] - d['bounding_box'][0,0] 
# bb_h = d['bounding_box'][0,1] - d['bounding_box'][1,1]
# rect1 = patches.Rectangle((bb_x,bb_y),bb_w,bb_h,linewidth=1,edgecolor='r',facecolor='none')
# ax.add_patch(rect1)

(tensor([[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          ...,
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]],
 
         [[-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          ...,
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],
 
         [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
          [-1.8044, -1.8044,

In [3]:
torch.tensor([[1., -1.], [1., -1.]]).size()

torch.Size([2, 2])

In [4]:
def imshow(inp, bb, pred, title=None):
    """Imshow for Tensor."""
#     print(inp.numpy().shape)
    inp = inp.numpy().transpose((1, 2, 0))
#     print(inp.shape)
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    fig = plt.figure(figsize=(8, 8))
    _ = plt.imshow(inp)
    ax = plt.gca()
    
    ax.add_patch(patches.Rectangle(
        (
            (bb[0] - bb[2]/2) * inp.shape[0],
            (bb[1] - bb[3]/2) * inp.shape[1]
        ),
        (bb[2]) * inp.shape[0],
        (bb[3]) * inp.shape[1],
        linewidth=1,edgecolor='b',facecolor='none'))
    ax.add_patch(patches.Rectangle(
        (
            (pred[0] - pred[2]/2) * inp.shape[0],
            (pred[1] - pred[3]/2) * inp.shape[1]
        ),
        (pred[2]) * inp.shape[0],
        (pred[3]) * inp.shape[1],
        linewidth=1,edgecolor='w',facecolor='none'))
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
#     plt.show()

# inputs, classes, bb = next(iter(dataloaders['train']))   

# out = torchvision.utils.make_grid(inputs)
# inputs
# imshow(out)

In [5]:
def visualize_model(model, num_images=6, typ='val'):
    was_training = model.training
    model.eval()
    images_so_far = 0
#     fig = plt.figure(figsize=(4, 4))

    with torch.no_grad():
        for i, (inputs, labels, bb) in enumerate(dataloaders[typ]):
            inputs = inputs.to(device)
            labels = labels.to(device)
            bb = bb.to(device)

            outputs = torch.sigmoid(model(inputs))
#             _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
#                 ax = plt.subplot(num_images//2, 2, images_so_far)
#                 ax.axis('off')
#                 ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j], bb.cpu().data[j], outputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

In [6]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        
                     ]),
    'test': transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        
                     ]),
    'val': transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

                     ]),
}
# DeepLesionDataset('./test_set.tsv', '/media/mark/Data/deeplesion/Images_png')

file_names = {'train': '../sets/validation_set_small.tsv',#train_set_small.tsv', 
              'val': '../sets/validation_set_small.tsv', 
              'test': '../sets/test_set_small.tsv'}

image_datasets = {x: DeepLesionDataset(file_names[x], '/media/mark/Data/deeplesion/Images_png',
                                          data_transforms[x])
                  for x in ['train', 'val', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=1,
                                             shuffle=True, num_workers=1)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
# class_names = image_datasets['train']['labels']



device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")

In [7]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

#     best_model_wts = copy.deepcopy(model.state_dict())
#     best_acc = 0.0
    best_loss = 1000000.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0

            # Iterate over data.
            for inputs, targets in dataloaders[phase]:
                inputs = inputs.to(device)
                targets = targets.to(device)
                

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                     # zero the parameter gradients
                    optimizer.zero_grad()
                    outputs = model(targets)
#                     print(model.features[0].weight)
#                     outputs = torch.sigmoid(model(inputs))
#                     _, preds = torch.max(outputs, 1)
                    
                    loss = criterion(outputs, targets)

                    # backward + optimize only if in training phase
                    if phase == 'train':

                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item()# / inputs.size(0)
                # running_loss += loss.item() loss.item() * inputs.size(0)
#                 running_corrects += torch.sum(preds == bb.data)

            epoch_loss = running_loss / dataset_sizes[phase]
#             epoch_acc = running_corrects.double() / dataset_sizes[phase]

#             print('{} Loss: {:.4f} Acc: {:.4f}'.format(
#                 phase, 
#                 epoch_loss, 
#                 epoch_acc))
            print('{} Loss: {:.4f}'.format(
                phase, 
                epoch_loss, 
#                 epoch_acc
            ))

    
#             # deep copy the model
#             if phase == 'val' and epoch_acc > best_acc:
#                 best_acc = epoch_acc
#                 best_model_wts = copy.deepcopy(model.state_dict())
                
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
#                 best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
#     visualize_model(model, 16, typ='train')
    return model

In [8]:
class YOLONet(torch.nn.Module):
    def __init__(self):
        # I would like to keep image in houndfield units but it might be tricky
        # Dont know if I should use their windows or not - probably should start with this
        """
        YOLO layout
        inp : 448 x 448 x 3 

        1: (conv (7 x 7) x 64 -> mp (2 x 2) x s2 ) x 2
        out: 112 x 112 x 64
        2: (conv (3 x 3) x 192) -> mp (2 x 2) x s2) x 1
        out: 56 x 56 x 192
        3: conv:
            (1 x 1) x 128
            (3 x 3) x 256
            (1 x 1) x 256
            (3 x 3) x 512
           mp: (2 x 2) x s2
        out: 28 x 28 x 512
        4: conv:
            (1 x 1) x 256 -| x 4
            (3 x 3) x 512 -|
            (1 x 1) x 512
            (3 x 3) x 1024
           mp: (2 x 2) x s2
        out: 14 x 14 x 1024  ### possibly keep at this resolution for higher res grid
        5: conv:
            (1 x 1) x 512  -| x 2
            (3 x 3) x 1024 -|
            (3 x 3) x 1024
            (3 x 3) x 1024 s2
        out: 7 x 7 x 1024
        6: conv:
            ((3 x 3) x 1024) x 2
        out: 7 x 7 x 1024
        7: FC -> 4096
        8: FC -> 7x7x10
        """
        super(YOLONet, self).__init__()
        # in : 448 x 448 x 1
        # out: 112 x 112 x 64
        self.features = torch.nn.Sequential( 
            nn.Conv2d(3, 64, 7, padding=3),
            nn.MaxPool2d(2, stride=2),
            nn.Conv2d(64, 64, 7, padding=3),
            nn.MaxPool2d(2, stride=2),
        
        # in : 112 x 112 x 64
        # out: 56 x 56 x 192
        
            nn.Conv2d(64, 192, 3, padding=1),
            nn.MaxPool2d(2, stride=2),
        
        # in : 56 x 56 x 192
        # out: 28 x 28 x 512
            nn.Conv2d(192, 128, 1),
            nn.Conv2d(128, 256, 3, padding=1),
            nn.Conv2d(256, 256, 1),
            nn.Conv2d(256, 512, 3, padding=1),
            nn.MaxPool2d(2, stride=2),
        # in : 28 x 28 x 512
        # out: 14 x 14 x 1024
            nn.Conv2d(512, 256, 1),
            nn.Conv2d(256, 512, 3, padding=1),
            nn.Conv2d(512, 256, 1),
            nn.Conv2d(256, 512, 3, padding=1),
            nn.Conv2d(512, 256, 1),
            nn.Conv2d(256, 512, 3, padding=1),
            nn.Conv2d(512, 256, 1),
            nn.Conv2d(256, 512, 3, padding=1),
            nn.Conv2d(512, 512, 1),
            nn.Conv2d(512, 1024, 3, padding=1),
            nn.MaxPool2d(2, stride=2),
        # in : 14 x 14 x 1024
        # out: 7 x 7 x 1024
            nn.Conv2d(1024, 512, 1),
            nn.Conv2d(512, 1024, 3, padding=1),
            nn.Conv2d(1024, 512, 1),
            nn.Conv2d(512, 1024, 3, padding=1),
            nn.Conv2d(1024, 1024, 3, padding=1),
            nn.Conv2d(1024, 1024, 3, padding=1, stride=2),
        # in : 7 x 7 x 1024
        # out: 7 x 7 x 1024
            nn.Conv2d(1024, 1024, 3, padding=1),
            nn.Conv2d(1024, 1024, 3, padding=1),
        )

        
        fc1 = nn.Linear(7 * 7 * 1024, 4096)
        fc2 = nn.Linear(4096, 7 * 7 * 11)
        
        self.connected = nn.Sequential(
            fc1, 
            fc2
        )

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary (differentiable) operations on Tensors.
        """
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.connected(x)
        x = x.view(x.size(0), 7, 7, 11)
        return torch.sigmoid(x)


In [9]:
# source : https://github.com/kevin970401/pytorch-YOLO-v1/blob/master/loss/loss.py

import torch
import torch.nn as nn

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision

class yoloLoss(nn.Module):
    def __init__(self, S, B, C, lambda_coord=5, lambda_noobj=0.5):
        super(yoloLoss, self).__init__()
        self.S = S
        self.B = B
        self.C = C
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        
    def calc_iou(self, A, B):
        """calc iou A & B
        
        Args:
            A (torch.FloatTensor): [N, SxSx(Bx5+C)]
            B (torch.FloatTensor): [N, SxSx(Bx5+C)]
        """

        A = A.view(-1, self.S, self.S, self.B * 5 + self.C)
        B = B.view(-1, self.S, self.S, self.B * 5 + self.C)
        
        A_x_center = A[:, :, :, 0:self.B*5:5]
        A_y_center = A[:, :, :, 1:self.B*5:5]
        A_w = A[:, :, :, 2:self.B*5:5]
        A_h = A[:, :, :, 3:self.B*5:5]
        
        B_x_center = B[:, :, :, 0:self.B*5:5]
        B_y_center = B[:, :, :, 1:self.B*5:5]
        B_w = B[:, :, :, 2:self.B*5:5]
        B_h = B[:, :, :, 3:self.B*5:5]
        
        A_area = A_w * A_h
        B_area = B_w * B_h

        inter_box_x0, _ = torch.max(torch.cat([(A_x_center-A_w/2).unsqueeze(dim=-1), (B_x_center-B_w/2).unsqueeze(dim=-1)], dim=-1), dim=-1)
        inter_box_y0, _ = torch.max(torch.cat([(A_y_center-A_h/2).unsqueeze(dim=-1), (B_y_center-B_h/2).unsqueeze(dim=-1)], dim=-1), dim=-1)

        inter_box_x1, _ = torch.min(torch.cat([(A_x_center+A_w/2).unsqueeze(dim=-1), (B_x_center+B_w/2).unsqueeze(dim=-1)], dim=-1), dim=-1)
        inter_box_y1, _ = torch.min(torch.cat([(A_y_center+A_h/2).unsqueeze(dim=-1), (B_y_center+B_h/2).unsqueeze(dim=-1)], dim=-1), dim=-1)
        
        inter_box_w = inter_box_x1-inter_box_x0
        inter_box_h = inter_box_y1-inter_box_y0

        inter = inter_box_w * inter_box_h * (inter_box_h>0).float() * (inter_box_w>0).float()

        iou = inter / (A_area + B_area - inter + 1e-6)

        return iou
        
    def get_argmax_iou(self, A, B):
        """get argmax of iou A & B
        
        Args:
            A (torch.FloatTensor): [N, SxSx(Bx5+C)]
            B (torch.FloatTensor): [N, SxSx(Bx5+C)]
        """

        iou = self.calc_iou(A, B)
        """iou: [N, S, S, B]
        """
        
        argmax = torch.argmax(iou, dim=-1)
        return argmax

    def forward(self, pred, target):
        """calc loss
        
        Args:
            pred (torch.floatTensor): [N, S, S, (Bx5+C)]
            target (torch.floatTensor): [N, S, S, Bx5+C] score is always equal to 1. bbox: [x_center, y_center, w, h]
        """
        num_elements = self.B * 5 + self.C
        num_batch = target.size(0)
        
        target = target.view(-1, self.S*self.S, num_elements)
        pred = pred.view(-1, self.S*self.S, num_elements)
        """now target and pred: [N, SxS, (Bx5+C)]
        """

        obj_mask = target[:,:,4] > 0
        noobj_mask = target[:,:,4] == 0

        obj_mask = obj_mask.unsqueeze(-1).expand_as(target).float()
        noobj_mask = noobj_mask.unsqueeze(-1).expand_as(target).float()
        """now obj_mask and noobj: [N, SxS, (Bx5+C)]
        """
        
        responsible_bbox_arg = self.get_argmax_iou(pred, target)
        responsible_bbox_scatter = torch.tensor((0, 1, 2, 3, 4))\
                                .repeat((num_batch, self.S * self.S, 1)).cuda()\
                                + responsible_bbox_arg.view(-1, self.S*self.S, 1)
        responsible_bbox_mask = torch.zeros((num_batch, self.S * self.S, self.B * 5 + self.C)).cuda()\
                                .scatter_(2, responsible_bbox_scatter, torch.ones((num_batch, self.S * self.S, self.B * 5 + self.C)).cuda())
        responsible_bbox_mask = responsible_bbox_mask * obj_mask

        # class prediction loss
        class_prediction_loss = ((torch.sigmoid(pred) - torch.sigmoid(target)) * obj_mask)[:, :, self.B*5:].pow(2).sum()

        # no obj loss
        noobj_loss = self.lambda_noobj * ((torch.sigmoid(pred) - torch.sigmoid(target)) * noobj_mask)[:, :, 4:self.B*5:5].pow(2).sum()

        # obj loss
        obj_loss = ((torch.sigmoid(pred) - torch.sigmoid(target)) * responsible_bbox_mask)[:, :, 4:self.B*5:5].pow(2).sum()

        # coord loss
        coord_xy_loss = self.lambda_coord * ((pred-target) * responsible_bbox_mask)[:, :, 0:self.B*5:5].pow(2).sum()\
                        + self.lambda_coord * ((pred-target) * responsible_bbox_mask)[:, :, 1:self.B*5:5].pow(2).sum()

        coord_wh_loss = self.lambda_coord * ((pred-target) * responsible_bbox_mask)[:, :, 2:self.B*5:5].pow(2).sum()\
                        + self.lambda_coord * ((pred-target) * responsible_bbox_mask)[:, :, 3:self.B*5:5].pow(2).sum()
        
        total_loss = class_prediction_loss + noobj_loss + obj_loss + coord_xy_loss + coord_wh_loss

        return total_loss/num_batch

# model_ft = YOLONet()
# model_ft = model_ft.to(device)
    

# loss = Loss(7, 2, 1)

# for inputs, target in dataloaders['train']:
#     inputs = inputs.to(device)
#     target = labels.to(device)
#     pred = model_ft(inputs)
#     print(pred.shape, target.shape)
#     print(loss(pred, target))
#     break

# optimizer = optim.SGD(model.parameters(), lr=0.001)
# results = None
# target = None
# c = 0
# model.train()
# for e in range(20):
#     for inputs, labels in dataloaders['train']:
#         inputs = inputs.to(device)
#         target = labels.to(device)
#         optimizer.zero_grad()
#         results = model(inputs)
#         loss = yoloLoss(7,2, 5, 1)(results, target)
#         print(loss.item())
#         loss.backward()
#         optimizer.step()
#         c += 1
# #     break
       
# # print(target.shape)
# # print(results.shape)



In [10]:

model_ft = YOLONet()

model_ft = model_ft.to(device)

# criterion = nn.SmoothL1Loss(reduction='sum')#reduction='sum')
criterion = yoloLoss(7, 2, 1)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)

# Decay LR by a factor of 0.1 every 24 epochs
# exp_lr_scheduler = None#lr_scheduler.StepLR(optimizer_ft, step_size=100, gamma=0.1)

In [11]:
model_ft = train_model(model_ft, criterion, optimizer_ft, None,
                       num_epochs=1)

Epoch 0/0
----------


RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 7, 7, 11] to have 3 channels, but got 7 channels instead

In [None]:
optimizer_ft.zero_grad()

In [None]:
visualize_model(model_ft, 16, typ='val')

In [None]:
def check_output(model, criterion):
    was_training = model.training
    model.eval()
    with torch.no_grad():
        for i, (inputs, labels, bb) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)
            bb = bb.to(device)

            outputs = torch.sigmoid(model(inputs))

            loss = criterion(outputs.view(-1, 1), bb.view(-1, 1))
            print(outputs)
            print(bb)
            print('l', loss)
            print(loss.sum(dim=1))
#             print(loss.item())
            return outputs, bb
            break
    model.train(mode=was_training)

o, bb = check_output(model_ft, nn.PairwiseDistance())

In [None]:
for f in model_ft.parameters():
    print(f)

In [None]:
model_ft = YOLONet()
pp = 0
for p in model_ft.parameters():
    if p.requires_grad:
        n_n=1
        for s in list(p.size()):
            n_n = n_n*s
        pp += n_n
print(pp)

In [3]:
models.alexnet(pretrained=False)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_feature