# Object Detection
* 2 people in a group
* Deadline: 10/13

## Dataset

- PASCAL VOC 2007
  - Number of classes: 20
  - The data list is provided on Google Drive. However, you have to download the training/testing data yourself from http://host.robots.ox.ac.uk/pascal/VOC/voc2007/.
    - Train/Val data: 5011
        - Each row contains one image and its bounding boxes.
        - filename ($x_{min}$, $y_{min}$, $x_{max}$, $y_{max}$, $label$) $\times$ object_num
        - class idx starts from 1
    - Test data: 4951
        - filename ($x_{min}$, $y_{min}$, $x_{max}$, $y_{max}$, $label$) $\times$ object_num
        - class idx starts from 0
    


### Loading your data into Google Colab with Google Drive

In [11]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Number of grid cells along each image side (the "S" of YOLOv1).
# Read by yoloDataset.encoder and passed to yolov1Loss as its grid size.
GRID_NUM = 7

## Resnet50 backbone

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.resnet import BasicBlock, Bottleneck
from torchvision.models.resnet import model_urls
from torchsummary import summary

class classify_bottleneck(nn.Module):
    """Dilated residual bottleneck block used by the detection head.

    The main path is 1x1 conv -> 3x3 conv (dilation 2) -> 1x1 conv, each
    followed by batch normalisation. The shortcut is an identity unless a
    projection is required.

    Args:
        inplanes: number of input channels.
        planes: number of channels inside the block; the block outputs
            ``expansion * planes`` channels.
        stride: stride of the 3x3 convolution (and of the projection).
        block_type: ``'B'`` forces a projection shortcut; ``'A'`` uses the
            identity shortcut whenever the shapes allow it.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, block_type='A'):
        super(classify_bottleneck, self).__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # padding == dilation keeps the spatial size unchanged at stride 1.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=2, bias=False, dilation=2)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # An empty Sequential acts as the identity shortcut.
        self.downsample = nn.Sequential()
        # A projection is needed whenever the shortcut would not match the
        # main path in resolution or channel count; without the channel check
        # a type 'A' block with inplanes != planes fails at the residual add.
        needs_projection = (
            stride != 1 or inplanes != out_planes or block_type == 'B'
        )
        if needs_projection:
            self.downsample = nn.Sequential(
                nn.Conv2d(inplanes, out_planes, kernel_size=1, stride=stride,
                          bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = out + self.downsample(x)
        return F.relu(out)

class ResNetYoloV1(nn.Module):
    """ResNet backbone with a dilated head that outputs a YOLOv1 grid.

    The network is the standard ResNet stem and four residual stages,
    followed by three ``classify_bottleneck`` blocks and a stride-2
    convolution producing 30 channels. The output is passed through a
    sigmoid and returned channel-last. For a 448x448 input the result has
    shape ``(B, 7, 7, 30)``.

    Args:
        resnet_type: depth of the backbone; one of 18, 34, 50, 101, 152.

    Raises:
        KeyError: if ``resnet_type`` is not one of the supported depths.
    """

    def __init__(self, resnet_type):
        # nn.Module must be initialised before attributes are assigned.
        super(ResNetYoloV1, self).__init__()

        # depth -> (block class, blocks per stage, stage channels, zoo name)
        resnet_spec = {
            18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'),
            34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'),
            50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'),
            101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'),
            152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152'),
        }
        block, layers, _channels, name = resnet_spec[resnet_type]

        self.name = name
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # After layer4, self.inplanes holds its real output width
        # (512 * block.expansion: 2048 for Bottleneck, 512 for BasicBlock),
        # so the head also fits the 18/34 variants.
        self.layer5 = self._make_classify_layer(in_channels=self.inplanes)

        # Stride 2 halves the resolution: 14x14 -> 7x7 for a 448x448 input.
        self.conv_end = nn.Conv2d(256, 30, kernel_size=3, stride=2, padding=1,
                                  bias=False)
        self.bn_end = nn.BatchNorm2d(30)

        # Small-variance normal init for every convolution, identity init for
        # every batch norm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, mean=0, std=0.001)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` blocks and update ``self.inplanes``."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Projection shortcut for the first block of the stage.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _make_classify_layer(self, in_channels):
        """Build the three-block dilated head mapping ``in_channels`` to 256 channels."""
        layers = [
            classify_bottleneck(inplanes=in_channels, planes=256, block_type='B'),
            classify_bottleneck(inplanes=256, planes=256),
            classify_bottleneck(inplanes=256, planes=256),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return sigmoid-activated predictions laid out as (B, H, W, 30)."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        # The backbone downsamples by 32 in total, so a 448x448 input gives a
        # 14x14 map here; layer5 keeps that resolution.
        x5 = self.layer5(x4)
        x = self.conv_end(x5)
        x = self.bn_end(x)
        x = torch.sigmoid(x)  # squash every output into (0, 1)
        x = x.permute(0, 2, 3, 1)  # channel-first -> channel-last
        return x

    def init_weights(self):
        """Load the pretrained backbone weights from the torchvision model zoo.

        The checkpoint has no entries for the detection head (``layer5``,
        ``conv_end``, ``bn_end``), so loading is non-strict; a strict load
        would always fail on those missing keys.
        """
        org_resnet = torch.utils.model_zoo.load_url(model_urls[self.name])
        # Drop the classifier; the default of None avoids a KeyError when the
        # checkpoint has no fc layer.
        org_resnet.pop('fc.weight', None)
        org_resnet.pop('fc.bias', None)

        self.load_state_dict(org_resnet, strict=False)
        print("Initialize resnet from model zoo")

def load_change_weights(model, model_name):
    """Copy pretrained ResNet weights into ``model`` for every shared key.

    The checkpoint registered under ``model_name`` in ``model_urls`` is
    downloaded, its fully connected classifier entries are discarded, and
    every remaining entry whose key also exists in ``model`` replaces the
    model's own value. Keys that exist only in ``model`` keep their current
    values.

    Args:
        model: module that receives the weights.
        model_name: key into ``model_urls``, e.g. ``'resnet50'``.

    Returns:
        The same ``model`` instance, after loading.
    """
    pretrained = torch.utils.model_zoo.load_url(model_urls[model_name])
    for fc_key in ('fc.weight', 'fc.bias'):
        pretrained.pop(fc_key, None)

    merged = model.state_dict()
    for key, value in pretrained.items():
        if key.startswith('fc') or key not in merged:
            continue
        merged[key] = value

    model.load_state_dict(merged)
    return model

### Assignment
You are required to build a model to perform object detection on the provided Pascal VOC dataset in this project.
Here are some hints that help you to accomplish the project successfully.

### Hints
- YOLOv1 is the simplest and suggested model to be implemented.
- Be careful of the normalization techniques on bounding boxes.
    1. Normalize the height and width by the image size so that they fall between 0 and 1.
    2. The x and y coordinates are parameterized as offsets within a particular grid cell, so they are also bounded by 0 and 1.
- Loss function has a great impact on training stability.
    1. loss function is the most important in this project, especially in calculating IOU
    2. only one bounding box predictor is responsible for each object
    3. weights for different types of losses
    4. predict the square root of height and width instead of predicting them directly
- Data augmentation.
    1. It contains only 5011 images in total. Furthermore, the labels are highly imbalanced.
    2. Random scaling and translations are applied when training YOLO.
    3. Note that the bounding box coordinates have to be changed accordingly if the image was transformed.

### Evaluation Metric
- Please evaluate your model on Pascal VOC testing set using Mean Average Precision (mAP).
- Write a brief report covering your implementation, performance, and qualitative results (visualize bounding boxes on some images).
- For more detailed explanation of mAP, please follow https://github.com/rafaelpadilla/Object-Detection-Metrics

In [3]:
from torch.utils.data import DataLoader
from torch.autograd import Variable
import torchvision.transforms as transforms


## Dataset & data augmentation

In [4]:
import os.path
import random
import numpy as np
import torch
import torch.utils.data as data
import torchvision.transforms as transforms
import cv2
import matplotlib.pyplot as plt

class yoloDataset(data.Dataset):
    """Detection dataset that yields ``(image, target)`` pairs for YOLOv1.

    Every non-empty line of the list file is a file name followed by zero or
    more groups of ``x_min y_min x_max y_max label``. ``target`` is the
    ``GRID_NUM x GRID_NUM x 30`` tensor produced by :meth:`encoder`.

    Args:
        root: directory containing the images named in the list file.
        list_file: path of the annotation list.
        train: when true, random augmentation is applied in ``__getitem__``.
        transform: iterable of callables applied, in order, to the resized
            image.
    """

    image_size = 448  # side length every image is resized to

    def __init__(self, root, list_file, train, transform):
        self.root = root
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        self.mean = (123, 117, 104)  # RGB

        # NOTE: several list files (e.g. a VOC07 + VOC12 combination) are not
        # supported; concatenate them into one file beforehand.
        with open(list_file) as f:
            lines = f.readlines()

        # Format of each line: filename (x_min, y_min, x_max, y_max, label) * object_num
        for line in lines:
            splited = line.strip().split()
            if not splited:
                # Blank lines (typically a trailing one) carry no sample.
                continue
            self.fnames.append(splited[0])
            num_boxes = (len(splited) - 1) // 5
            box = []
            label = []
            for i in range(num_boxes):
                x, y, x2, y2 = (float(v) for v in splited[1 + 5 * i:5 + 5 * i])
                c = splited[5 + 5 * i]
                box.append([x, y, x2, y2])
                label.append(int(c) + 1)  # +1: labels in the file are taken to start at 0
            # The explicit shape keeps an image without objects as (0, 4)
            # instead of a 1-D empty tensor that would break box slicing.
            self.boxes.append(torch.Tensor(box).reshape(len(box), 4))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)

    def __getitem__(self, idx):
        """Return the preprocessed image and its encoded target for ``idx``.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        fname = self.fnames[idx]
        path = os.path.join(self.root, fname)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError('cannot read image: %s' % path)
        boxes = self.boxes[idx].clone()
        labels = self.labels[idx].clone()

        # Random augmentation; boxes and labels follow every geometric change.
        if self.train:
            img, boxes = self.random_flip(img, boxes)
            img, boxes = self.randomScale(img, boxes)
            img = self.randomBlur(img)
            img = self.RandomBrightness(img)
            img = self.RandomHue(img)
            img = self.RandomSaturation(img)
            img, boxes, labels = self.randomShift(img, boxes, labels)
            img, boxes, labels = self.randomCrop(img, boxes, labels)

        # Express box corners as fractions of the image size.
        h, w, _ = img.shape
        boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)

        img = self.BGR2RGB(img)  # the pretrained backbone expects RGB
        img = self.subMean(img, self.mean)
        img = cv2.resize(img, (self.image_size, self.image_size))
        # 30 = 2 boxes * (x, y, w, h, confidence) + 20 classes
        target = self.encoder(boxes, labels)
        for t in self.transform:
            img = t(img)

        return img, target

    def __len__(self):
        return self.num_samples

    # ------------------------------------------------------------------ utils
    def encoder(self, boxes, labels):
        """Encode normalised corner boxes and labels into the grid target.

        Args:
            boxes: ``(n, 4)`` tensor of ``x_min, y_min, x_max, y_max`` in
                image-relative units.
            labels: ``(n,)`` tensor of class indices starting at 1.

        Returns:
            ``(GRID_NUM, GRID_NUM, 30)`` tensor. In the cell holding a box
            centre, both box slots get the centre offset inside the cell, the
            box width/height and confidence 1, and the class entry is set.
        """
        grid_num = GRID_NUM
        target = torch.zeros((grid_num, grid_num, 30))
        cell_size = 1. / grid_num
        wh = boxes[:, 2:] - boxes[:, :2]
        cxcy = (boxes[:, 2:] + boxes[:, :2]) / 2
        for i in range(cxcy.size()[0]):
            cxcy_sample = cxcy[i]
            # ceil() - 1 maps a centre to its cell index. The clamp keeps a
            # centre lying exactly on 0 in the first cell instead of letting
            # index -1 wrap around to the last one.
            ij = ((cxcy_sample / cell_size).ceil() - 1).clamp(min=0, max=grid_num - 1)
            cell = target[int(ij[1]), int(ij[0])]  # view into target
            cell[4] = 1
            cell[9] = 1
            cell[int(labels[i]) + 9] = 1
            xy = ij * cell_size  # upper-left corner of the cell
            delta_xy = (cxcy_sample - xy) / cell_size
            cell[2:4] = wh[i]
            cell[:2] = delta_xy
            cell[7:9] = wh[i]
            cell[5:7] = delta_xy
        return target

    def BGR2RGB(self, img):
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def BGR2HSV(self, img):
        return cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    def HSV2BGR(self, img):
        return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)

    def subMean(self, bgr, mean):
        """Subtract the per-channel ``mean`` from the image."""
        mean = np.array(mean, dtype=np.float32)
        bgr = bgr - mean
        return bgr

    def _random_scale_hsv_channel(self, bgr, channel, max_value):
        """With probability 0.5, scale one HSV channel by 0.5 or 1.5.

        The scaled channel is clipped to ``[0, max_value]`` before the image
        is converted back to BGR.
        """
        if random.random() < 0.5:
            hsv = self.BGR2HSV(bgr)
            adjust = random.choice([0.5, 1.5])
            scaled = hsv[..., channel] * adjust
            hsv[..., channel] = np.clip(scaled, 0, max_value).astype(hsv.dtype)
            bgr = self.HSV2BGR(hsv)
        return bgr

    def RandomBrightness(self, bgr):
        """Randomly scale the V channel."""
        return self._random_scale_hsv_channel(bgr, 2, 255)

    def RandomSaturation(self, bgr):
        """Randomly scale the S channel."""
        return self._random_scale_hsv_channel(bgr, 1, 255)

    def RandomHue(self, bgr):
        """Randomly scale the H channel."""
        # 8-bit OpenCV hue spans 0..179, so that is the valid clipping bound.
        return self._random_scale_hsv_channel(bgr, 0, 179)

    def randomBlur(self, bgr):
        """With probability 0.5, apply a 5x5 box blur."""
        if random.random() < 0.5:
            bgr = cv2.blur(bgr, (5, 5))
        return bgr

    @staticmethod
    def _shift_slices(offset, size):
        """Return ``(destination, source)`` slices for a shift along one axis."""
        if offset >= 0:
            return slice(offset, None), slice(None, size - offset)
        return slice(None, size + offset), slice(-offset, None)

    def randomShift(self, bgr, boxes, labels):
        """With probability 0.5, translate the image by up to 20% per axis.

        Uncovered pixels are filled with the BGR mean colour. Only boxes whose
        centre stays inside the image are kept; if none remain, the inputs are
        returned unchanged.
        """
        center = (boxes[:, 2:] + boxes[:, :2]) / 2
        if random.random() < 0.5:
            height, width, c = bgr.shape
            after_shfit_image = np.zeros((height, width, c), dtype=bgr.dtype)
            after_shfit_image[:, :, :] = (104, 117, 123)  # BGR
            shift_x = random.uniform(-width * 0.2, width * 0.2)
            shift_y = random.uniform(-height * 0.2, height * 0.2)
            dx, dy = int(shift_x), int(shift_y)

            # Translate the image content.
            dst_x, src_x = self._shift_slices(dx, width)
            dst_y, src_y = self._shift_slices(dy, height)
            after_shfit_image[dst_y, dst_x, :] = bgr[src_y, src_x, :]

            center = center + torch.FloatTensor([[dx, dy]]).expand_as(center)
            in_x = (center[:, 0] > 0) & (center[:, 0] < width)
            in_y = (center[:, 1] > 0) & (center[:, 1] < height)
            keep = in_x & in_y
            boxes_in = boxes[keep]
            if len(boxes_in) == 0:
                return bgr, boxes, labels
            box_shift = torch.FloatTensor([[dx, dy, dx, dy]]).expand_as(boxes_in)
            boxes_in = boxes_in + box_shift
            labels_in = labels[keep]
            return after_shfit_image, boxes_in, labels_in
        return bgr, boxes, labels

    def randomScale(self, bgr, boxes):
        """With probability 0.5, stretch the width by 0.8-1.2 at fixed height."""
        if random.random() < 0.5:
            scale = random.uniform(0.8, 1.2)
            height, width, c = bgr.shape
            bgr = cv2.resize(bgr, (int(width * scale), height))
            scale_tensor = torch.FloatTensor([[scale, 1, scale, 1]]).expand_as(boxes)
            boxes = boxes * scale_tensor
            return bgr, boxes
        return bgr, boxes

    def randomCrop(self, bgr, boxes, labels):
        """With probability 0.5, crop a window covering 60-100% of each side.

        Only boxes whose centre falls inside the window are kept and they are
        clamped to it; if none remain, the inputs are returned unchanged.
        """
        if random.random() < 0.5:
            center = (boxes[:, 2:] + boxes[:, :2]) / 2
            height, width, c = bgr.shape
            h = random.uniform(0.6 * height, height)
            w = random.uniform(0.6 * width, width)
            x = random.uniform(0, width - w)
            y = random.uniform(0, height - h)
            x, y, h, w = int(x), int(y), int(h), int(w)

            center = center - torch.FloatTensor([[x, y]]).expand_as(center)
            in_x = (center[:, 0] > 0) & (center[:, 0] < w)
            in_y = (center[:, 1] > 0) & (center[:, 1] < h)
            keep = in_x & in_y

            boxes_in = boxes[keep]
            if len(boxes_in) == 0:
                return bgr, boxes, labels
            box_shift = torch.FloatTensor([[x, y, x, y]]).expand_as(boxes_in)

            boxes_in = boxes_in - box_shift
            boxes_in[:, [0, 2]] = boxes_in[:, [0, 2]].clamp(min=0, max=w)
            boxes_in[:, [1, 3]] = boxes_in[:, [1, 3]].clamp(min=0, max=h)

            labels_in = labels[keep]
            img_croped = bgr[y:y + h, x:x + w, :]
            return img_croped, boxes_in, labels_in
        return bgr, boxes, labels

    def random_flip(self, im, boxes):
        """With probability 0.5, mirror the image left-right and update ``boxes`` in place."""
        if random.random() < 0.5:
            im_lr = np.fliplr(im).copy()
            h, w, _ = im.shape
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
            return im_lr, boxes
        return im, boxes

    def random_bright(self, im, delta=16):  # unused
        """Scale the image by a random factor and add a random offset within ``delta``."""
        alpha = random.random()
        if alpha > 0.3:
            im = im * alpha + random.randrange(-delta, delta)
            im = im.clip(min=0, max=255).astype(np.uint8)
        return im

## Yolov1 Loss


In [5]:
# Loss function
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class yolov1Loss(nn.Module):
    """YOLOv1 sum-of-squares detection loss.

    Args:
        S: number of grid cells per image side.
        B: number of boxes predicted per cell.
        C: number of classes.
        lambda_coord: weight of the box coordinate/size terms.
        lambda_noobj: weight of the confidence term for cells without object.

    All intermediate tensors are created on the device of the inputs, so the
    loss runs on CPU as well as on GPU.
    """

    def __init__(self, S, B, C, lambda_coord, lambda_noobj):
        super(yolov1Loss, self).__init__()
        self.S = S
        self.B = B
        self.C = C
        self.l_coord = lambda_coord
        self.l_noobj = lambda_noobj

    def calculateIoU(self, box1, box2):
        """Return the pairwise IoU of two sets of corner boxes.

        Args:
            box1: ``[n, 4]`` tensor of ``xmin, ymin, xmax, ymax``.
            box2: ``[m, 4]`` tensor of ``xmin, ymin, xmax, ymax``.

        Returns:
            ``[n, m]`` tensor whose entry ``(i, j)`` is the IoU of
            ``box1[i]`` and ``box2[j]``.
        """
        # Corners of every pairwise intersection, via broadcasting.
        lt = torch.max(box1[:, None, :2], box2[None, :, :2])    # [n, m, 2]
        rb = torch.min(box1[:, None, 2:4], box2[None, :, 2:4])  # [n, m, 2]

        # A negative extent means the two boxes do not overlap.
        wh = (rb - lt).clamp(min=0)
        intersection = wh[..., 0] * wh[..., 1]                   # [n, m]

        area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])  # [n]
        area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])  # [m]

        return intersection / (area1[:, None] + area2[None, :] - intersection)

    def _cell_iou(self, pred_boxes, target_boxes):
        """IoU between every predicted box of a cell and that cell's target.

        Args:
            pred_boxes: ``[n, B, 4]`` tensor of ``x, y, w, h``.
            target_boxes: ``[n, 1, 4]`` tensor of ``x, y, w, h``.

        Returns:
            ``[n, B]`` tensor of IoU values.
        """
        S = float(self.S)

        def corners(boxes):
            # The cell offset is brought to image scale (divided by S) before
            # being combined with the image-relative width and height.
            centre = boxes[..., :2] / S
            half = 0.5 * boxes[..., 2:4]
            return centre - half, centre + half

        p_lt, p_rb = corners(pred_boxes)
        t_lt, t_rb = corners(target_boxes)
        wh = (torch.min(p_rb, t_rb) - torch.max(p_lt, t_lt)).clamp(min=0)
        intersection = wh[..., 0] * wh[..., 1]
        area_p = (p_rb - p_lt).prod(dim=-1)
        area_t = (t_rb - t_lt).prod(dim=-1)
        return intersection / (area_p + area_t - intersection)

    def forward(self, preds, targets):
        """Compute the loss averaged over the batch.

        Args:
            preds: ``[batchsize, S, S, B*5+C]`` tensor; each box holds
                ``x, y, w, h, c``.
            targets: tensor of the same shape.

        Returns:
            Scalar tensor: the summed loss divided by ``batchsize``.
        """
        B = self.B
        batchsize = preds.size(0)

        # A cell contains an object when its first target confidence is set.
        obj_cells = targets[..., 4] > 0     # [batchsize, S, S]
        noobj_cells = targets[..., 4] == 0

        # Cells without object: only the box confidences are penalised.
        conf_cols = [4 + 5 * b for b in range(B)]
        noobj_pred_conf = preds[noobj_cells][:, conf_cols]
        noobj_target_conf = targets[noobj_cells][:, conf_cols]
        loss_noobj = F.mse_loss(noobj_pred_conf, noobj_target_conf, reduction='sum')

        # Cells with object.
        obj_pred = preds[obj_cells]         # [n_obj, B*5+C]
        obj_target = targets[obj_cells]
        box_pred = obj_pred[:, :5 * B].reshape(-1, B, 5)
        box_target = obj_target[:, :5 * B].reshape(-1, B, 5)
        class_pred = obj_pred[:, 5 * B:]
        class_target = obj_target[:, 5 * B:]

        # Per cell, the predictor with the highest IoU against the target is
        # responsible for the object. The target boxes of one cell are
        # identical in the current encoding, so the first one is used. The
        # IoU serves as a regression target and carries no gradient.
        with torch.no_grad():
            iou = self._cell_iou(box_pred[..., :4], box_target[:, :1, :4])
            max_iou, best = iou.max(dim=1)

        rows = torch.arange(box_pred.size(0), device=preds.device)
        box_pred_response = box_pred[rows, best]       # [n_obj, 5]
        box_target_response = box_target[rows, best]

        loss_xy = F.mse_loss(box_pred_response[:, :2], box_target_response[:, :2], reduction='sum')
        # Square roots make the same absolute error cost more on small boxes.
        loss_wh = F.mse_loss(torch.sqrt(box_pred_response[:, 2:4]),
                             torch.sqrt(box_target_response[:, 2:4]), reduction='sum')
        loss_obj = F.mse_loss(box_pred_response[:, 4], max_iou, reduction='sum')

        # Class probability loss of the cells containing objects.
        loss_class = F.mse_loss(class_pred, class_target, reduction='sum')

        loss = (self.l_coord * (loss_xy + loss_wh) + loss_obj
                + self.l_noobj * loss_noobj + loss_class)
        return loss / float(batchsize)

## Training Process


In [8]:
# Prefer the GPU when one is usable. is_available must be *called*: the bare
# function object is always truthy, which selected "cuda" unconditionally.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
# Hyper-parameters read by train().
# Samples per mini-batch for both data loaders (64 was the alternative tried).
batch_size = 8 #64
# SGD momentum.
momentum = 0.9
# Passed to SGD as weight_decay.
decay = 0.0005
# Number of passes over the training set.
epochs = 50

# Image directory; train() also prefixes the annotation list names with it,
# so the trailing slash is required.
file_root = 'VOCdevkit/VOC2007/JPEGImages/'

def train():
    """Train the ResNet-50 YOLOv1 model and report the validation loss per epoch.

    Reads the module-level ``batch_size``, ``momentum``, ``decay``,
    ``epochs``, ``file_root`` and ``GRID_NUM``. The learning rate follows a
    fixed schedule: 0.001 for the first two epochs, 0.01 up to epoch 30,
    0.001 up to epoch 45 and 0.0001 afterwards. Progress is printed; nothing
    is returned.
    """
    # Use the GPU when one is usable, otherwise fall back to the CPU.
    dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Model initialised from the pretrained ResNet-50 backbone.
    learning_rate = 0.001
    net = load_change_weights(ResNetYoloV1(50), 'resnet50')
    net.to(dev)
    optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate,
                                momentum=momentum, weight_decay=decay)

    # Data.
    train_dataset = yoloDataset(root=file_root, list_file=file_root + 'voc2007train.txt',
                                train=True, transform=[transforms.ToTensor()])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    test_dataset = yoloDataset(root=file_root, list_file=file_root + 'voc2007valid.txt',
                               train=False, transform=[transforms.ToTensor()])
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    print('the dataset has %d images' % (len(train_dataset)))
    print('the batch_size is %d' % (batch_size))

    criterion = yolov1Loss(GRID_NUM, 2, 20, 5, 0.5)
    for ep in range(epochs):
        net.train()
        # Step schedule: short warm-up, then two decays.
        if ep >= 2:
            learning_rate = 0.01
        if ep >= 30:
            learning_rate = 0.001
        if ep >= 45:
            learning_rate = 0.0001
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

        total_loss = 0.
        total_data = 0.
        for i, (images, target) in enumerate(train_loader):
            images, target = images.to(dev), target.to(dev)
            batch_size_this_iter = images.size(0)

            pred = net(images)
            loss = criterion(pred, target)
            # The criterion returns a per-sample mean, so weight it by the
            # batch size to keep the running average exact on a short batch.
            total_loss += loss.item() * batch_size_this_iter
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_data += batch_size_this_iter

            if (i + 1) % 5 == 0:
                print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f, average_loss: %.4f'
                      % (ep + 1, epochs, i + 1, len(train_loader), loss.item(), total_loss / total_data))

        # Validation: no gradients are needed, so no autograd graph is built.
        validation_loss = 0.0
        net.eval()
        with torch.no_grad():
            for i, (images, target) in enumerate(test_loader):
                images, target = images.to(dev), target.to(dev)

                pred = net(images)
                loss = criterion(pred, target)
                validation_loss += loss.item()
        validation_loss /= len(test_loader)
        print('Test epoch [%d/%d], average_loss: %.4f' % (ep + 1, epochs, validation_loss))

# Run the full training schedule; progress is printed below.
train()

the dataset has 3508 images
the batch_size is 8
Epoch [1/50], Iter [5/439] Loss: 21.0562, average_loss: 22.3371
Epoch [1/50], Iter [10/439] Loss: 17.4122, average_loss: 21.6085
Epoch [1/50], Iter [15/439] Loss: 16.7288, average_loss: 20.1525
Epoch [1/50], Iter [20/439] Loss: 16.9078, average_loss: 19.0039
Epoch [1/50], Iter [25/439] Loss: 13.0131, average_loss: 18.3950
Epoch [1/50], Iter [30/439] Loss: 12.9082, average_loss: 17.3311
Epoch [1/50], Iter [35/439] Loss: 10.3557, average_loss: 16.7462
Epoch [1/50], Iter [40/439] Loss: 8.6032, average_loss: 16.3898
Epoch [1/50], Iter [45/439] Loss: 7.2090, average_loss: 15.5925
Epoch [1/50], Iter [50/439] Loss: 10.6954, average_loss: 15.0601
Epoch [1/50], Iter [55/439] Loss: 11.3000, average_loss: 14.4277
Epoch [1/50], Iter [60/439] Loss: 8.4634, average_loss: 13.9850
Epoch [1/50], Iter [65/439] Loss: 12.0113, average_loss: 13.6119
Epoch [1/50], Iter [70/439] Loss: 5.2403, average_loss: 13.2643
Epoch [1/50], Iter [75/439] Loss: 5.1016, avera

Epoch [2/50], Iter [205/439] Loss: 5.9805, average_loss: 4.8290
Epoch [2/50], Iter [210/439] Loss: 4.7257, average_loss: 4.8093
Epoch [2/50], Iter [215/439] Loss: 3.8736, average_loss: 4.8264
Epoch [2/50], Iter [220/439] Loss: 3.8101, average_loss: 4.8026
Epoch [2/50], Iter [225/439] Loss: 2.9081, average_loss: 4.7975
Epoch [2/50], Iter [230/439] Loss: 5.7600, average_loss: 4.7889
Epoch [2/50], Iter [235/439] Loss: 3.6222, average_loss: 4.7906
Epoch [2/50], Iter [240/439] Loss: 4.4411, average_loss: 4.7729
Epoch [2/50], Iter [245/439] Loss: 4.8364, average_loss: 4.7669
Epoch [2/50], Iter [250/439] Loss: 5.0279, average_loss: 4.7576
Epoch [2/50], Iter [255/439] Loss: 4.0954, average_loss: 4.7462
Epoch [2/50], Iter [260/439] Loss: 4.1648, average_loss: 4.7287
Epoch [2/50], Iter [265/439] Loss: 4.7152, average_loss: 4.7269
Epoch [2/50], Iter [270/439] Loss: 3.6682, average_loss: 4.7110
Epoch [2/50], Iter [275/439] Loss: 2.5654, average_loss: 4.6933
Epoch [2/50], Iter [280/439] Loss: 4.713

Epoch [3/50], Iter [410/439] Loss: 4.9846, average_loss: 4.7457
Epoch [3/50], Iter [415/439] Loss: 5.1098, average_loss: 4.7439
Epoch [3/50], Iter [420/439] Loss: 6.5065, average_loss: 4.7330
Epoch [3/50], Iter [425/439] Loss: 3.8933, average_loss: 4.7221
Epoch [3/50], Iter [430/439] Loss: 7.4153, average_loss: 4.7289
Epoch [3/50], Iter [435/439] Loss: 6.6049, average_loss: 4.7252
Test epoch [3/50], average_loss: 4.4521
Epoch [4/50], Iter [5/439] Loss: 3.3207, average_loss: 3.1779
Epoch [4/50], Iter [10/439] Loss: 6.3053, average_loss: 3.8732
Epoch [4/50], Iter [15/439] Loss: 3.3646, average_loss: 3.8068
Epoch [4/50], Iter [20/439] Loss: 4.8134, average_loss: 4.1402
Epoch [4/50], Iter [25/439] Loss: 3.7549, average_loss: 4.2299
Epoch [4/50], Iter [30/439] Loss: 3.9462, average_loss: 4.3195
Epoch [4/50], Iter [35/439] Loss: 3.8582, average_loss: 4.3347
Epoch [4/50], Iter [40/439] Loss: 3.1722, average_loss: 4.3552
Epoch [4/50], Iter [45/439] Loss: 4.2968, average_loss: 4.2641
Epoch [4/5

Epoch [5/50], Iter [180/439] Loss: 4.7971, average_loss: 4.3689
Epoch [5/50], Iter [185/439] Loss: 4.0791, average_loss: 4.3665
Epoch [5/50], Iter [190/439] Loss: 8.0583, average_loss: 4.3653
Epoch [5/50], Iter [195/439] Loss: 2.3359, average_loss: 4.3466
Epoch [5/50], Iter [200/439] Loss: 4.1877, average_loss: 4.3346
Epoch [5/50], Iter [205/439] Loss: 2.7916, average_loss: 4.3094
Epoch [5/50], Iter [210/439] Loss: 2.6022, average_loss: 4.2916
Epoch [5/50], Iter [215/439] Loss: 8.3259, average_loss: 4.3223
Epoch [5/50], Iter [220/439] Loss: 4.2491, average_loss: 4.3447
Epoch [5/50], Iter [225/439] Loss: 7.1058, average_loss: 4.3929
Epoch [5/50], Iter [230/439] Loss: 3.4667, average_loss: 4.3836
Epoch [5/50], Iter [235/439] Loss: 4.2031, average_loss: 4.3746
Epoch [5/50], Iter [240/439] Loss: 4.3505, average_loss: 4.3777
Epoch [5/50], Iter [245/439] Loss: 3.4553, average_loss: 4.3649
Epoch [5/50], Iter [250/439] Loss: 4.9182, average_loss: 4.3527
Epoch [5/50], Iter [255/439] Loss: 5.093

Epoch [6/50], Iter [385/439] Loss: 5.0419, average_loss: 4.0877
Epoch [6/50], Iter [390/439] Loss: 3.8571, average_loss: 4.0965
Epoch [6/50], Iter [395/439] Loss: 6.6482, average_loss: 4.1057
Epoch [6/50], Iter [400/439] Loss: 3.4541, average_loss: 4.1033
Epoch [6/50], Iter [405/439] Loss: 4.4064, average_loss: 4.1061
Epoch [6/50], Iter [410/439] Loss: 5.4408, average_loss: 4.1079
Epoch [6/50], Iter [415/439] Loss: 5.1042, average_loss: 4.1076
Epoch [6/50], Iter [420/439] Loss: 8.2575, average_loss: 4.1126
Epoch [6/50], Iter [425/439] Loss: 3.1367, average_loss: 4.1040
Epoch [6/50], Iter [430/439] Loss: 5.0215, average_loss: 4.1153
Epoch [6/50], Iter [435/439] Loss: 2.4524, average_loss: 4.1096
Test epoch [6/50], average_loss: 4.0919
Epoch [7/50], Iter [5/439] Loss: 3.2280, average_loss: 5.0603
Epoch [7/50], Iter [10/439] Loss: 3.6840, average_loss: 4.7118
Epoch [7/50], Iter [15/439] Loss: 4.5896, average_loss: 4.5438
Epoch [7/50], Iter [20/439] Loss: 2.8167, average_loss: 4.3400
Epoch

Epoch [8/50], Iter [155/439] Loss: 2.6082, average_loss: 4.0555
Epoch [8/50], Iter [160/439] Loss: 2.7112, average_loss: 4.0353
Epoch [8/50], Iter [165/439] Loss: 4.4317, average_loss: 4.0372
Epoch [8/50], Iter [170/439] Loss: 3.7422, average_loss: 4.0069
Epoch [8/50], Iter [175/439] Loss: 4.4279, average_loss: 4.0103
Epoch [8/50], Iter [180/439] Loss: 3.0673, average_loss: 4.0137
Epoch [8/50], Iter [185/439] Loss: 2.4777, average_loss: 4.0291
Epoch [8/50], Iter [190/439] Loss: 4.1035, average_loss: 4.0348
Epoch [8/50], Iter [195/439] Loss: 2.8124, average_loss: 4.0374
Epoch [8/50], Iter [200/439] Loss: 3.3581, average_loss: 4.0261
Epoch [8/50], Iter [205/439] Loss: 3.4486, average_loss: 4.0250
Epoch [8/50], Iter [210/439] Loss: 3.4437, average_loss: 4.0139
Epoch [8/50], Iter [215/439] Loss: 4.2499, average_loss: 4.0334
Epoch [8/50], Iter [220/439] Loss: 3.2110, average_loss: 4.0195
Epoch [8/50], Iter [225/439] Loss: 3.1700, average_loss: 4.0132
Epoch [8/50], Iter [230/439] Loss: 4.851

Epoch [9/50], Iter [360/439] Loss: 4.4017, average_loss: 3.9252
Epoch [9/50], Iter [365/439] Loss: 4.8006, average_loss: 3.9269
Epoch [9/50], Iter [370/439] Loss: 4.9408, average_loss: 3.9181
Epoch [9/50], Iter [375/439] Loss: 3.3886, average_loss: 3.9156
Epoch [9/50], Iter [380/439] Loss: 4.3638, average_loss: 3.9185
Epoch [9/50], Iter [385/439] Loss: 4.4030, average_loss: 3.9188
Epoch [9/50], Iter [390/439] Loss: 6.0557, average_loss: 3.9197
Epoch [9/50], Iter [395/439] Loss: 3.0670, average_loss: 3.9181
Epoch [9/50], Iter [400/439] Loss: 2.8369, average_loss: 3.9162
Epoch [9/50], Iter [405/439] Loss: 3.9836, average_loss: 3.9174
Epoch [9/50], Iter [410/439] Loss: 2.8296, average_loss: 3.9265
Epoch [9/50], Iter [415/439] Loss: 2.2466, average_loss: 3.9211
Epoch [9/50], Iter [420/439] Loss: 3.6641, average_loss: 3.9181
Epoch [9/50], Iter [425/439] Loss: 4.3832, average_loss: 3.9113
Epoch [9/50], Iter [430/439] Loss: 4.4341, average_loss: 3.9227
Epoch [9/50], Iter [435/439] Loss: 2.424

Epoch [11/50], Iter [120/439] Loss: 3.0544, average_loss: 3.8555
Epoch [11/50], Iter [125/439] Loss: 3.1709, average_loss: 3.8420
Epoch [11/50], Iter [130/439] Loss: 4.5763, average_loss: 3.8475
Epoch [11/50], Iter [135/439] Loss: 2.5424, average_loss: 3.8259
Epoch [11/50], Iter [140/439] Loss: 3.1187, average_loss: 3.8489
Epoch [11/50], Iter [145/439] Loss: 3.6192, average_loss: 3.8463
Epoch [11/50], Iter [150/439] Loss: 6.5364, average_loss: 3.8602
Epoch [11/50], Iter [155/439] Loss: 3.2188, average_loss: 3.8447
Epoch [11/50], Iter [160/439] Loss: 2.3380, average_loss: 3.8353
Epoch [11/50], Iter [165/439] Loss: 3.6920, average_loss: 3.8549
Epoch [11/50], Iter [170/439] Loss: 3.9559, average_loss: 3.8502
Epoch [11/50], Iter [175/439] Loss: 3.2254, average_loss: 3.8484
Epoch [11/50], Iter [180/439] Loss: 3.9531, average_loss: 3.8549
Epoch [11/50], Iter [185/439] Loss: 4.0947, average_loss: 3.8429
Epoch [11/50], Iter [190/439] Loss: 2.4808, average_loss: 3.8243
Epoch [11/50], Iter [195/

Epoch [12/50], Iter [315/439] Loss: 3.7546, average_loss: 3.7209
Epoch [12/50], Iter [320/439] Loss: 4.3703, average_loss: 3.7251
Epoch [12/50], Iter [325/439] Loss: 3.7354, average_loss: 3.7331
Epoch [12/50], Iter [330/439] Loss: 4.1019, average_loss: 3.7308
Epoch [12/50], Iter [335/439] Loss: 3.0640, average_loss: 3.7407
Epoch [12/50], Iter [340/439] Loss: 3.4062, average_loss: 3.7363
Epoch [12/50], Iter [345/439] Loss: 4.2135, average_loss: 3.7442
Epoch [12/50], Iter [350/439] Loss: 2.9516, average_loss: 3.7328
Epoch [12/50], Iter [355/439] Loss: 2.7308, average_loss: 3.7379
Epoch [12/50], Iter [360/439] Loss: 2.4927, average_loss: 3.7354
Epoch [12/50], Iter [365/439] Loss: 3.2781, average_loss: 3.7325
Epoch [12/50], Iter [370/439] Loss: 4.0026, average_loss: 3.7368
Epoch [12/50], Iter [375/439] Loss: 2.7766, average_loss: 3.7341
Epoch [12/50], Iter [380/439] Loss: 2.9396, average_loss: 3.7313
Epoch [12/50], Iter [385/439] Loss: 4.1006, average_loss: 3.7317
Epoch [12/50], Iter [390/

Epoch [14/50], Iter [75/439] Loss: 2.2211, average_loss: 3.5622
Epoch [14/50], Iter [80/439] Loss: 3.6928, average_loss: 3.5698
Epoch [14/50], Iter [85/439] Loss: 3.0047, average_loss: 3.5931
Epoch [14/50], Iter [90/439] Loss: 2.2758, average_loss: 3.5900
Epoch [14/50], Iter [95/439] Loss: 4.2767, average_loss: 3.5880
Epoch [14/50], Iter [100/439] Loss: 3.2978, average_loss: 3.5899
Epoch [14/50], Iter [105/439] Loss: 3.1849, average_loss: 3.5840
Epoch [14/50], Iter [110/439] Loss: 3.5779, average_loss: 3.6335
Epoch [14/50], Iter [115/439] Loss: 2.9392, average_loss: 3.6149
Epoch [14/50], Iter [120/439] Loss: 3.2782, average_loss: 3.6194
Epoch [14/50], Iter [125/439] Loss: 4.6182, average_loss: 3.6712
Epoch [14/50], Iter [130/439] Loss: 4.4545, average_loss: 3.6638
Epoch [14/50], Iter [135/439] Loss: 2.5687, average_loss: 3.6654
Epoch [14/50], Iter [140/439] Loss: 3.7598, average_loss: 3.6527
Epoch [14/50], Iter [145/439] Loss: 3.5582, average_loss: 3.6535
Epoch [14/50], Iter [150/439] 

Epoch [15/50], Iter [270/439] Loss: 5.5357, average_loss: 3.6640
Epoch [15/50], Iter [275/439] Loss: 3.7297, average_loss: 3.6550
Epoch [15/50], Iter [280/439] Loss: 4.8469, average_loss: 3.6643
Epoch [15/50], Iter [285/439] Loss: 4.0487, average_loss: 3.6722
Epoch [15/50], Iter [290/439] Loss: 2.7813, average_loss: 3.6707
Epoch [15/50], Iter [295/439] Loss: 2.2684, average_loss: 3.6589
Epoch [15/50], Iter [300/439] Loss: 4.2804, average_loss: 3.6561
Epoch [15/50], Iter [305/439] Loss: 2.4186, average_loss: 3.6498
Epoch [15/50], Iter [310/439] Loss: 2.9521, average_loss: 3.6386
Epoch [15/50], Iter [315/439] Loss: 3.0472, average_loss: 3.6390
Epoch [15/50], Iter [320/439] Loss: 3.1383, average_loss: 3.6441
Epoch [15/50], Iter [325/439] Loss: 4.6561, average_loss: 3.6401
Epoch [15/50], Iter [330/439] Loss: 3.9290, average_loss: 3.6371
Epoch [15/50], Iter [335/439] Loss: 3.9709, average_loss: 3.6417
Epoch [15/50], Iter [340/439] Loss: 3.4675, average_loss: 3.6350
Epoch [15/50], Iter [345/

Epoch [17/50], Iter [30/439] Loss: 2.8453, average_loss: 3.7098
Epoch [17/50], Iter [35/439] Loss: 3.8554, average_loss: 3.6180
Epoch [17/50], Iter [40/439] Loss: 3.7456, average_loss: 3.6195
Epoch [17/50], Iter [45/439] Loss: 2.1994, average_loss: 3.6576
Epoch [17/50], Iter [50/439] Loss: 4.7333, average_loss: 3.6575
Epoch [17/50], Iter [55/439] Loss: 3.1793, average_loss: 3.6086
Epoch [17/50], Iter [60/439] Loss: 4.4195, average_loss: 3.6152
Epoch [17/50], Iter [65/439] Loss: 4.0863, average_loss: 3.5963
Epoch [17/50], Iter [70/439] Loss: 2.7593, average_loss: 3.5979
Epoch [17/50], Iter [75/439] Loss: 4.6039, average_loss: 3.6150
Epoch [17/50], Iter [80/439] Loss: 3.4399, average_loss: 3.6244
Epoch [17/50], Iter [85/439] Loss: 3.0173, average_loss: 3.6503
Epoch [17/50], Iter [90/439] Loss: 2.1042, average_loss: 3.6412
Epoch [17/50], Iter [95/439] Loss: 3.4848, average_loss: 3.6044
Epoch [17/50], Iter [100/439] Loss: 4.3079, average_loss: 3.6308
Epoch [17/50], Iter [105/439] Loss: 1.6

Epoch [18/50], Iter [225/439] Loss: 4.6145, average_loss: 3.5612
Epoch [18/50], Iter [230/439] Loss: 2.5404, average_loss: 3.5608
Epoch [18/50], Iter [235/439] Loss: 2.8680, average_loss: 3.5490
Epoch [18/50], Iter [240/439] Loss: 2.7794, average_loss: 3.5357
Epoch [18/50], Iter [245/439] Loss: 1.6863, average_loss: 3.5102
Epoch [18/50], Iter [250/439] Loss: 2.7948, average_loss: 3.5093
Epoch [18/50], Iter [255/439] Loss: 2.6955, average_loss: 3.5029
Epoch [18/50], Iter [260/439] Loss: 3.4660, average_loss: 3.5052
Epoch [18/50], Iter [265/439] Loss: 2.8841, average_loss: 3.4992
Epoch [18/50], Iter [270/439] Loss: 3.1447, average_loss: 3.5013
Epoch [18/50], Iter [275/439] Loss: 4.3205, average_loss: 3.4983
Epoch [18/50], Iter [280/439] Loss: 4.0204, average_loss: 3.4984
Epoch [18/50], Iter [285/439] Loss: 4.1770, average_loss: 3.4921
Epoch [18/50], Iter [290/439] Loss: 3.3180, average_loss: 3.4903
Epoch [18/50], Iter [295/439] Loss: 3.4479, average_loss: 3.4927
Epoch [18/50], Iter [300/

Epoch [19/50], Iter [420/439] Loss: 2.7717, average_loss: 3.4360
Epoch [19/50], Iter [425/439] Loss: 2.7310, average_loss: 3.4292
Epoch [19/50], Iter [430/439] Loss: 3.0757, average_loss: 3.4284
Epoch [19/50], Iter [435/439] Loss: 3.4175, average_loss: 3.4285
Test epoch [19/50], average_loss: 3.5209
Epoch [20/50], Iter [5/439] Loss: 2.2726, average_loss: 3.1405
Epoch [20/50], Iter [10/439] Loss: 2.3628, average_loss: 3.0848
Epoch [20/50], Iter [15/439] Loss: 4.3991, average_loss: 3.0235
Epoch [20/50], Iter [20/439] Loss: 3.2150, average_loss: 3.1297
Epoch [20/50], Iter [25/439] Loss: 2.4669, average_loss: 3.0501
Epoch [20/50], Iter [30/439] Loss: 3.3074, average_loss: 2.9790
Epoch [20/50], Iter [35/439] Loss: 3.0025, average_loss: 3.0653
Epoch [20/50], Iter [40/439] Loss: 3.1366, average_loss: 3.1360
Epoch [20/50], Iter [45/439] Loss: 3.2982, average_loss: 3.1603
Epoch [20/50], Iter [50/439] Loss: 3.1877, average_loss: 3.1881
Epoch [20/50], Iter [55/439] Loss: 1.8501, average_loss: 3.2

Epoch [21/50], Iter [180/439] Loss: 2.8854, average_loss: 3.4037
Epoch [21/50], Iter [185/439] Loss: 2.3344, average_loss: 3.3926
Epoch [21/50], Iter [190/439] Loss: 1.7949, average_loss: 3.3698
Epoch [21/50], Iter [195/439] Loss: 4.6919, average_loss: 3.3760
Epoch [21/50], Iter [200/439] Loss: 3.3846, average_loss: 3.3798
Epoch [21/50], Iter [205/439] Loss: 4.3581, average_loss: 3.3741
Epoch [21/50], Iter [210/439] Loss: 4.5457, average_loss: 3.3725
Epoch [21/50], Iter [215/439] Loss: 2.8890, average_loss: 3.3660
Epoch [21/50], Iter [220/439] Loss: 3.2483, average_loss: 3.3671
Epoch [21/50], Iter [225/439] Loss: 3.1928, average_loss: 3.3535
Epoch [21/50], Iter [230/439] Loss: 2.9827, average_loss: 3.3440
Epoch [21/50], Iter [235/439] Loss: 3.0689, average_loss: 3.3529
Epoch [21/50], Iter [240/439] Loss: 2.9347, average_loss: 3.3604
Epoch [21/50], Iter [245/439] Loss: 2.3155, average_loss: 3.3577
Epoch [21/50], Iter [250/439] Loss: 3.3043, average_loss: 3.3543
Epoch [21/50], Iter [255/

Epoch [22/50], Iter [375/439] Loss: 4.9239, average_loss: 3.3100
Epoch [22/50], Iter [380/439] Loss: 2.4338, average_loss: 3.3050
Epoch [22/50], Iter [385/439] Loss: 2.1965, average_loss: 3.3047
Epoch [22/50], Iter [390/439] Loss: 2.9221, average_loss: 3.3158
Epoch [22/50], Iter [395/439] Loss: 3.2957, average_loss: 3.3079
Epoch [22/50], Iter [400/439] Loss: 4.6245, average_loss: 3.3167
Epoch [22/50], Iter [405/439] Loss: 2.9990, average_loss: 3.3188
Epoch [22/50], Iter [410/439] Loss: 3.8323, average_loss: 3.3168
Epoch [22/50], Iter [415/439] Loss: 4.3746, average_loss: 3.3296
Epoch [22/50], Iter [420/439] Loss: 3.8327, average_loss: 3.3451
Epoch [22/50], Iter [425/439] Loss: 1.6260, average_loss: 3.3379
Epoch [22/50], Iter [430/439] Loss: 5.1609, average_loss: 3.3398
Epoch [22/50], Iter [435/439] Loss: 2.0832, average_loss: 3.3453
Test epoch [22/50], average_loss: 3.3312
Epoch [23/50], Iter [5/439] Loss: 3.6451, average_loss: 3.0888
Epoch [23/50], Iter [10/439] Loss: 3.6156, average_

Epoch [24/50], Iter [135/439] Loss: 3.7915, average_loss: 3.2279
Epoch [24/50], Iter [140/439] Loss: 3.5683, average_loss: 3.2152
Epoch [24/50], Iter [145/439] Loss: 2.5860, average_loss: 3.1963
Epoch [24/50], Iter [150/439] Loss: 4.5389, average_loss: 3.1997
Epoch [24/50], Iter [155/439] Loss: 2.4372, average_loss: 3.2168
Epoch [24/50], Iter [160/439] Loss: 4.4861, average_loss: 3.2348
Epoch [24/50], Iter [165/439] Loss: 2.1350, average_loss: 3.2323
Epoch [24/50], Iter [170/439] Loss: 2.8840, average_loss: 3.2384
Epoch [24/50], Iter [175/439] Loss: 3.9271, average_loss: 3.2360
Epoch [24/50], Iter [180/439] Loss: 4.4574, average_loss: 3.2448
Epoch [24/50], Iter [185/439] Loss: 4.7214, average_loss: 3.2607
Epoch [24/50], Iter [190/439] Loss: 3.7978, average_loss: 3.2926
Epoch [24/50], Iter [195/439] Loss: 2.1837, average_loss: 3.3045
Epoch [24/50], Iter [200/439] Loss: 3.6008, average_loss: 3.3052
Epoch [24/50], Iter [205/439] Loss: 2.6046, average_loss: 3.2934
Epoch [24/50], Iter [210/

Epoch [25/50], Iter [330/439] Loss: 2.9060, average_loss: 3.3156
Epoch [25/50], Iter [335/439] Loss: 2.9892, average_loss: 3.3142
Epoch [25/50], Iter [340/439] Loss: 3.2335, average_loss: 3.3165
Epoch [25/50], Iter [345/439] Loss: 3.7674, average_loss: 3.3067
Epoch [25/50], Iter [350/439] Loss: 3.1059, average_loss: 3.3230
Epoch [25/50], Iter [355/439] Loss: 2.8586, average_loss: 3.3333
Epoch [25/50], Iter [360/439] Loss: 3.9097, average_loss: 3.3378
Epoch [25/50], Iter [365/439] Loss: 1.9491, average_loss: 3.3373
Epoch [25/50], Iter [370/439] Loss: 2.2210, average_loss: 3.3319
Epoch [25/50], Iter [375/439] Loss: 2.3535, average_loss: 3.3326
Epoch [25/50], Iter [380/439] Loss: 2.1609, average_loss: 3.3230
Epoch [25/50], Iter [385/439] Loss: 1.9916, average_loss: 3.3149
Epoch [25/50], Iter [390/439] Loss: 3.9194, average_loss: 3.3163
Epoch [25/50], Iter [395/439] Loss: 2.9588, average_loss: 3.3251
Epoch [25/50], Iter [400/439] Loss: 1.4174, average_loss: 3.3200
Epoch [25/50], Iter [405/

Epoch [27/50], Iter [90/439] Loss: 2.8580, average_loss: 3.0700
Epoch [27/50], Iter [95/439] Loss: 2.8248, average_loss: 3.0900
Epoch [27/50], Iter [100/439] Loss: 3.0737, average_loss: 3.1245
Epoch [27/50], Iter [105/439] Loss: 4.0684, average_loss: 3.1067
Epoch [27/50], Iter [110/439] Loss: 2.3931, average_loss: 3.1002
Epoch [27/50], Iter [115/439] Loss: 2.9596, average_loss: 3.1274
Epoch [27/50], Iter [120/439] Loss: 2.3149, average_loss: 3.1323
Epoch [27/50], Iter [125/439] Loss: 3.5746, average_loss: 3.1455
Epoch [27/50], Iter [130/439] Loss: 3.4914, average_loss: 3.1527
Epoch [27/50], Iter [135/439] Loss: 2.8147, average_loss: 3.1748
Epoch [27/50], Iter [140/439] Loss: 2.3455, average_loss: 3.1791
Epoch [27/50], Iter [145/439] Loss: 5.5832, average_loss: 3.1772
Epoch [27/50], Iter [150/439] Loss: 3.3798, average_loss: 3.1847
Epoch [27/50], Iter [155/439] Loss: 2.9627, average_loss: 3.1932
Epoch [27/50], Iter [160/439] Loss: 5.2669, average_loss: 3.2052
Epoch [27/50], Iter [165/43

Epoch [28/50], Iter [285/439] Loss: 1.9712, average_loss: 3.2636
Epoch [28/50], Iter [290/439] Loss: 4.5750, average_loss: 3.2575
Epoch [28/50], Iter [295/439] Loss: 3.6049, average_loss: 3.2433
Epoch [28/50], Iter [300/439] Loss: 3.3484, average_loss: 3.2415
Epoch [28/50], Iter [305/439] Loss: 2.7026, average_loss: 3.2381
Epoch [28/50], Iter [310/439] Loss: 1.9643, average_loss: 3.2228
Epoch [28/50], Iter [315/439] Loss: 2.0597, average_loss: 3.2125
Epoch [28/50], Iter [320/439] Loss: 3.9939, average_loss: 3.2215
Epoch [28/50], Iter [325/439] Loss: 2.4393, average_loss: 3.2205
Epoch [28/50], Iter [330/439] Loss: 2.4222, average_loss: 3.2197
Epoch [28/50], Iter [335/439] Loss: 3.1801, average_loss: 3.2150
Epoch [28/50], Iter [340/439] Loss: 3.8385, average_loss: 3.2177
Epoch [28/50], Iter [345/439] Loss: 1.9911, average_loss: 3.2109
Epoch [28/50], Iter [350/439] Loss: 3.7045, average_loss: 3.2214
Epoch [28/50], Iter [355/439] Loss: 2.9577, average_loss: 3.2146
Epoch [28/50], Iter [360/

Epoch [30/50], Iter [45/439] Loss: 2.7475, average_loss: 3.2249
Epoch [30/50], Iter [50/439] Loss: 3.0457, average_loss: 3.2496
Epoch [30/50], Iter [55/439] Loss: 1.9975, average_loss: 3.2227
Epoch [30/50], Iter [60/439] Loss: 2.1093, average_loss: 3.1671
Epoch [30/50], Iter [65/439] Loss: 2.5392, average_loss: 3.1127
Epoch [30/50], Iter [70/439] Loss: 2.2564, average_loss: 3.1200
Epoch [30/50], Iter [75/439] Loss: 2.3187, average_loss: 3.0965
Epoch [30/50], Iter [80/439] Loss: 1.8230, average_loss: 3.0715
Epoch [30/50], Iter [85/439] Loss: 2.8316, average_loss: 3.0864
Epoch [30/50], Iter [90/439] Loss: 2.7341, average_loss: 3.1285
Epoch [30/50], Iter [95/439] Loss: 2.7690, average_loss: 3.1144
Epoch [30/50], Iter [100/439] Loss: 2.2516, average_loss: 3.1349
Epoch [30/50], Iter [105/439] Loss: 2.3808, average_loss: 3.1551
Epoch [30/50], Iter [110/439] Loss: 3.7941, average_loss: 3.1448
Epoch [30/50], Iter [115/439] Loss: 2.3729, average_loss: 3.1390
Epoch [30/50], Iter [120/439] Loss: 

Epoch [31/50], Iter [240/439] Loss: 3.0875, average_loss: 2.8587
Epoch [31/50], Iter [245/439] Loss: 3.2664, average_loss: 2.8532
Epoch [31/50], Iter [250/439] Loss: 1.9669, average_loss: 2.8652
Epoch [31/50], Iter [255/439] Loss: 3.2122, average_loss: 2.8670
Epoch [31/50], Iter [260/439] Loss: 2.0159, average_loss: 2.8725
Epoch [31/50], Iter [265/439] Loss: 4.0763, average_loss: 2.8686
Epoch [31/50], Iter [270/439] Loss: 3.5888, average_loss: 2.8867
Epoch [31/50], Iter [275/439] Loss: 3.6407, average_loss: 2.8827
Epoch [31/50], Iter [280/439] Loss: 3.4014, average_loss: 2.8851
Epoch [31/50], Iter [285/439] Loss: 2.0007, average_loss: 2.8919
Epoch [31/50], Iter [290/439] Loss: 1.1229, average_loss: 2.8907
Epoch [31/50], Iter [295/439] Loss: 3.4969, average_loss: 2.8956
Epoch [31/50], Iter [300/439] Loss: 1.6315, average_loss: 2.8907
Epoch [31/50], Iter [305/439] Loss: 3.0137, average_loss: 2.8910
Epoch [31/50], Iter [310/439] Loss: 1.9997, average_loss: 2.8842
Epoch [31/50], Iter [315/

Epoch [32/50], Iter [435/439] Loss: 2.6518, average_loss: 2.7774
Test epoch [32/50], average_loss: 2.8624
Epoch [33/50], Iter [5/439] Loss: 3.3906, average_loss: 2.7156
Epoch [33/50], Iter [10/439] Loss: 1.7402, average_loss: 2.6009
Epoch [33/50], Iter [15/439] Loss: 2.0172, average_loss: 2.4120
Epoch [33/50], Iter [20/439] Loss: 3.1328, average_loss: 2.7158
Epoch [33/50], Iter [25/439] Loss: 3.4445, average_loss: 2.8244
Epoch [33/50], Iter [30/439] Loss: 1.9649, average_loss: 2.8026
Epoch [33/50], Iter [35/439] Loss: 2.1881, average_loss: 2.8066
Epoch [33/50], Iter [40/439] Loss: 3.0205, average_loss: 2.8050
Epoch [33/50], Iter [45/439] Loss: 2.7196, average_loss: 2.7857
Epoch [33/50], Iter [50/439] Loss: 2.0633, average_loss: 2.7041
Epoch [33/50], Iter [55/439] Loss: 2.5800, average_loss: 2.6692
Epoch [33/50], Iter [60/439] Loss: 3.3101, average_loss: 2.6755
Epoch [33/50], Iter [65/439] Loss: 2.2748, average_loss: 2.6667
Epoch [33/50], Iter [70/439] Loss: 3.1587, average_loss: 2.6773

Epoch [34/50], Iter [195/439] Loss: 2.0870, average_loss: 2.7193
Epoch [34/50], Iter [200/439] Loss: 3.2224, average_loss: 2.7192
Epoch [34/50], Iter [205/439] Loss: 3.2444, average_loss: 2.7118
Epoch [34/50], Iter [210/439] Loss: 3.3015, average_loss: 2.7046
Epoch [34/50], Iter [215/439] Loss: 3.4628, average_loss: 2.7105
Epoch [34/50], Iter [220/439] Loss: 1.4716, average_loss: 2.7157
Epoch [34/50], Iter [225/439] Loss: 2.8982, average_loss: 2.7155
Epoch [34/50], Iter [230/439] Loss: 1.9380, average_loss: 2.7136
Epoch [34/50], Iter [235/439] Loss: 3.8667, average_loss: 2.7259
Epoch [34/50], Iter [240/439] Loss: 1.5485, average_loss: 2.7224
Epoch [34/50], Iter [245/439] Loss: 2.2277, average_loss: 2.7185
Epoch [34/50], Iter [250/439] Loss: 2.3462, average_loss: 2.7217
Epoch [34/50], Iter [255/439] Loss: 3.0498, average_loss: 2.7244
Epoch [34/50], Iter [260/439] Loss: 2.7529, average_loss: 2.7150
Epoch [34/50], Iter [265/439] Loss: 3.6351, average_loss: 2.7240
Epoch [34/50], Iter [270/

Epoch [35/50], Iter [390/439] Loss: 3.5279, average_loss: 2.7416
Epoch [35/50], Iter [395/439] Loss: 1.7682, average_loss: 2.7320
Epoch [35/50], Iter [400/439] Loss: 2.3449, average_loss: 2.7325
Epoch [35/50], Iter [405/439] Loss: 2.7308, average_loss: 2.7248
Epoch [35/50], Iter [410/439] Loss: 2.9573, average_loss: 2.7236
Epoch [35/50], Iter [415/439] Loss: 3.0614, average_loss: 2.7188
Epoch [35/50], Iter [420/439] Loss: 1.9062, average_loss: 2.7119
Epoch [35/50], Iter [425/439] Loss: 1.2533, average_loss: 2.7121
Epoch [35/50], Iter [430/439] Loss: 2.4826, average_loss: 2.7097
Epoch [35/50], Iter [435/439] Loss: 1.8843, average_loss: 2.7053
Test epoch [35/50], average_loss: 2.7842
Epoch [36/50], Iter [5/439] Loss: 2.4355, average_loss: 2.2258
Epoch [36/50], Iter [10/439] Loss: 2.3647, average_loss: 2.5748
Epoch [36/50], Iter [15/439] Loss: 1.9996, average_loss: 2.4035
Epoch [36/50], Iter [20/439] Loss: 2.0398, average_loss: 2.5318
Epoch [36/50], Iter [25/439] Loss: 1.5268, average_los

Epoch [37/50], Iter [150/439] Loss: 3.0954, average_loss: 2.6559
Epoch [37/50], Iter [155/439] Loss: 4.1581, average_loss: 2.6632
Epoch [37/50], Iter [160/439] Loss: 3.6311, average_loss: 2.6673
Epoch [37/50], Iter [165/439] Loss: 2.0752, average_loss: 2.6580
Epoch [37/50], Iter [170/439] Loss: 1.7845, average_loss: 2.6411
Epoch [37/50], Iter [175/439] Loss: 2.4096, average_loss: 2.6412
Epoch [37/50], Iter [180/439] Loss: 2.8633, average_loss: 2.6507
Epoch [37/50], Iter [185/439] Loss: 3.0539, average_loss: 2.6505
Epoch [37/50], Iter [190/439] Loss: 1.8872, average_loss: 2.6368
Epoch [37/50], Iter [195/439] Loss: 1.6296, average_loss: 2.6117
Epoch [37/50], Iter [200/439] Loss: 2.8151, average_loss: 2.6060
Epoch [37/50], Iter [205/439] Loss: 3.4662, average_loss: 2.6118
Epoch [37/50], Iter [210/439] Loss: 3.4943, average_loss: 2.6172
Epoch [37/50], Iter [215/439] Loss: 2.8868, average_loss: 2.6123
Epoch [37/50], Iter [220/439] Loss: 2.7617, average_loss: 2.6128
Epoch [37/50], Iter [225/

Epoch [38/50], Iter [345/439] Loss: 2.1859, average_loss: 2.5736
Epoch [38/50], Iter [350/439] Loss: 1.8674, average_loss: 2.5728
Epoch [38/50], Iter [355/439] Loss: 4.9886, average_loss: 2.5789
Epoch [38/50], Iter [360/439] Loss: 2.6826, average_loss: 2.5781
Epoch [38/50], Iter [365/439] Loss: 3.0510, average_loss: 2.5802
Epoch [38/50], Iter [370/439] Loss: 2.1228, average_loss: 2.5800
Epoch [38/50], Iter [375/439] Loss: 1.8890, average_loss: 2.5767
Epoch [38/50], Iter [380/439] Loss: 4.2513, average_loss: 2.5917
Epoch [38/50], Iter [385/439] Loss: 2.6517, average_loss: 2.5857
Epoch [38/50], Iter [390/439] Loss: 3.2812, average_loss: 2.5901
Epoch [38/50], Iter [395/439] Loss: 2.1422, average_loss: 2.5849
Epoch [38/50], Iter [400/439] Loss: 3.2008, average_loss: 2.5882
Epoch [38/50], Iter [405/439] Loss: 1.5956, average_loss: 2.5880
Epoch [38/50], Iter [410/439] Loss: 2.8946, average_loss: 2.5821
Epoch [38/50], Iter [415/439] Loss: 2.5050, average_loss: 2.5895
Epoch [38/50], Iter [420/

Epoch [40/50], Iter [105/439] Loss: 1.9807, average_loss: 2.3795
Epoch [40/50], Iter [110/439] Loss: 2.2679, average_loss: 2.3940
Epoch [40/50], Iter [115/439] Loss: 1.6561, average_loss: 2.3726
Epoch [40/50], Iter [120/439] Loss: 2.9333, average_loss: 2.3792
Epoch [40/50], Iter [125/439] Loss: 1.4729, average_loss: 2.3868
Epoch [40/50], Iter [130/439] Loss: 1.3977, average_loss: 2.3791
Epoch [40/50], Iter [135/439] Loss: 3.5772, average_loss: 2.3878
Epoch [40/50], Iter [140/439] Loss: 1.1571, average_loss: 2.3828
Epoch [40/50], Iter [145/439] Loss: 1.3358, average_loss: 2.3919
Epoch [40/50], Iter [150/439] Loss: 2.0656, average_loss: 2.3942
Epoch [40/50], Iter [155/439] Loss: 2.0005, average_loss: 2.3974
Epoch [40/50], Iter [160/439] Loss: 2.1679, average_loss: 2.4012
Epoch [40/50], Iter [165/439] Loss: 0.8325, average_loss: 2.3844
Epoch [40/50], Iter [170/439] Loss: 3.1490, average_loss: 2.3977
Epoch [40/50], Iter [175/439] Loss: 2.7402, average_loss: 2.4193
Epoch [40/50], Iter [180/

Epoch [41/50], Iter [300/439] Loss: 3.6474, average_loss: 2.4922
Epoch [41/50], Iter [305/439] Loss: 2.3361, average_loss: 2.4923
Epoch [41/50], Iter [310/439] Loss: 1.2103, average_loss: 2.4876
Epoch [41/50], Iter [315/439] Loss: 2.7930, average_loss: 2.4881
Epoch [41/50], Iter [320/439] Loss: 1.5661, average_loss: 2.4900
Epoch [41/50], Iter [325/439] Loss: 2.4287, average_loss: 2.4907
Epoch [41/50], Iter [330/439] Loss: 2.3212, average_loss: 2.4892
Epoch [41/50], Iter [335/439] Loss: 2.2389, average_loss: 2.5073
Epoch [41/50], Iter [340/439] Loss: 4.1361, average_loss: 2.5059
Epoch [41/50], Iter [345/439] Loss: 2.5253, average_loss: 2.4993
Epoch [41/50], Iter [350/439] Loss: 5.6567, average_loss: 2.5026
Epoch [41/50], Iter [355/439] Loss: 2.8002, average_loss: 2.5033
Epoch [41/50], Iter [360/439] Loss: 1.8910, average_loss: 2.4999
Epoch [41/50], Iter [365/439] Loss: 1.6997, average_loss: 2.5003
Epoch [41/50], Iter [370/439] Loss: 1.6095, average_loss: 2.4993
Epoch [41/50], Iter [375/

Epoch [43/50], Iter [60/439] Loss: 2.0456, average_loss: 2.6043
Epoch [43/50], Iter [65/439] Loss: 1.8720, average_loss: 2.5788
Epoch [43/50], Iter [70/439] Loss: 1.4124, average_loss: 2.5445
Epoch [43/50], Iter [75/439] Loss: 2.6044, average_loss: 2.5418
Epoch [43/50], Iter [80/439] Loss: 1.8840, average_loss: 2.5370
Epoch [43/50], Iter [85/439] Loss: 3.3361, average_loss: 2.5529
Epoch [43/50], Iter [90/439] Loss: 2.2899, average_loss: 2.5289
Epoch [43/50], Iter [95/439] Loss: 0.7696, average_loss: 2.5079
Epoch [43/50], Iter [100/439] Loss: 2.8437, average_loss: 2.5029
Epoch [43/50], Iter [105/439] Loss: 2.4889, average_loss: 2.5138
Epoch [43/50], Iter [110/439] Loss: 3.2623, average_loss: 2.5067
Epoch [43/50], Iter [115/439] Loss: 2.5562, average_loss: 2.5032
Epoch [43/50], Iter [120/439] Loss: 2.0952, average_loss: 2.5124
Epoch [43/50], Iter [125/439] Loss: 4.7043, average_loss: 2.5273
Epoch [43/50], Iter [130/439] Loss: 1.0213, average_loss: 2.4977
Epoch [43/50], Iter [135/439] Los

Epoch [44/50], Iter [255/439] Loss: 2.6887, average_loss: 2.4845
Epoch [44/50], Iter [260/439] Loss: 3.3345, average_loss: 2.4932
Epoch [44/50], Iter [265/439] Loss: 1.8204, average_loss: 2.4913
Epoch [44/50], Iter [270/439] Loss: 2.4513, average_loss: 2.4806
Epoch [44/50], Iter [275/439] Loss: 3.0801, average_loss: 2.4880
Epoch [44/50], Iter [280/439] Loss: 2.7138, average_loss: 2.4865
Epoch [44/50], Iter [285/439] Loss: 2.8749, average_loss: 2.4841
Epoch [44/50], Iter [290/439] Loss: 2.1214, average_loss: 2.4873
Epoch [44/50], Iter [295/439] Loss: 2.0167, average_loss: 2.4813
Epoch [44/50], Iter [300/439] Loss: 2.6098, average_loss: 2.4753
Epoch [44/50], Iter [305/439] Loss: 2.2669, average_loss: 2.4825
Epoch [44/50], Iter [310/439] Loss: 3.8047, average_loss: 2.4926
Epoch [44/50], Iter [315/439] Loss: 2.1618, average_loss: 2.4830
Epoch [44/50], Iter [320/439] Loss: 1.8392, average_loss: 2.4748
Epoch [44/50], Iter [325/439] Loss: 2.2993, average_loss: 2.4711
Epoch [44/50], Iter [330/

Epoch [46/50], Iter [15/439] Loss: 1.7833, average_loss: 2.3762
Epoch [46/50], Iter [20/439] Loss: 1.2169, average_loss: 2.3726
Epoch [46/50], Iter [25/439] Loss: 1.8687, average_loss: 2.4227
Epoch [46/50], Iter [30/439] Loss: 1.4765, average_loss: 2.3819
Epoch [46/50], Iter [35/439] Loss: 5.0766, average_loss: 2.4893
Epoch [46/50], Iter [40/439] Loss: 1.1749, average_loss: 2.4782
Epoch [46/50], Iter [45/439] Loss: 1.7942, average_loss: 2.4534
Epoch [46/50], Iter [50/439] Loss: 3.4749, average_loss: 2.4440
Epoch [46/50], Iter [55/439] Loss: 7.3474, average_loss: 2.5317
Epoch [46/50], Iter [60/439] Loss: 1.9018, average_loss: 2.4830
Epoch [46/50], Iter [65/439] Loss: 1.9933, average_loss: 2.4607
Epoch [46/50], Iter [70/439] Loss: 1.5347, average_loss: 2.4514
Epoch [46/50], Iter [75/439] Loss: 1.6849, average_loss: 2.4479
Epoch [46/50], Iter [80/439] Loss: 3.0513, average_loss: 2.4502
Epoch [46/50], Iter [85/439] Loss: 2.1491, average_loss: 2.4560
Epoch [46/50], Iter [90/439] Loss: 2.619

Epoch [47/50], Iter [210/439] Loss: 1.5790, average_loss: 2.3904
Epoch [47/50], Iter [215/439] Loss: 2.1207, average_loss: 2.3878
Epoch [47/50], Iter [220/439] Loss: 1.9724, average_loss: 2.3876
Epoch [47/50], Iter [225/439] Loss: 2.5296, average_loss: 2.3830
Epoch [47/50], Iter [230/439] Loss: 3.5141, average_loss: 2.3818
Epoch [47/50], Iter [235/439] Loss: 4.4038, average_loss: 2.3828
Epoch [47/50], Iter [240/439] Loss: 2.8614, average_loss: 2.3802
Epoch [47/50], Iter [245/439] Loss: 1.4771, average_loss: 2.3815
Epoch [47/50], Iter [250/439] Loss: 3.1729, average_loss: 2.3791
Epoch [47/50], Iter [255/439] Loss: 1.1639, average_loss: 2.3742
Epoch [47/50], Iter [260/439] Loss: 3.7653, average_loss: 2.3933
Epoch [47/50], Iter [265/439] Loss: 2.4555, average_loss: 2.3948
Epoch [47/50], Iter [270/439] Loss: 2.7267, average_loss: 2.3956
Epoch [47/50], Iter [275/439] Loss: 1.8177, average_loss: 2.3939
Epoch [47/50], Iter [280/439] Loss: 2.8068, average_loss: 2.4080
Epoch [47/50], Iter [285/

Epoch [48/50], Iter [405/439] Loss: 1.6040, average_loss: 2.3858
Epoch [48/50], Iter [410/439] Loss: 2.6929, average_loss: 2.3886
Epoch [48/50], Iter [415/439] Loss: 2.9985, average_loss: 2.3916
Epoch [48/50], Iter [420/439] Loss: 1.6135, average_loss: 2.3945
Epoch [48/50], Iter [425/439] Loss: 2.0881, average_loss: 2.3950
Epoch [48/50], Iter [430/439] Loss: 2.1695, average_loss: 2.3923
Epoch [48/50], Iter [435/439] Loss: 1.3270, average_loss: 2.3928
Test epoch [48/50], average_loss: 2.6000
Epoch [49/50], Iter [5/439] Loss: 2.1155, average_loss: 2.3775
Epoch [49/50], Iter [10/439] Loss: 2.4809, average_loss: 2.3565
Epoch [49/50], Iter [15/439] Loss: 3.0919, average_loss: 2.3420
Epoch [49/50], Iter [20/439] Loss: 1.6479, average_loss: 2.2998
Epoch [49/50], Iter [25/439] Loss: 1.9301, average_loss: 2.1913
Epoch [49/50], Iter [30/439] Loss: 4.1854, average_loss: 2.3067
Epoch [49/50], Iter [35/439] Loss: 1.4054, average_loss: 2.2669
Epoch [49/50], Iter [40/439] Loss: 2.5792, average_loss: 

Epoch [50/50], Iter [165/439] Loss: 4.7378, average_loss: 2.3987
Epoch [50/50], Iter [170/439] Loss: 1.5683, average_loss: 2.3963
Epoch [50/50], Iter [175/439] Loss: 2.1967, average_loss: 2.4072
Epoch [50/50], Iter [180/439] Loss: 1.8288, average_loss: 2.3985
Epoch [50/50], Iter [185/439] Loss: 2.8334, average_loss: 2.4044
Epoch [50/50], Iter [190/439] Loss: 1.5634, average_loss: 2.4104
Epoch [50/50], Iter [195/439] Loss: 2.1119, average_loss: 2.4037
Epoch [50/50], Iter [200/439] Loss: 2.1352, average_loss: 2.3927
Epoch [50/50], Iter [205/439] Loss: 2.3312, average_loss: 2.3936
Epoch [50/50], Iter [210/439] Loss: 2.3696, average_loss: 2.3841
Epoch [50/50], Iter [215/439] Loss: 1.7810, average_loss: 2.3818
Epoch [50/50], Iter [220/439] Loss: 3.0644, average_loss: 2.3871
Epoch [50/50], Iter [225/439] Loss: 1.5788, average_loss: 2.3920
Epoch [50/50], Iter [230/439] Loss: 2.5045, average_loss: 2.3860
Epoch [50/50], Iter [235/439] Loss: 3.3787, average_loss: 2.3814
Epoch [50/50], Iter [240/

## Evaluation on VOC


In [20]:
'''import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"'''
import numpy as np
# The 20 PASCAL VOC object category names, in the order used by voc_eval
# when iterating over per-class predictions.
VOC_CLASSES = (
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)

def voc_ap(rec, prec, use_07_metric=False):
    """Compute average precision (AP) from a precision/recall curve.

    Args:
        rec: Array-like of recall values (1-D). Presumably cumulative,
            non-decreasing recall over score-sorted detections -- confirm
            against the caller.
        prec: Array-like of precision values, same length as ``rec``.
        use_07_metric: If True, use the 11-point interpolated metric
            (mean of the best precision at recall >= 0.0, 0.1, ..., 1.0).
            Otherwise integrate the area under the monotonically
            decreasing precision envelope.

    Returns:
        The average precision as a scalar.
    """
    # Coerce once so that plain lists work in both branches; boolean-mask
    # indexing in the 11-point branch requires ndarrays.
    rec = np.asarray(rec)
    prec = np.asarray(prec)

    if use_07_metric:
        # 11 point metric
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            reached = rec >= t
            # No point on the curve reaches this recall level -> precision 0.
            p = np.max(prec[reached]) if reached.any() else 0
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # Pad with sentinels so the curve spans recall 0 to 1.
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # Precision envelope: each entry becomes the max of itself and
        # everything to its right (running maximum taken from the end).
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]

        # Indices where recall changes value; only those steps add area.
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # Sum of (delta recall) * precision over the steps.
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])

    return ap

def voc_eval(preds,target,VOC_CLASSES=VOC_CLASSES,threshold=0.5,use_07_metric=False,):
    """Print the per-class AP and the mean AP of detections vs. ground truth.

    Detections of each class are ranked by descending confidence. Each one is
    matched to the first not-yet-matched ground-truth box of the same image
    and class whose IoU is strictly greater than ``threshold``; a matched
    detection is a true positive, anything else a false positive.

    Args:
        preds: dict mapping class name to a list of detections, each
            ``[image_id, confidence, x1, y1, x2, y2]``,
            e.g. ``{'cat': [[image_id, confidence, x1, y1, x2, y2], ...]}``.
        target: dict mapping ``(image_id, class_name)`` to a list of
            ground-truth boxes ``[x1, y1, x2, y2]``. It is only read, so the
            same dict can be evaluated again afterwards.
        VOC_CLASSES: iterable of class names to evaluate; every name must be
            a key of ``preds``.
        threshold: IoU above which a detection counts as a match.
        use_07_metric: forwarded to ``voc_ap`` (11-point metric when true).

    Returns:
        The mean of the per-class APs (the value that is also printed).
    """
    eps = np.finfo(np.float64).eps
    aps = []
    for class_ in VOC_CLASSES:
        pred = preds[class_] #[[image_id,confidence,x1,y1,x2,y2],...]
        if len(pred) == 0:
            # No detection at all for this class: recall is zero everywhere,
            # so its AP is 0. Keep going so the remaining classes are still
            # evaluated (a `break` here would silently drop them).
            ap = 0.
            print('---class {} ap {}---'.format(class_,ap))
            aps += [ap]
            continue

        image_ids = [x[0] for x in pred]
        confidence = np.array([float(x[1]) for x in pred])
        BB = np.array([x[2:] for x in pred])
        # sort by confidence, highest first
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

        # Count the positives of this class over ALL images, including images
        # without any detection, so that recall is not overestimated.
        npos = sum(len(boxes) for (_, key_class), boxes in target.items()
                   if key_class == class_)

        # go down dets and mark TPs and FPs
        nd = len(image_ids)
        tp = np.zeros(nd)
        fp = np.zeros(nd)
        # image_id -> indices of ground-truth boxes already claimed by a
        # higher-ranked detection. Tracked locally instead of removing boxes
        # from `target`, so the caller's data is left intact.
        matched = {}
        for d,image_id in enumerate(image_ids):
            bb = BB[d] # predicted box
            BBGT = target.get((image_id,class_), ())
            used = matched.setdefault(image_id, set())
            for g,bbgt in enumerate(BBGT):
                if g in used:
                    continue # already matched, cannot be matched again
                # intersection (boxes use inclusive pixel coordinates, hence +1)
                ixmin = np.maximum(bbgt[0], bb[0])
                iymin = np.maximum(bbgt[1], bb[1])
                ixmax = np.minimum(bbgt[2], bb[2])
                iymax = np.minimum(bbgt[3], bb[3])
                iw = np.maximum(ixmax - ixmin + 1., 0.)
                ih = np.maximum(iymax - iymin + 1., 0.)
                inters = iw * ih

                union = (bb[2]-bb[0]+1.)*(bb[3]-bb[1]+1.) + (bbgt[2]-bbgt[0]+1.)*(bbgt[3]-bbgt[1]+1.) - inters
                if union == 0:
                    # Degenerate boxes: report them and skip instead of
                    # dividing by zero.
                    print(bb,bbgt)
                    continue

                if inters/union > threshold:
                    tp[d] = 1
                    used.add(g)
                    break
            fp[d] = 1-tp[d]

        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        # Guard both divisions: a class without ground truth has npos == 0.
        rec = tp/np.maximum(float(npos), eps)
        prec = tp/np.maximum(tp + fp, eps)
        ap = voc_ap(rec, prec, use_07_metric)
        print('---class {} ap {}---'.format(class_,ap))
        aps += [ap]

    mean_ap = np.mean(aps)
    print('---map {}---'.format(mean_ap))
    return mean_ap