In [1]:
import os
import json
import cv2
from PIL import Image, ImageFile
from glob import glob
import time

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms

device = torch.device('cuda')

ImageFile.LOAD_TRUNCATED_IMAGES = True

'''
{'filename': '5f656a0f627a3ef96dec882437e3e7ada1c7a877201cf54dcd7a2c4508588ff3_여_30_기쁨_공공시설&종교&의료시설_20201204105732-001-007.jpg',
 'gender': '여',
 'age': 30,
 'isProf': '전문인',
 'faceExp_uploader': '기쁨',
 'bg_uploader': '공공시설/종교/의료시설',
 'annot_A': {'boxes': {'maxX': 1912.2253,
   'maxY': 1581.6027,
   'minX': 1187.4949,
   'minY': 579.22235},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'},
 'annot_B': {'boxes': {'maxX': 1912.348108621648,
   'maxY': 1572.1522585800617,
   'minX': 1206.363701502596,
   'minY': 579.1777983055337},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'},
 'annot_C': {'boxes': {'maxX': 1890.909447114109,
   'maxY': 1567.448627450284,
   'minX': 1183.8414475546967,
   'minY': 596.9434661684523},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'}}
'''

In [2]:
class BaegDataset(Dataset):
    def __init__(self , transform = None):
        self.transform = transform
        # image dataset
        # image dataset 병합
        
        
            
        
        self.data_list = glob('/data/Emotion_data/Validation/image/*')
        self.label_list = glob("/data/Emotion_data/Validation/label/*")
            
        # label map
        self.label_map = {
            '기쁨' : 0,
            '상처' : 1,
            '당황' : 2,
            '분노' : 3,
            '불안' : 4,
            '슬픔' : 5,
            '중립' : 6
        }
    
    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        # load images and mask
        img_path = self.data_list[idx]
        img = Image.open(img_path).convert("RGB")
        
        if self.transform  is not None:
            img = self.transform(img)
            
        # 1. filename만 따로 빼서 for문 돌려서 json_list에 있는 것과 비교
        img_name = img_path.split('/')
        mask = {}
        for json_list in self.label_list:
            with open(json_list, 'r') as f:
                json_data = json.load(f)
                for i in range(0, len(json_data)):
                    filename = json_data[i]['filename']
                    if filename == img_name[-1]:
                        mask = json_data[i]
                        
        
        # area : box의 면적으로써 나중에 IOU구하려고 만든거.
        x_min = mask['annot_A']['boxes']['minX']
        x_max = mask['annot_A']['boxes']['maxX']
        y_min = mask['annot_A']['boxes']['minY']
        y_max = mask['annot_A']['boxes']['maxY']
        boxes = [x_min, y_min, x_max, y_max]
        boxes = torch.as_tensor(boxes, dtype = torch.float32)
        boxes = boxes.unsqueeze(0)
        
        #area = (boxes[3] - boxes[1]) * (boxes[2] - boxes[0])
        
        # label
        label = self.label_map[mask['faceExp_uploader']]
        label = torch.as_tensor(label, dtype=torch.int64)
        # return target
        target = {}
        target["boxes"] = boxes
        target["labels"] = label.unsqueeze(0)
        #target["area"] = area.to(device)
        

        return img, target

In [3]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [4]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Resize([600, 1000])])
#transforms.Resize([600, 1000])

In [5]:
dataset = BaegDataset(transform)

In [6]:
dataset[1]

(tensor([[[0.5384, 0.5472, 0.5442,  ..., 0.5492, 0.6115, 0.5982],
          [0.5390, 0.5404, 0.5511,  ..., 0.5695, 0.5989, 0.5834],
          [0.5490, 0.5391, 0.5509,  ..., 0.6096, 0.6310, 0.6075],
          ...,
          [0.4623, 0.4694, 0.5045,  ..., 0.5579, 0.5523, 0.5905],
          [0.4522, 0.4606, 0.4822,  ..., 0.4971, 0.5386, 0.5433],
          [0.4735, 0.4720, 0.4839,  ..., 0.5140, 0.5196, 0.5016]],
 
         [[0.5619, 0.5707, 0.5677,  ..., 0.4103, 0.4589, 0.4296],
          [0.5626, 0.5639, 0.5746,  ..., 0.4205, 0.4411, 0.4119],
          [0.5725, 0.5626, 0.5744,  ..., 0.4415, 0.4629, 0.4387],
          ...,
          [0.4623, 0.4694, 0.5045,  ..., 0.5500, 0.5444, 0.5826],
          [0.4522, 0.4606, 0.4822,  ..., 0.4854, 0.5268, 0.5315],
          [0.4735, 0.4720, 0.4839,  ..., 0.5022, 0.5078, 0.4898]],
 
         [[0.5619, 0.5707, 0.5677,  ..., 0.2881, 0.3164, 0.2939],
          [0.5626, 0.5639, 0.5746,  ..., 0.2790, 0.3038, 0.2918],
          [0.5725, 0.5626, 0.5744,  ...,

In [7]:
# fine tuning (frozen X)

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

num_classes = 8 

in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [8]:
len(dataset)

52139

In [9]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_datset = torch.utils.data.random_split(dataset, [train_size, test_size])

In [10]:
print(len(train_dataset))
print(len(test_datset))

41711
10428


In [11]:
#train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0, collate_fn=collate_fn)

In [12]:
#test_data_loader = torch.utils.data.DataLoader(test_datset, batch_size = 2, shuffle=False, num_workers=0)
test_data_loader = torch.utils.data.DataLoader(test_datset, batch_size = 2, shuffle=False, num_workers=0, collate_fn=collate_fn)

In [13]:
# optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.0001,
                                momentum=0.9, weight_decay=0.0005)

In [14]:
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1)

In [15]:
num_epochs = 15

In [16]:
for epoch in range(num_epochs):
    start_time = time.time()
    model.train()
    i = 0
    epoch_loss = 0
    for imgs, labels in train_data_loader:
        try:
            #img = img.to(device)
            #print(type(label))
            #label = [{k: v.to(device) for k, v in t.items()} for t in label]
            #label = {k : v.to(device) for k, v in label}
            i += 1
            imgs = list(img.to(device) for img in imgs)
            annotations = [{k: v.to(device) for k, v in t.items()} for t in labels]
            loss_dict = model(imgs, annotations)
            print('--------------------------------------------------')
            print(loss_dict)
            losses = sum(loss for loss in loss_dict.values())
            optimizer.zero_grad()

            losses.backward()

            optimizer.step()
            epoch_loss += losses
            if i == 100:
                print(f'지금 : {i}번째 Loss : {epoch_loss}')
        except Exception as e:
            print("오류발생")
            continue
    print('--------------------------------------------------------------')  
    print(f'epoch : {epoch+1}, Loss : {epoch_loss}, time : {time.time() - start}')

--------------------------------------------------
{'loss_classifier': tensor(2.2179, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(1.4048e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(5.0061, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(27.7051, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(2.1959, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(6.5946e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(2.3417, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(12.6516, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(2.1253, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.0006, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tenso

--------------------------------------------------
{'loss_classifier': tensor(0.3763, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(5.1601e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(1.7222, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(53.9558, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.3353, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.0001, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.3865, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(0.0227, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.3385, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.7

--------------------------------------------------
{'loss_classifier': tensor(0.0382, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(1.9159e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(1.0402, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(38.4479, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.0411, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.3897, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(0.0432, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.0272, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(5.1954e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor

--------------------------------------------------
{'loss_classifier': tensor(0.0164, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(6.7516e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(1.0010, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(25.1631, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.0153, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(7.1354e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(1.3682, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(56.2038, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.0154, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(4.5555e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': t

--------------------------------------------------
{'loss_classifier': tensor(0.0105, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(2.7309e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(1.2198, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(59.4655, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.0147, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(4.6051e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(1.8246, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(90.2373, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.0120, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(2.6851e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': t

--------------------------------------------------
{'loss_classifier': tensor(0.0150, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(0.0079, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.9956, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(24.4379, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.0107, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(2.0584e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tensor(0.5582, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>), 'loss_rpn_box_reg': tensor(10.9847, device='cuda:0', grad_fn=<DivBackward0>)}
--------------------------------------------------
{'loss_classifier': tensor(0.0104, device='cuda:0', grad_fn=<NllLossBackward>), 'loss_box_reg': tensor(9.1153e-05, device='cuda:0', grad_fn=<DivBackward0>), 'loss_objectness': tenso

KeyboardInterrupt: 

In [17]:
ex_image = '/data/Emotion_data/Validation/image/fff3bc45e564d557af38b876dc5bdbe6185806c2573da2335335f6bedc7a46b3_여_20_상처_행사&사무공간_20210225154306-006-001.jpg'

In [19]:
img = Image.open(ex_image).convert("RGB")

In [24]:
def make_prediction(model, img, threshold):
    model.eval()
    preds = model(img)
    print(preds)
    for id in range(len(preds)) :
        idx_list = []

        for idx, score in enumerate(preds[id]['scores']) :
            if score > threshold : 
                idx_list.append(idx)

        preds[id]['boxes'] = preds[id]['boxes'][idx_list]
        preds[id]['labels'] = preds[id]['labels'][idx_list]
        preds[id]['scores'] = preds[id]['scores'][idx_list]

    return preds

In [26]:
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [28]:
for imgs, annotations in test_data_loader:
        imgs = list(img.to(device) for img in imgs)

        pred = make_prediction(model, imgs, 0.5)
        print(pred)
        break

[{'boxes': tensor([], device='cuda:0', size=(0, 4), grad_fn=<StackBackward>), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0', grad_fn=<IndexBackward>)}, {'boxes': tensor([], device='cuda:0', size=(0, 4), grad_fn=<StackBackward>), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0', grad_fn=<IndexBackward>)}]
[{'boxes': tensor([], device='cuda:0', size=(0, 4), grad_fn=<IndexBackward>), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0', grad_fn=<IndexBackward>)}, {'boxes': tensor([], device='cuda:0', size=(0, 4), grad_fn=<IndexBackward>), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0', grad_fn=<IndexBackward>)}]
