In [1]:
import os
import json
import cv2
from PIL import Image
from glob import glob
import time

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms

'''
{'filename': '5f656a0f627a3ef96dec882437e3e7ada1c7a877201cf54dcd7a2c4508588ff3_여_30_기쁨_공공시설&종교&의료시설_20201204105732-001-007.jpg',
 'gender': '여',
 'age': 30,
 'isProf': '전문인',
 'faceExp_uploader': '기쁨',
 'bg_uploader': '공공시설/종교/의료시설',
 'annot_A': {'boxes': {'maxX': 1912.2253,
   'maxY': 1581.6027,
   'minX': 1187.4949,
   'minY': 579.22235},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'},
 'annot_B': {'boxes': {'maxX': 1912.348108621648,
   'maxY': 1572.1522585800617,
   'minX': 1206.363701502596,
   'minY': 579.1777983055337},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'},
 'annot_C': {'boxes': {'maxX': 1890.909447114109,
   'maxY': 1567.448627450284,
   'minX': 1183.8414475546967,
   'minY': 596.9434661684523},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'}}
'''

In [8]:
class BaegDataset(Dataset):
    def __init__(self , transform = None):
        self.transform = transform
        # image dataset
        # image dataset 병합
        
        
            
        
        self.data_list = glob('/data/Emotion_data/Validation/image/*')
        self.label_list = glob("/data/Emotion_data/Validation/label/*")
            
        # label map
        self.label_map = {
            '기쁨' : 1,
            '상처' : 2,
            '당황' : 3,
            '분노' : 4,
            '불안' : 5,
            '슬픔' : 6,
            '중립' : 7
        }
    
    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        # load images and mask
        img_path = self.data_list[idx]
        print(self.data_list[idx])
        img = Image.open(img_path).convert("RGB")
        
        if self.transform  is not None:
            img = self.transform(img)
            
        # 1. filename만 따로 빼서 for문 돌려서 json_list에 있는 것과 비교
        img_name = img_path.split('/')
        mask = {}
        for json_list in self.label_list:
            with open(json_list, 'r') as f:
                json_data = json.load(f)
                for i in range(0, len(json_data)):
                    filename = json_data[i]['filename']
                    if filename == img_name[-1]:
                        print("성공!!")
                        mask = json_data[i]
                        
        
        # area : box의 면적으로써 나중에 IOU구하려고 만든거.
        x_min = mask['annot_A']['boxes']['minX']
        x_max = mask['annot_A']['boxes']['maxX']
        y_min = mask['annot_A']['boxes']['minY']
        y_max = mask['annot_A']['boxes']['maxY']
        boxes = [x_min, y_min, x_max, y_max]
        boxes = torch.as_tensor(boxes, dtype = torch.float32)
        
        area = (boxes[3] - boxes[1]) * (boxes[2] - boxes[0])
        
        # label
        label = self.label_map[mask['faceExp_uploader']]
        
        # return target
        target = {}
        target["boxes"] = boxes
        target["label"] = label
        target["area"] = area
        target['image'] = img
        target['iscrowd'] = False

        return target

In [9]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Resize([1000, 600])])

In [10]:
dataset = BaegDataset(transform)

In [11]:
dataset[0]

/data/Emotion_data/Validation/image/006b56dc2f8cda2361e1b01b2496d6f352dd5b1790f0a9b0bfcbe540b292247d_여_20_기쁨_공공시설&종교&의료시설_20210130213913-001-009.jpg
성공!!


{'boxes': tensor([1419.8829,  361.9550, 2229.0037, 1447.1696]),
 'label': 1,
 'area': tensor(878069.6250),
 'image': tensor([[[0.1725, 0.1952, 0.1986,  ..., 0.8213, 0.8070, 0.8167],
          [0.1945, 0.1888, 0.2033,  ..., 0.8312, 0.8252, 0.8014],
          [0.1966, 0.1924, 0.2021,  ..., 0.8294, 0.8258, 0.8346],
          ...,
          [0.1051, 0.1280, 0.2606,  ..., 0.7093, 0.6964, 0.7094],
          [0.0900, 0.1231, 0.2404,  ..., 0.6905, 0.6833, 0.7102],
          [0.0947, 0.1190, 0.2516,  ..., 0.6944, 0.7079, 0.7137]],
 
         [[0.1646, 0.1874, 0.1986,  ..., 0.8879, 0.8815, 0.8912],
          [0.1866, 0.1809, 0.2033,  ..., 0.8979, 0.8997, 0.8759],
          [0.1888, 0.1846, 0.2021,  ..., 0.8961, 0.9004, 0.9091],
          ...,
          [0.1051, 0.1280, 0.2645,  ..., 0.7093, 0.6964, 0.7133],
          [0.0900, 0.1231, 0.2443,  ..., 0.6944, 0.6872, 0.7141],
          [0.0947, 0.1190, 0.2516,  ..., 0.6983, 0.7118, 0.7205]],
 
         [[0.1686, 0.1913, 0.1986,  ..., 0.9193, 0.9090,

In [12]:
# fine tuning (frozen X)
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)

num_classes = 7 # 우린 background 이미지는 없다.

in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [13]:
len(dataset)

52139

In [14]:
device = torch.device('cuda')

In [15]:
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [16]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_datset = torch.utils.data.random_split(dataset, [train_size, test_size])

In [17]:
print(len(train_dataset))
print(len(test_datset))

41711
10428


In [18]:
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [19]:
train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=0)

In [20]:
test_data_loader = torch.utils.data.DataLoader(test_datset, batch_size = 1, shuffle=False, num_workers=0)

In [21]:
# optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

In [22]:
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

In [23]:
num_epochs = 15

In [None]:
for epochs in range(num_epochs):
    start_time = time.time()
    model.train()
    i = 0
    epoch_loss = 0
    for img, annot in train_data_loader:

In [25]:
train_dataset[0]

/data/Emotion_data/Validation/image/c09cf9277d67fdf9d12f1826cd5ba28156abac213264ad154ec993cdb281d01b_남_30_분노_공공시설&종교&의료시설_20201203233638-001-004.jpg
성공!!


{'boxes': tensor([1170.5842,  643.6230, 1661.2802, 1246.4299]),
 'label': 4,
 'area': tensor(295794.8750),
 'image': tensor([[[0.9107, 0.8853, 0.8940,  ..., 0.1031, 0.1088, 0.1264],
          [0.9058, 0.8848, 0.8846,  ..., 0.1318, 0.1287, 0.1110],
          [0.8957, 0.9042, 0.9018,  ..., 0.1110, 0.1076, 0.1153],
          ...,
          [0.8351, 0.8300, 0.8288,  ..., 0.1349, 0.1241, 0.0861],
          [0.8341, 0.8276, 0.8319,  ..., 0.1108, 0.1018, 0.0654],
          [0.8230, 0.8275, 0.8274,  ..., 0.0902, 0.0840, 0.0834]],
 
         [[0.9381, 0.9127, 0.9175,  ..., 0.1333, 0.1402, 0.1578],
          [0.9333, 0.9123, 0.9081,  ..., 0.1619, 0.1601, 0.1407],
          [0.9232, 0.9317, 0.9253,  ..., 0.1412, 0.1347, 0.1349],
          ...,
          [0.8468, 0.8418, 0.8406,  ..., 0.1353, 0.1280, 0.0900],
          [0.8459, 0.8394, 0.8436,  ..., 0.1112, 0.1058, 0.0694],
          [0.8347, 0.8393, 0.8391,  ..., 0.0906, 0.0879, 0.0873]],
 
         [[0.9107, 0.8853, 0.9018,  ..., 0.1486, 0.1520,

In [None]:
i = 0
for img, annot in train_data_loader:
    print(i)
    i += 1
    print(img)
    print(annot)