In [1]:
import os
import json
import cv2
from PIL import Image
from glob import glob

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms

'''
{'filename': '5f656a0f627a3ef96dec882437e3e7ada1c7a877201cf54dcd7a2c4508588ff3_여_30_기쁨_공공시설&종교&의료시설_20201204105732-001-007.jpg',
 'gender': '여',
 'age': 30,
 'isProf': '전문인',
 'faceExp_uploader': '기쁨',
 'bg_uploader': '공공시설/종교/의료시설',
 'annot_A': {'boxes': {'maxX': 1912.2253,
   'maxY': 1581.6027,
   'minX': 1187.4949,
   'minY': 579.22235},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'},
 'annot_B': {'boxes': {'maxX': 1912.348108621648,
   'maxY': 1572.1522585800617,
   'minX': 1206.363701502596,
   'minY': 579.1777983055337},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'},
 'annot_C': {'boxes': {'maxX': 1890.909447114109,
   'maxY': 1567.448627450284,
   'minX': 1183.8414475546967,
   'minY': 596.9434661684523},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'}}
'''

In [2]:
class BaegDataset(Dataset):
    """Face-emotion dataset read from image folders plus JSON annotation files.

    Each sample is looked up by image file name in the JSON label files and
    returned as a dict holding the face bounding box, its area, the emotion
    class id and the (optionally transformed) image.

    Args:
        mode: ``'train'`` reads ``<root>/Training/<emotion>/*`` for every
            emotion folder; ``'test'`` reads ``<root>/Validation/image/*``.
        transform: optional callable applied to the RGB ``PIL.Image``.
            Only the image is transformed; the box coordinates are returned
            exactly as stored in the JSON, so a resizing transform would leave
            them in the original pixel space.
        root: directory that contains the ``Training`` and ``Validation``
            folders.

    Raises:
        ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``.
    """

    # Sub-directories of ``Training/`` that hold images, one per emotion.
    TRAIN_EMOTION_DIRS = (
        'upset', 'pleasure', 'hurt', 'anger', 'unrest', 'sad', 'neutrality',
    )

    def __init__(self, mode='train', transform=None, root='/data/Emotion_data'):
        if mode not in ('train', 'test'):
            # Fail at construction time instead of leaving data_list undefined.
            raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")

        self.mode = mode
        self.transform = transform
        self.root = root

        if self.mode == 'train':
            split_dir = os.path.join(root, 'Training')
            # Merge the images of every emotion folder ('hurt' included).
            self.data_list = []
            for emotion in self.TRAIN_EMOTION_DIRS:
                self.data_list += glob(os.path.join(split_dir, emotion, '*'))
        else:
            split_dir = os.path.join(root, 'Validation')
            self.data_list = glob(os.path.join(split_dir, 'image', '*'))

        # JSON annotation files for the selected split.
        self.label_list = glob(os.path.join(split_dir, 'label', '*'))

        # filename -> annotation record; filled on first sample access so that
        # constructing the dataset stays cheap.
        self._annotations = None

        # Emotion name (as written in the JSON) -> class id.
        # Ids start at 1; 0 is left unused.
        self.label_map = {
            '기쁨' : 1,  # joy
            '상처' : 2,  # hurt
            '당황' : 3,  # embarrassment
            '분노' : 4,  # anger
            '불안' : 5,  # anxiety
            '슬픔' : 6,  # sadness
            '중립' : 7   # neutral
        }

    def __len__(self):
        """Return the number of image files found for the selected split."""
        return len(self.data_list)

    def _load_annotation_index(self):
        """Parse every label file once and index the records by file name.

        When the same file name occurs more than once, the record read last
        wins.
        """
        index = {}
        for label_path in self.label_list:
            # The label files contain Korean text; do not rely on the locale.
            with open(label_path, 'r', encoding='utf-8') as f:
                records = json.load(f)
            for record in records:
                index[record['filename']] = record
        return index

    def _find_annotation(self, img_path):
        """Return the annotation record whose ``filename`` matches ``img_path``.

        Raises:
            KeyError: if no label file contains a record for this image.
        """
        if self._annotations is None:
            self._annotations = self._load_annotation_index()

        filename = os.path.basename(img_path)
        try:
            return self._annotations[filename]
        except KeyError:
            raise KeyError(
                f"no annotation record found for image {filename!r}"
            ) from None

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            dict with keys ``'boxes'`` (float32 tensor
            ``[minX, minY, maxX, maxY]`` taken from ``annot_A``), ``'label'``
            (int class id from ``label_map``), ``'area'`` (box area, kept for
            later IoU computations), ``'image'`` and ``'iscrowd'`` (always
            ``False``).

        Raises:
            KeyError: if the image has no annotation record, or its emotion
                name is not in ``label_map``.
        """
        img_path = self.data_list[idx]
        mask = self._find_annotation(img_path)

        # convert() returns a fully loaded copy, so the file can be closed.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        if self.transform is not None:
            img = self.transform(img)

        # Only the first annotator's box (annot_A) is used.
        box = mask['annot_A']['boxes']
        boxes = torch.as_tensor(
            [box['minX'], box['minY'], box['maxX'], box['maxY']],
            dtype=torch.float32,
        )

        # height * width of the box
        area = (boxes[3] - boxes[1]) * (boxes[2] - boxes[0])

        label = self.label_map[mask['faceExp_uploader']]

        target = {}
        target["boxes"] = boxes
        target["label"] = label
        target["area"] = area
        target['image'] = img
        target['iscrowd'] = False

        return target

In [3]:
# Image preprocessing pipeline passed to the dataset: only converts the PIL image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

In [4]:
# Build the dataset in "test" mode, which reads the Validation image/label folders.
dataset = BaegDataset("test", transform)

In [5]:
# Smoke check: fetch the first sample and display the returned target dict.
dataset[0]

/data/Emotion_data/Validation/image/006b56dc2f8cda2361e1b01b2496d6f352dd5b1790f0a9b0bfcbe540b292247d_여_20_기쁨_공공시설&종교&의료시설_20210130213913-001-009.jpg
성공!!


{'boxes': tensor([1419.8829,  361.9550, 2229.0037, 1447.1696]),
 'label': 1,
 'area': tensor(878069.6250),
 'image': tensor([[[0.1412, 0.1804, 0.1843,  ..., 0.8118, 0.7922, 0.8157],
          [0.1451, 0.1608, 0.1686,  ..., 0.8118, 0.8118, 0.8157],
          [0.1961, 0.1725, 0.1686,  ..., 0.8078, 0.8275, 0.8118],
          ...,
          [0.0902, 0.0902, 0.0902,  ..., 0.7137, 0.7137, 0.7137],
          [0.0980, 0.0980, 0.0980,  ..., 0.7137, 0.7098, 0.7098],
          [0.1020, 0.1020, 0.0980,  ..., 0.7137, 0.7137, 0.7137]],
 
         [[0.1333, 0.1725, 0.1765,  ..., 0.8863, 0.8667, 0.8902],
          [0.1373, 0.1529, 0.1608,  ..., 0.8863, 0.8863, 0.8902],
          [0.1882, 0.1647, 0.1608,  ..., 0.8824, 0.9020, 0.8863],
          ...,
          [0.0902, 0.0902, 0.0902,  ..., 0.7176, 0.7176, 0.7176],
          [0.0980, 0.0980, 0.0980,  ..., 0.7176, 0.7216, 0.7216],
          [0.1020, 0.1020, 0.0980,  ..., 0.7255, 0.7255, 0.7255]],
 
         [[0.1373, 0.1765, 0.1804,  ..., 0.9098, 0.8902,

In [7]:
# Fine-tuning setup: no layers are frozen here, so the whole network trains.
# NOTE(review): pretrained=False loads no detection weights - confirm that
# is intended for a fine-tuning run.
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)

# torchvision detection models reserve class index 0 for background, even when
# the data has no background-only images. BaegDataset.label_map assigns ids
# 1..7 to the emotions, so the box predictor needs 7 + 1 outputs; with only 7
# the label 7 would be out of range for the classifier.
NUM_EMOTIONS = 7
num_classes = NUM_EMOTIONS + 1  # +1 for the implicit background class

in_features = model.roi_heads.box_predictor.cls_score.in_features

# Swap the box-prediction head for one sized to our classes.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)