In [1]:
import os
import json
import cv2
from PIL import Image, ImageFile
from glob import glob
import time

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms

device = torch.device('cuda')

ImageFile.LOAD_TRUNCATED_IMAGES = True

'''
{'filename': '5f656a0f627a3ef96dec882437e3e7ada1c7a877201cf54dcd7a2c4508588ff3_여_30_기쁨_공공시설&종교&의료시설_20201204105732-001-007.jpg',
 'gender': '여',
 'age': 30,
 'isProf': '전문인',
 'faceExp_uploader': '기쁨',
 'bg_uploader': '공공시설/종교/의료시설',
 'annot_A': {'boxes': {'maxX': 1912.2253,
   'maxY': 1581.6027,
   'minX': 1187.4949,
   'minY': 579.22235},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'},
 'annot_B': {'boxes': {'maxX': 1912.348108621648,
   'maxY': 1572.1522585800617,
   'minX': 1206.363701502596,
   'minY': 579.1777983055337},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'},
 'annot_C': {'boxes': {'maxX': 1890.909447114109,
   'maxY': 1567.448627450284,
   'minX': 1183.8414475546967,
   'minY': 596.9434661684523},
  'faceExp': '기쁨',
  'bg': '공공시설/종교/의료'}}
'''

In [2]:
class BaegDataset(Dataset):
    def __init__(self , transform = None):
        self.transform = transform
        # image dataset
        # image dataset 병합
        
        
            
        
        self.data_list = glob('/data/Emotion_data/Validation/image/*')
        self.label_list = glob("/data/Emotion_data/Validation/label/*")
            
        # label map
        self.label_map = {
            '기쁨' : 0,
            '상처' : 1,
            '당황' : 2,
            '분노' : 3,
            '불안' : 4,
            '슬픔' : 5,
            '중립' : 6
        }
    
    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        # load images and mask
        img_path = self.data_list[idx]
        img = Image.open(img_path).convert("RGB")
        
        if self.transform  is not None:
            img = self.transform(img)
            
        # 1. filename만 따로 빼서 for문 돌려서 json_list에 있는 것과 비교
        img_name = img_path.split('/')
        mask = {}
        for json_list in self.label_list:
            with open(json_list, 'r') as f:
                json_data = json.load(f)
                for i in range(0, len(json_data)):
                    filename = json_data[i]['filename']
                    if filename == img_name[-1]:
                        mask = json_data[i]
                        
        
        # area : box의 면적으로써 나중에 IOU구하려고 만든거.
        x_min = abs(mask['annot_A']['boxes']['minX'])
        x_max = abs(mask['annot_A']['boxes']['maxX'])
        y_min = abs(mask['annot_A']['boxes']['minY'])
        y_max = abs(mask['annot_A']['boxes']['maxY'])
        boxes = [x_min, y_min, x_max, y_max]
        boxes = torch.as_tensor(boxes, dtype = torch.float32)
        boxes = boxes.unsqueeze(0)
        
        #area = (boxes[3] - boxes[1]) * (boxes[2] - boxes[0])
        
        # label
        label = self.label_map[mask['faceExp_uploader']]
        label = torch.as_tensor(label, dtype=torch.int64)
        # return target
        target = {}
        target["boxes"] = boxes
        target["labels"] = label.unsqueeze(0)
        #target["area"] = area.to(device)
        

        return img, target

In [3]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [4]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Resize([600, 1000])])
#transforms.Resize([600, 1000])

In [5]:
dataset = BaegDataset(transform)

In [6]:
dataset[1]

(tensor([[[0.5384, 0.5472, 0.5442,  ..., 0.5492, 0.6115, 0.5982],
          [0.5390, 0.5404, 0.5511,  ..., 0.5695, 0.5989, 0.5834],
          [0.5490, 0.5391, 0.5509,  ..., 0.6096, 0.6310, 0.6075],
          ...,
          [0.4623, 0.4694, 0.5045,  ..., 0.5579, 0.5523, 0.5905],
          [0.4522, 0.4606, 0.4822,  ..., 0.4971, 0.5386, 0.5433],
          [0.4735, 0.4720, 0.4839,  ..., 0.5140, 0.5196, 0.5016]],
 
         [[0.5619, 0.5707, 0.5677,  ..., 0.4103, 0.4589, 0.4296],
          [0.5626, 0.5639, 0.5746,  ..., 0.4205, 0.4411, 0.4119],
          [0.5725, 0.5626, 0.5744,  ..., 0.4415, 0.4629, 0.4387],
          ...,
          [0.4623, 0.4694, 0.5045,  ..., 0.5500, 0.5444, 0.5826],
          [0.4522, 0.4606, 0.4822,  ..., 0.4854, 0.5268, 0.5315],
          [0.4735, 0.4720, 0.4839,  ..., 0.5022, 0.5078, 0.4898]],
 
         [[0.5619, 0.5707, 0.5677,  ..., 0.2881, 0.3164, 0.2939],
          [0.5626, 0.5639, 0.5746,  ..., 0.2790, 0.3038, 0.2918],
          [0.5725, 0.5626, 0.5744,  ...,

In [7]:
# fine tuning (frozen X)

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

num_classes = 7 # 우린 background 이미지는 없다.

in_features = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [8]:
len(dataset)

52139

In [9]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_datset = torch.utils.data.random_split(dataset, [train_size, test_size])

In [10]:
print(len(train_dataset))
print(len(test_datset))

41711
10428


In [11]:
#train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0, collate_fn=collate_fn)

In [12]:
#test_data_loader = torch.utils.data.DataLoader(test_datset, batch_size = 2, shuffle=False, num_workers=0)
test_data_loader = torch.utils.data.DataLoader(test_datset, batch_size = 2, shuffle=False, num_workers=0, collate_fn=collate_fn)

In [13]:
# optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

In [14]:
#lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1)

In [16]:
num_epochs = 15

In [17]:
#criterion = nn.CrossEntropyLoss()

In [None]:
for epoch in range(num_epochs):
    start_time = time.time()
    model.train()
    i = 0
    epoch_loss = 0
    for imgs, labels in train_data_loader:
        try:
            #img = img.to(device)
            #print(type(label))
            #label = [{k: v.to(device) for k, v in t.items()} for t in label]
            #label = {k : v.to(device) for k, v in label}
            i += 1
            imgs = list(img.to(device) for img in imgs)
            annotations = [{k: v.to(device) for k, v in t.items()} for t in labels]

            loss_dict = model(imgs, annotations)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()

            losses.backward()

            optimizer.step()
            epoch_loss += losses
            if i == 10000:
                print(f'지금 : {i}번째 Loss : {epoch_loss}')
        except Exception as e:
            logging.error(e, exc_info=True) # log stack trace
            continue
    print('--------------------------------------------------------------')  
    print(f'epoch : {epoch+1}, Loss : {epoch_loss}, time : {time.time() - start}')
        
        #train_loss += loss.item() * inputs.size(0)
        
        #ret, predictions = torch.max(output.data, 1)
        #correct_counts = predictions.eq(label.data.view_as(predictions))
        
        #acc = torch.mean(correct_counts.type(torch.FloatTensor))
        
        #train_acc += acc.item() * inputs.size(0)
        
        #print("Batch number: {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}".format(i, loss.item(), acc.item
            

시작시간 5시 46분 정도.

In [None]:
     
    with torch.no_grad():
        model.eval()
        
        for j, (img, label) in enumerate(test_data_loader):
            img = label['image'].to(device)
            annotations = label['boxes'].to(device)
            label = label['label'].to(device)
            inputs = [img, annotations]
            
            outputs = model(inputs)
            loss = criterion(output, label)

In [25]:
train_dataset[0]

/data/Emotion_data/Validation/image/c09cf9277d67fdf9d12f1826cd5ba28156abac213264ad154ec993cdb281d01b_남_30_분노_공공시설&종교&의료시설_20201203233638-001-004.jpg
성공!!


{'boxes': tensor([1170.5842,  643.6230, 1661.2802, 1246.4299]),
 'label': 4,
 'area': tensor(295794.8750),
 'image': tensor([[[0.9107, 0.8853, 0.8940,  ..., 0.1031, 0.1088, 0.1264],
          [0.9058, 0.8848, 0.8846,  ..., 0.1318, 0.1287, 0.1110],
          [0.8957, 0.9042, 0.9018,  ..., 0.1110, 0.1076, 0.1153],
          ...,
          [0.8351, 0.8300, 0.8288,  ..., 0.1349, 0.1241, 0.0861],
          [0.8341, 0.8276, 0.8319,  ..., 0.1108, 0.1018, 0.0654],
          [0.8230, 0.8275, 0.8274,  ..., 0.0902, 0.0840, 0.0834]],
 
         [[0.9381, 0.9127, 0.9175,  ..., 0.1333, 0.1402, 0.1578],
          [0.9333, 0.9123, 0.9081,  ..., 0.1619, 0.1601, 0.1407],
          [0.9232, 0.9317, 0.9253,  ..., 0.1412, 0.1347, 0.1349],
          ...,
          [0.8468, 0.8418, 0.8406,  ..., 0.1353, 0.1280, 0.0900],
          [0.8459, 0.8394, 0.8436,  ..., 0.1112, 0.1058, 0.0694],
          [0.8347, 0.8393, 0.8391,  ..., 0.0906, 0.0879, 0.0873]],
 
         [[0.9107, 0.8853, 0.9018,  ..., 0.1486, 0.1520,

In [33]:
for boxes, label in enumerate(train_data_loader, 0):
    print(label.items())

/data/Emotion_data/Validation/image/46c437c5362ab25921e683613306b7d61b3216c643725852b5fd5a7838e9ffba_여_30_슬픔_상업시설&점포&시장_20210115214547-003-014.jpg
성공!!
/data/Emotion_data/Validation/image/a6eab57d3daba5dcfef4f1670247a9d398af8b3b8b3d4d53fd11339afb66b156_여_20_중립_상업시설&점포&시장_20210122180603-003-020.jpg
성공!!
dict_items([('boxes', tensor([[ 809.7222,  813.9601, 1849.5162, 2270.4502],
        [ 913.1677,  886.1880, 1758.3567, 2027.0667]])), ('label', tensor([6, 7])), ('area', tensor([1514449.7500,  964258.1250])), ('image', tensor([[[[0.6696, 0.6768, 0.8089,  ..., 0.3132, 0.3073, 0.3170],
          [0.6589, 0.6663, 0.8109,  ..., 0.3155, 0.2990, 0.3195],
          [0.6613, 0.6827, 0.8235,  ..., 0.3309, 0.3250, 0.3190],
          ...,
          [0.1289, 0.1795, 0.1191,  ..., 0.7722, 0.7685, 0.7759],
          [0.1396, 0.1662, 0.1022,  ..., 0.7695, 0.7793, 0.7618],
          [0.1133, 0.1181, 0.0702,  ..., 0.7829, 0.7705, 0.7768]],

         [[0.6388, 0.6572, 0.7770,  ..., 0.1754, 0.1679, 0.1611],

성공!!
/data/Emotion_data/Validation/image/4361bbf013d3f509826169780a2700f0c860cf56ec80189c6a69662b1bb90736_여_50_불안_공공시설&종교&의료시설_20210127225032-001-012.jpg
성공!!
dict_items([('boxes', tensor([[1287.9668,  213.6841, 2101.7173, 1335.7837],
        [1577.9138,  440.8159, 2459.2920, 1592.7188]])), ('label', tensor([1, 5])), ('area', tensor([ 913109.1250, 1015262.0000])), ('image', tensor([[[[0.9350, 0.9387, 0.9298,  ..., 0.7238, 0.7303, 0.7436],
          [0.9369, 0.9364, 0.9385,  ..., 0.7122, 0.7421, 0.7388],
          [0.9336, 0.9403, 0.9373,  ..., 0.7190, 0.7455, 0.7437],
          ...,
          [0.6036, 0.6029, 0.6152,  ..., 0.1477, 0.1367, 0.1582],
          [0.6099, 0.6017, 0.5999,  ..., 0.1759, 0.2208, 0.1816],
          [0.5887, 0.6194, 0.6121,  ..., 0.1280, 0.1907, 0.1483]],

         [[0.9193, 0.9230, 0.9141,  ..., 0.6611, 0.6676, 0.6809],
          [0.9212, 0.9207, 0.9229,  ..., 0.6495, 0.6793, 0.6760],
          [0.9179, 0.9246, 0.9216,  ..., 0.6562, 0.6827, 0.6810],
          ..

성공!!
/data/Emotion_data/Validation/image/6510399d39bee68c097512c98d7fe446a7e100309970091d5c84efcffe6a88ce_여_20_중립_도심 환경_20210208145437-008-005.jpg
성공!!
dict_items([('boxes', tensor([[1656.3026,  820.2828, 2584.6660, 2132.4058],
        [1703.0969,  570.7196, 2483.3887, 1632.6982]])), ('label', tensor([4, 7])), ('area', tensor([1218126.8750,  828653.1875])), ('image', tensor([[[[0.8286, 0.8402, 0.8291,  ..., 0.8059, 0.8414, 0.8181],
          [0.8351, 0.8258, 0.8600,  ..., 0.8323, 0.8415, 0.8467],
          [0.8423, 0.8368, 0.8380,  ..., 0.8296, 0.8388, 0.8501],
          ...,
          [0.8342, 0.8354, 0.8065,  ..., 0.8043, 0.8343, 0.8144],
          [0.8137, 0.8055, 0.8162,  ..., 0.8253, 0.8374, 0.8074],
          [0.8246, 0.8238, 0.8181,  ..., 0.7868, 0.8504, 0.8346]],

         [[0.8168, 0.8285, 0.8174,  ..., 0.7902, 0.8139, 0.8024],
          [0.8234, 0.8140, 0.8482,  ..., 0.8166, 0.8140, 0.8310],
          [0.8306, 0.8250, 0.8262,  ..., 0.8139, 0.8114, 0.8344],
          ...,
    

KeyboardInterrupt: 

In [50]:
a = {1:2, 3:4}

In [51]:
a.items()

dict_items([(1, 2), (3, 4)])

In [70]:
labels = 3

In [71]:
labels = torch.as_tensor(labels, dtype=torch.int64)

In [72]:
labels

tensor(3)