In [12]:
import torch      #pytorch
import torch.nn as nn     #pytorch network
from torch.utils.data import Dataset, DataLoader      #pytorch dataset
from torch.utils.tensorboard import SummaryWriter     #tensorboard
import torchvision      #torchvision
import torch.optim as optim     #pytorch optimizer
import numpy as np      #numpy
import matplotlib.pyplot as plt     #matplotlib(이미지 표시를 위해 필요)
from collections import OrderedDict     #python라이브러리 (라벨 dictionary를 만들 때 필요)
import os     #os
import xml.etree.ElementTree as Et      #Pascal xml을 읽어올 때 필요
from xml.etree.ElementTree import Element, ElementTree
import cv2      #opencv (box 그리기를 할 때 필요)
from PIL import Image     #PILLOW (이미지 읽기)
import time     #time
import imgaug as ia     #imgaug
from imgaug import augmenters as iaa
from torchvision import transforms      #torchvision transform
from glob import glob
import pandas as pd
#GPU연결
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.io.image import read_image
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
from torchvision.models.detection import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights
if torch.cuda.is_available():
  device = torch.device('cuda:0')

In [13]:
train_image_list=glob('../../data/dataset/validation/image/*.jpg')
validation_image_list=glob('../../data/dataset/test/image/*.jpg')
train_label=pd.read_csv('../../data/dataset/validation/bbox_label.csv',encoding='cp949')
validation_label=pd.read_csv('../../data/dataset/test/bbox_label.csv',encoding='cp949')
class MaskDataset(object):
    def __init__(self, transforms,image, label):
        '''
        path: path to train folder or test folder
        '''
        # transform module과 img path 경로를 정의
        self.transforms = transforms
        self.image = image
        self.label=label

    def __getitem__(self, idx): #special method
        # load images ad masks
        image_path=self.image[idx]
        df=self.label.loc[self.label["image"] ==os.path.basename(image_path)].reset_index()
        img = self.transforms(Image.open(image_path))
        count=len(df)

        target = {}
        boxes = []
        labels = []
        
        boxes.append([df.loc[0]['x1'],df.loc[0]['y1'],df.loc[0]['x2'],df.loc[0]['y2']])
        labels.append([1])
        boxes.append([df.loc[1]['x1'],df.loc[1]['y1'],df.loc[1]['x2'],df.loc[1]['y2']])
        labels.append([1])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)/256
        labels = torch.as_tensor(labels, dtype=torch.int64)
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        return img, target

    def __len__(self): 
        return len(self.image)

data_transform = transforms.Compose([  # transforms.Compose : list 내의 작업을 연달아 할 수 있게 호출하는 클래스
        transforms.ToTensor() # ToTensor : numpy 이미지에서 torch 이미지로 변경
    ])

def collate_fn(batch):
    return tuple(zip(*batch))

dataset = MaskDataset(data_transform,train_image_list, train_label)
test_dataset = MaskDataset(data_transform,validation_image_list, validation_label)

data_loader = torch.utils.data.DataLoader(dataset, batch_size=4)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4)

In [14]:
def get_model_instance_segmentation(num_classes):
  
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [15]:
model = get_model_instance_segmentation(150)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 
model.to(device)



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [23]:
num_epochs = 10
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
for epoch in range(num_epochs):
    start = time.time()
    model.train()
    i = 0    
    epoch_loss = 0
    for imgs, annotations in data_loader:
        i += 1
        imgs = list(img.to(device) for img in imgs)
        annotations = annotations
        loss_dict = model(imgs, annotations) 
        losses = sum(loss for loss in loss_dict.values())        

        optimizer.zero_grad()
        losses.backward()
        optimizer.step() 
        epoch_loss += losses
    print(f'epoch : {epoch+1}, Loss : {epoch_loss}, time : {time.time() - start}')

TypeError: string indices must be integers

In [18]:
annotations

{'boxes': tensor([[[0.0000, 0.6172, 0.1562, 0.6172],
          [0.0352, 0.0000, 0.9922, 0.0352]],
 
         [[0.0391, 0.2344, 0.4375, 0.2734],
          [0.0000, 0.1602, 0.7266, 0.1602]],
 
         [[0.0078, 0.1367, 0.7656, 0.1445],
          [0.1055, 0.1484, 0.9805, 0.2539]],
 
         [[0.2852, 0.6641, 0.5469, 0.9492],
          [0.3398, 0.5000, 0.5547, 0.8398]]]),
 'labels': tensor([[[1],
          [1]],
 
         [[1],
          [1]],
 
         [[1],
          [1]],
 
         [[1],
          [1]]])}

In [None]:
annotations

{'boxes': tensor([[[  0., 158.,  40., 158.]],
 
         [[ 10.,  60., 112.,  70.]],
 
         [[  2.,  35., 196.,  37.]],
 
         [[ 73., 170., 140., 243.]]]),
 'labels': tensor([[[1]],
 
         [[1]],
 
         [[1]],
 
         [[1]]])}