In [14]:
from torch_snippets import *
from PIL import Image
import pandas as pd
import numpy as np
import glob

In [15]:
# Location of the image files, and the CSV the rest of the notebook reads
# its ImageID / LabelName / box columns from.
IMAGE_ROOT = './dataset/images/images'
df_raw = pd.read_csv('./dataset/df.csv')
df = df_raw  # second name for the very same DataFrame object (not a copy)

In [16]:
# Give every distinct label name a positive integer id (in order of first
# appearance); id 0 is kept for the synthetic 'background' class.
label2target = {
    name: class_id
    for class_id, name in enumerate(df_raw['LabelName'].unique(), start=1)
}
label2target['background'] = 0
# Inverse lookup: integer id -> label name.
target2label = dict((class_id, name) for name, class_id in label2target.items())
background_class = label2target['background']
num_classes = len(label2target)
print('target2label : {}'.format(target2label))
print('label2target : {}'.format(label2target))

target2label : {1: 'Bus', 2: 'Truck', 0: 'background'}
label2target : {'Bus': 1, 'Truck': 2, 'background': 0}


In [17]:
def preprocessing_image(img):
    """Turn a channels-last image array into a float tensor on ``device``.

    Args:
        img: 3-D array-like; its last axis is moved to the front
            (e.g. H x W x C becomes C x H x W).

    Returns:
        A ``float32`` tensor with the permuted layout, placed on the
        notebook-level ``device``.
    """
    channels_first = torch.tensor(img).permute(2, 0, 1)
    on_device = channels_first.to(device)
    return on_device.float()

In [18]:
class OpenDataset(torch.utils.data.Dataset):
    """Detection dataset that yields one resized image and its boxes per ImageID.

    Each item is ``(img, target)`` where ``img`` is the tensor returned by
    ``preprocessing_image`` and ``target`` is a dict with:

    * ``'boxes'``  - float tensor of ``[XMin, YMin, XMax, YMax]`` rows in pixel
      units of the resized ``w`` x ``h`` image,
    * ``'labels'`` - long tensor of class ids looked up in ``label2target``.

    Args:
        df: DataFrame with ``ImageID``, ``LabelName``, ``XMin``, ``YMin``,
            ``XMax`` and ``YMax`` columns. The box columns are treated as
            fractions of the image width/height (they are scaled by ``w``/``h``).
        image_dir: Directory that is globbed once for candidate image files.
    """

    # Every image is resized to this fixed width / height.
    w, h = 224, 224

    def __init__(self, df, image_dir=IMAGE_ROOT):
        self.image_dir = image_dir
        self.df = df
        self.files = glob.glob(self.image_dir + '/*')
        # One dataset item per distinct image id.
        self.image_infos = df.ImageID.unique()

    def __getitem__(self, ix):
        """Load, resize and scale image ``ix`` and build its detection target."""
        image_id = self.image_infos[ix]
        img_path = find(image_id, self.files)
        img = Image.open(img_path).convert('RGB')
        # Convert to an ndarray *before* dividing: a PIL image does not support `/`.
        img = np.array(img.resize((self.w, self.h), resample=Image.BILINEAR)) / 255.

        # Filter the frame this dataset was built from, not the notebook-level `df`.
        data = self.df[self.df['ImageID'] == image_id]
        labels = data['LabelName'].values.tolist()
        # Flat column list (not a nested one); astype also gives a private float
        # copy so the in-place scaling below cannot touch the DataFrame.
        data = data['XMin,YMin,XMax,YMax'.split(',')].values.astype(np.float64)

        # Convert relative coordinates to absolute pixel coordinates.
        data[:, [0, 2]] *= self.w
        data[:, [1, 3]] *= self.h
        # Truncate to whole pixels.
        boxes = data.astype(np.uint32).tolist()

        target = {}
        target['boxes'] = torch.Tensor(boxes).float()
        target['labels'] = torch.Tensor([label2target[i] for i in labels]).long()

        img = preprocessing_image(img)
        return img, target

    def collate_fn(self, batch):
        """Regroup ``[(img, target), ...]`` into ``(imgs, targets)`` tuples.

        Items are kept as tuples rather than stacked, so each image may carry
        a different number of boxes.
        """
        return tuple(zip(*batch))

    def __len__(self):
        """Number of distinct image ids in the dataset."""
        return len(self.image_infos)

In [20]:
from sklearn.model_selection import train_test_split
# Split on distinct image ids (10% held out, fixed seed) so that all boxes of
# one image land on the same side of the split.
trn_ids, val_ids = train_test_split(
    df['ImageID'].unique(),
    test_size=0.1,
    random_state=99,
)
trn_df = df[df['ImageID'].isin(trn_ids)]
val_df = df[df['ImageID'].isin(val_ids)]

train_ds, test_ds = OpenDataset(trn_df), OpenDataset(val_df)

# Batches of 4; the dataset's own collate_fn keeps per-image targets separate,
# and a trailing incomplete batch is dropped.
train_loader = DataLoader(
    train_ds,
    batch_size=4,
    collate_fn=train_ds.collate_fn,
    drop_last=True,
)
test_loader = DataLoader(
    test_ds,
    batch_size=4,
    collate_fn=test_ds.collate_fn,
    drop_last=True,
)

In [25]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
def get_model():
    """Build a Faster R-CNN (ResNet-50 FPN) detector sized for this dataset.

    The detector is created with ``pretrained=False`` and its box-prediction
    head is replaced by a ``FastRCNNPredictor`` with ``num_classes`` outputs
    (the notebook-level count that includes the background class), so the
    classifier matches ``label2target`` instead of torchvision's default
    class count.

    Returns:
        The ``FasterRCNN`` module, not yet moved to any device.
    """
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    # Reuse the existing head's input width so only the output size changes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [26]:
# Instantiate the detector; the bare name on the last line makes the notebook
# display the module tree as the cell output.
model = get_model()
model

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /Users/chojaeyoung/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth
100%|██████████| 97.8M/97.8M [00:04<00:00, 23.9MB/s]


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
          (downsample)

In [27]:
def train_batch(inputs, model, optimizer):
    """Run one optimisation step on a single batch.

    Args:
        inputs: ``(images, targets)`` pair; ``images`` is an iterable of
            tensors and ``targets`` an iterable of dicts of tensors.
        model: Module that, called as ``model(images, targets)`` in train
            mode, returns a dict of individual loss tensors.
        optimizer: Optimizer holding ``model``'s parameters.

    Returns:
        ``(loss, losses)`` - the summed loss tensor and the dict of individual
        losses it was summed from.
    """
    model.train()
    images, targets = inputs
    # Iterate over the unpacked images, not over the (images, targets) pair.
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    optimizer.zero_grad()
    losses = model(images, targets)
    loss = sum(losses.values())
    loss.backward()
    optimizer.step()

    return loss, losses

In [29]:
@torch.no_grad()
def validate_batch(inputs, model):
    """Compute the losses for one batch without tracking gradients.

    Args:
        inputs: ``(images, targets)`` pair; ``images`` is an iterable of
            tensors and ``targets`` an iterable of dicts of tensors.
        model: Module that, called as ``model(images, targets)`` in train
            mode, returns a dict of individual loss tensors.

    Returns:
        ``(loss, losses)`` - the summed loss tensor and the dict of individual
        losses it was summed from.
    """
    # The loss values are only available when the model is in train mode.
    model.train()
    images, targets = inputs
    # Iterate over the unpacked images, not over the (images, targets) pair.
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    # No optimizer call here: nothing is back-propagated under no_grad, and
    # the function should not depend on a notebook-level `optimizer`.
    losses = model(images, targets)
    loss = sum(losses.values())
    return loss, losses

In [30]:
# Training setup: a fresh detector on `device`, an Adam optimizer over all of
# its parameters, and a Report sized to the number of epochs.
n_epochs = 5
model = get_model()
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
log = Report(n_epochs)