In [None]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import os
import torch
import torchvision
from PIL import Image
from pycocotools.coco import COCO
import tqdm
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [None]:
class RoofDataset(torch.utils.data.Dataset):
    """Detection dataset backed by a COCO-format annotation file.

    Every annotation is assigned the single foreground label ``1``; the
    category stored in the annotation file is not read.

    Args:
        root: Directory that contains the image files.
        annotation: Path to the COCO-format JSON annotation file.
        transforms: Optional callable applied to the PIL image before it is
            converted to a tensor.
    """

    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        # Sorted so that an index always maps to the same image id.
        self.ids = sorted(self.coco.imgs.keys())

    @staticmethod
    def _xywh_to_xyxy(bboxes):
        """Convert ``[x, y, width, height]`` boxes to an ``(N, 4)`` corner tensor."""
        corners = []
        for bbox in bboxes:
            xmin, ymin, width, height = bbox[:4]
            corners.append([xmin, ymin, xmin + width, ymin + height])
        # reshape keeps the (N, 4) layout even when there are no boxes;
        # as_tensor([]) alone would produce shape (0,).
        return torch.as_tensor(corners, dtype=torch.float32).reshape(-1, 4)

    def __getitem__(self, index):
        """Return ``(image_tensor, target_dict)`` for the image at ``index``.

        The target dict holds ``boxes`` (N, 4), ``labels`` (N,), ``image_id``
        (1,), ``path`` (file name string), ``area`` (N,), ``iscrowd`` (N,) and
        an all-zero ``masks`` placeholder of shape (N, H, W).
        """
        coco = self.coco
        img_id = self.ids[index]
        coco_annotation = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        path = coco.loadImgs(img_id)[0]['file_name']

        # Close the file promptly and force three channels so grayscale,
        # palette and RGBA files all produce a 3xHxW tensor.
        with Image.open(os.path.join(self.root, path)) as handle:
            img = handle.convert("RGB")

        num_objs = len(coco_annotation)

        my_annotation = {
            "boxes": self._xywh_to_xyxy(ann['bbox'] for ann in coco_annotation),
            "labels": torch.ones((num_objs,), dtype=torch.int64),
            "image_id": torch.tensor([img_id]),
            "path": path,
            "area": torch.as_tensor(
                [ann['area'] for ann in coco_annotation], dtype=torch.float32
            ),
            "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
            # No segmentation is decoded; the masks are zero placeholders
            # sized to the untransformed image.
            "masks": torch.zeros(
                (num_objs, img.height, img.width), dtype=torch.uint8
            ),
        }

        if self.transforms is not None:
            img = self.transforms(img)

        # A transform pipeline may already end in a tensor conversion;
        # ToTensor would raise on tensor input.
        if not isinstance(img, torch.Tensor):
            img = torchvision.transforms.ToTensor()(img)

        return img, my_annotation

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)

In [None]:
# Load every annotated image, then hold out the remainder after an 80% training share.
dataset = RoofDataset('./dataset/train/img', './dataset/train/coco.json')
length = len(dataset)
train_length = length * 4 // 5
# A dedicated seeded generator keeps the train/test partition identical across
# kernel restarts, so results from different runs are comparable.
split_generator = torch.Generator().manual_seed(0)
train_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_length, length - train_length],
    generator=split_generator,
)

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    A list of ``(image, target)`` pairs becomes ``(images, targets)``, each a
    tuple, so items of differing sizes are never stacked into one tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)

train_batch_size = 16

# Both loaders are configured identically; only the underlying split differs.
loader_options = dict(
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn,
)
train_loader = torch.utils.data.DataLoader(train_dataset, **loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, **loader_options)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [5]:
# Build Faster R-CNN (ResNet-50 + FPN) starting from pretrained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box predictor so the classifier emits `num_classes` outputs.
num_classes = 2
box_head = model.roi_heads.box_predictor
in_features = box_head.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)



In [6]:
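# NOTE(review): intentionally left disabled. This swaps the mask head, which applies to a
# Mask R-CNN model; presumably the Faster R-CNN built above has no mask predictor to
# replace — confirm before re-enabling.
# in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
# hidden_layer = 256
# model.roi_heads.mask_predictor = torchvision.models.detection.mask_rcnn.MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)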

In [7]:
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
# Kept as the cell's last expression so the notebook still displays the model summary.
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [8]:
# Optimise only the parameters that are not frozen.
params = list(filter(lambda param: param.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(params, lr=1e-4)

In [None]:
if __name__ == '__main__':
    # Train for a fixed number of epochs and keep the weights of the epoch
    # with the lowest mean training loss in 'best_model.pth'.
    num_epochs = 10
    best_loss = float('inf')
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        num_batches = 0
        for i, (imgs, annotations) in enumerate(tqdm.tqdm(train_loader), start=1):
            imgs = [img.to(device) for img in imgs]
            # Build fresh target dicts instead of mutating the ones produced by
            # the loader. Every tensor field moves to the training device;
            # non-tensor entries (such as the 'path' string) pass through as-is.
            targets = [
                {
                    key: value.to(device) if isinstance(value, torch.Tensor) else value
                    for key, value in annot.items()
                }
                for annot in annotations
            ]

            # In train mode the detection model returns a dict of loss terms.
            loss_dict = model(imgs, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            # Track a plain float so no tensor outlives the iteration.
            loss_value = losses.item()
            epoch_loss += loss_value
            num_batches += 1

            if i % 10 == 0:
                print(f"Iteration: {i} Loss: {loss_value}")

        # An empty loader gives no loss to compare; skip checkpointing.
        if num_batches == 0:
            continue

        # Compare the epoch mean, not the last batch's loss: a single batch is
        # too noisy to decide which epoch's weights are best.
        mean_loss = epoch_loss / num_batches
        if mean_loss < best_loss:
            best_loss = mean_loss
            torch.save(model.state_dict(), 'best_model.pth')

100%|██████████| 6/6 [08:25<00:00, 84.30s/it]
100%|██████████| 6/6 [06:58<00:00, 69.81s/it]
100%|██████████| 6/6 [06:44<00:00, 67.36s/it]
100%|██████████| 6/6 [06:21<00:00, 63.56s/it]
100%|██████████| 6/6 [06:10<00:00, 61.75s/it]
100%|██████████| 6/6 [06:36<00:00, 66.04s/it]
100%|██████████| 6/6 [06:05<00:00, 60.95s/it]
100%|██████████| 6/6 [05:26<00:00, 54.35s/it]
100%|██████████| 6/6 [06:13<00:00, 62.22s/it]
100%|██████████| 6/6 [06:42<00:00, 67.07s/it]
