# Isprobavanje Faster R-CNN-a


In [1]:
%%shell

# Cython is installed first; presumably it is required to build pycocotools
# from source below — TODO confirm.
pip install cython
# Install pycocotools from the upstream cocoapi repository: the version shipped
# by default in Colab had a bug fixed in https://github.com/cocodataset/cocoapi/pull/354
# NOTE(review): the captured output of this cell shows pycocotools 2.0.4 being
# uninstalled and replaced by 2.0 — confirm this reinstall is still needed.
pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-gebf8ryo
  Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-gebf8ryo
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25l[?25hdone
  Created wheel for pycocotools: filename=pycocotools-2.0-cp37-cp37m-linux_x86_64.whl size=264363 sha256=fe0d98770cd1c16c7fd1156dbdfce45a1f0c381eb9c91afd0d13cc5cf6c1e483
  Stored in directory: /tmp/pip-ephem-wheel-cache-ulgk5nb1/wheels/e2/6b/1d/344ac773c7495ea0b85eb228bc66daec7400a143a92d36b7b1
Successfully built pycocotools
Installing collected packages: pycocotools
  Attempting uninstall: pycocotools
    Found existing installation: pycocotools 2.0.4
    Uninstalling pycocotools-2.0.4:
      Successfully uninstalled pycocotools-2.0.4
Successfully installed pycocotools-2.0




In [2]:
%%shell

# Fetch the TorchVision detection reference scripts (references/detection)
# that the notebook imports as local modules.
# Only the v0.8.2 tag is needed, so clone just that tag without history
# instead of downloading the whole repository, and reuse an existing checkout
# when the cell is re-run (a second `git clone` into `vision` would fail).
if [ ! -d vision ]; then
  git clone --depth 1 --branch v0.8.2 https://github.com/pytorch/vision.git
else
  git -C vision checkout -q v0.8.2
fi

# Copy the helper modules next to the notebook so that `import utils`,
# `import transforms as T` and `from engine import ...` resolve.
for module in utils transforms coco_eval engine coco_utils; do
  cp "vision/references/detection/${module}.py" ./
done

Cloning into 'vision'...
remote: Enumerating objects: 110061, done.[K
remote: Counting objects: 100% (2746/2746), done.[K
remote: Compressing objects: 100% (670/670), done.[K
remote: Total 110061 (delta 2483), reused 2216 (delta 2062), pack-reused 107315[K
Receiving objects: 100% (110061/110061), 213.26 MiB | 31.94 MiB/s, done.
Resolving deltas: 100% (95143/95143), done.
Note: checking out 'v0.8.2'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:

  git checkout -b <new-branch-name>

HEAD is now at 2f40a483d [v0.8.X] .circleci: Add Python 3.9 to CI (#3063)




In [25]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.io import read_image
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
from engine import train_one_epoch, evaluate
import utils
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
import time
import os
import copy
import transforms as T

from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

import time
from datetime import datetime

plt.ion()   # interactive mode

In [4]:
# Mount Google Drive at /content/drive so the uploaded files (dataset CSV,
# images, saved models) are reachable from this runtime. Colab-only import.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Project folder on the mounted Drive; the CSV, the image directory and the
# saved models are all joined onto this prefix. The path is relative, so it
# depends on the working directory — presumably /content on Colab; confirm.
dir_prefix = 'drive/My Drive/Colab Notebooks/Diplomski'

In [6]:
# Column names of the tooth attributes that are turned into detection labels.
attrs_of_interest = ['bridge', 'endofilling', 'filling', 'crown']

def default_labeler(row):
  """Return the label indices of every attribute marked 'yes' in `row`.

  Each name in attrs_of_interest is looked up in `row`; when the value equals
  'yes', the 0-based position of that name in attrs_of_interest is added to
  the result. An empty list means none of the attributes is 'yes'.

  NOTE(review): labels start at 0, but torchvision detection models appear to
  reserve label 0 for background — confirm whether these indices should be
  shifted by one (together with the model's num_classes).
  """
  return [idx for idx, attr in enumerate(attrs_of_interest) if row[attr] == 'yes']

class XRayDataset(torch.utils.data.Dataset):
    """Detection dataset backed by a ';'-separated CSV of box annotations.

    Every CSV row supplies an image name ('img_name'), box corners ('x1',
    'y1', 'x2', 'y2') and whatever columns `labeler` inspects. Rows for which
    `labeler` returns an empty list contribute nothing, so images without any
    label are not part of the dataset.

    Args:
        root: directory the image file names are joined onto.
        csv_file: path of the annotation CSV.
        subset: optional callable taking the loaded DataFrame and returning
            the DataFrame to actually use.
        transforms: optional callable applied as transforms(img, target).
        labeler: callable mapping a row to a list of integer labels.
    """

    def __init__(self, root, csv_file, subset=None, transforms=None, labeler=default_labeler):
        self.root = root
        self.transforms = transforms
        self.csv_file = csv_file
        self.subset = subset
        self.labeler = labeler

        frame = pd.read_csv(csv_file, sep=";")
        self.df = self.subset(frame) if self.subset else frame

        # image name -> list of (label, x1, y1, x2, y2); a row with several
        # labels yields one entry per label, all sharing the same box
        self.object_data_by_img = dict()
        for _, row in self.df.iterrows():
            for lbl in self.labeler(row):
                entry = (lbl, row['x1'], row['y1'], row['x2'], row['y2'])
                self.object_data_by_img.setdefault(row['img_name'], list()).append(entry)

        # sorted image names give every index a stable meaning
        self.imgs = sorted(self.object_data_by_img)

    def __getitem__(self, idx):
        """Return (image, target) for index `idx`.

        `target` holds 'boxes' (float32, one [x1, y1, x2, y2] row per object),
        'labels' (int64), 'image_id' (tensor([idx])), 'area' (box width times
        height) and 'iscrowd' (all zeros). If transforms were given, both the
        image and the target are passed through them.
        """
        name = self.imgs[idx]
        img = Image.open(os.path.join(self.root, name)).convert("RGB")

        annotations = self.object_data_by_img[name]
        labels = torch.as_tensor([ann[0] for ann in annotations], dtype=torch.int64)
        boxes = torch.as_tensor([list(ann[1:5]) for ann in annotations], dtype=torch.float32)

        widths = boxes[:, 2] - boxes[:, 0]
        heights = boxes[:, 3] - boxes[:, 1]

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": heights * widths,
            # every instance is treated as a non-crowd object
            "iscrowd": torch.zeros((len(annotations),), dtype=torch.int64),
        }

        if self.transforms is None:
            return img, target

        img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Number of images that have at least one labelled object."""
        return len(self.imgs)

In [7]:
# Kept as a factory so further transforms can be added if the need arises
def get_transform():
    """Build the (image, target) transform pipeline used by the datasets.

    The pipeline currently contains a single T.ToTensor() step, which converts
    the PIL image into a PyTorch tensor, wrapped in T.Compose.
    """
    return T.Compose([T.ToTensor()])

In [8]:
def keep_only_subset(df, num_of_total_negatives=-1, num_of_positives=-1):
  """Optionally down-sample the positive and negative rows of `df`.

  A row is "negative" when 'crown', 'endofilling', 'filling' and 'bridge' are
  all 'no'; every other row is "positive". A non-negative count draws that
  many rows from the corresponding group with a fixed random_state of 1; a
  negative count (the default) keeps the whole group.

  Prints the resulting group sizes and returns a copy of the positives
  followed by the negatives.
  """
  attr_cols = ['crown', 'endofilling', 'filling', 'bridge']
  is_all_no = df[attr_cols].eq('no').all(axis=1)

  df_all_no = df[is_all_no]
  df_some_yes = df[~is_all_no]

  if num_of_total_negatives >= 0:
    df_all_no = df_all_no.sample(n=num_of_total_negatives, random_state=1)
  if num_of_positives >= 0:
    df_some_yes = df_some_yes.sample(n=num_of_positives, random_state=1)

  print(f'len(df_some_yes): {len(df_some_yes)}, len(df_all_no): {len(df_all_no)}')
  combined = pd.concat([df_some_yes,df_all_no])
  return combined.copy()

In [9]:
# ToTensor + Normalize pipeline; not referenced again in the visible cells
img_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Containers for the (currently commented-out) train/val bookkeeping below
image_datasets, dataloaders, dataset_sizes, class_names = {}, {}, {}, {}

# Annotation CSV and image directory, both under the Drive project folder
csv_filename = os.path.join(dir_prefix, 'multi_label_dataset', 'tooth_info.csv')
img_dir = os.path.join(dir_prefix, 'data_store')

# Keep only rows with at least one 'yes' attribute (zero all-'no' rows sampled)
dataset = XRayDataset(
    root=img_dir,
    csv_file=csv_filename,
    subset=lambda df: keep_only_subset(df, num_of_total_negatives=0),
)

# Inspect the target dict of the first sample as a sanity check
first = dataset[0]
print(first[1])
# total_number = len(dataset)
# split_size = [int(total_number * 0.7), total_number - int(total_number * 0.7)]
# dataset_train, dataset_val = random_split(dataset, split_size, generator=torch.Generator().manual_seed(42))

# image_datasets = {'train': dataset_train, 'val': dataset_val}
# dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
#                                              shuffle=True, num_workers=4)
#               for x in ['train', 'val']}
# dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
# class_names = attrs_of_interest

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

len(df_some_yes): 8790, len(df_all_no): 0
{'boxes': tensor([[ 789.,  421.,  942.,  767.],
        [ 902.,  407., 1099.,  774.],
        [1486.,  404., 1609.,  811.],
        [2069.,  444., 2262.,  771.],
        [1539.,  801., 1629., 1137.],
        [1469.,  804., 1576., 1147.]]), 'labels': tensor([2, 2, 2, 2, 0, 3]), 'image_id': tensor([0]), 'area': tensor([52938., 72299., 50061., 63111., 30240., 36701.]), 'iscrowd': tensor([0, 0, 0, 0, 0, 0])}


In [10]:
# Sanity check: file name of the first image (names are sorted) and the number
# of images that have at least one labelled object
print(dataset.imgs[0])
print(len(dataset))

abaft-real-heady-goofy-gruesome-iron_22ff963593dc2d3e0310315f5bdf7cd8.jpg
932


In [None]:
# load a Faster R-CNN model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box classifier with a new head sized for this dataset.
# torchvision detection heads count the background as one of the classes, so
# the head needs one output per attribute of interest plus one for background
# (the previous hard-coded 4 left no slot for background).
# NOTE(review): default_labeler emits 0-based indices, so 'bridge' currently
# shares index 0 with background — confirm and shift the labels by one.
num_classes = len(attrs_of_interest) + 1
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [11]:
def get_train_test_datasets(len_train):
  """Split the labelled X-ray dataset into a train and a test subset.

  NOTE: despite its name, `len_train` is the number of samples placed in the
  TEST subset (the last `len_train` entries of a seeded random permutation);
  all remaining samples form the training subset. The name is kept so that
  existing keyword calls keep working.

  Side effect: resets the global torch RNG with torch.manual_seed(1) so the
  permutation, and therefore the split, is the same on every call.

  Args:
    len_train: number of held-out (test) samples, 0 <= len_train <= dataset size.

  Returns:
    (train_subset, test_subset) as torch.utils.data.Subset objects.

  Raises:
    ValueError: if len_train is outside the valid range.
  """
  torch.manual_seed(1)
  # Train and test use identical arguments, so one dataset instance backs both
  # subsets instead of parsing the CSV twice.
  full_dataset = XRayDataset(root=img_dir,
                             csv_file=csv_filename,
                             subset=lambda df: keep_only_subset(df, num_of_total_negatives=0),
                             transforms=get_transform())

  total = len(full_dataset)
  if not 0 <= len_train <= total:
    raise ValueError(f'len_train must be between 0 and {total}, got {len_train}')

  # split the dataset in train and test set
  indices = torch.randperm(total).tolist()
  # An explicit split point avoids the `-0` slicing pitfall: with len_train=0,
  # indices[:-0] is empty and indices[-0:] is everything, inverting the split.
  split = total - len_train
  dataset = torch.utils.data.Subset(full_dataset, indices[:split])
  dataset_test = torch.utils.data.Subset(full_dataset, indices[split:])
  return dataset, dataset_test


In [None]:
def main(model, num_epochs=2):
    """Fine-tune `model` on the X-ray dataset and evaluate after every epoch.

    Args:
        model: detection model to train; it is moved to the GPU when one is
            available (otherwise the CPU) and trained in place.
        num_epochs: number of training epochs (default 2, the value that was
            previously hard-coded).
    """
    # train on the GPU or on the CPU, if a GPU is not available
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # use our dataset and defined transformations; 186 samples are held out
    # for evaluation
    dataset, dataset_test = get_train_test_datasets(len_train=186)

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

    # move model to the right device
    model.to(device)

    # construct an optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)
    # and a learning rate scheduler that multiplies the lr by 0.1 every
    # 3 epochs (named `scheduler` so it does not shadow the imported
    # `lr_scheduler` module)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=3,
                                                gamma=0.1)

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)

    print("That's it!")

In [None]:
# Run training and per-epoch evaluation on the model built above
main(model)

len(df_some_yes): 8790, len(df_all_no): 0
len(df_some_yes): 8790, len(df_all_no): 0


  cpuset_checked))
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [  0/373]  eta: 0:20:07  lr: 0.000018  loss: 2.1923 (2.1923)  loss_classifier: 1.2247 (1.2247)  loss_box_reg: 0.4800 (0.4800)  loss_objectness: 0.4593 (0.4593)  loss_rpn_box_reg: 0.0281 (0.0281)  time: 3.2385  data: 1.6248  max mem: 2288
Epoch: [0]  [ 10/373]  eta: 0:09:26  lr: 0.000153  loss: 2.0634 (2.2947)  loss_classifier: 1.1340 (1.1048)  loss_box_reg: 0.3917 (0.3834)  loss_objectness: 0.4531 (0.7515)  loss_rpn_box_reg: 0.0570 (0.0550)  time: 1.5603  data: 0.1742  max mem: 2470
Epoch: [0]  [ 20/373]  eta: 0:08:40  lr: 0.000287  loss: 1.5014 (1.8251)  loss_classifier: 0.7661 (0.8697)  loss_box_reg: 0.4668 (0.4424)  loss_objectness: 0.1561 (0.4671)  loss_rpn_box_reg: 0.0456 (0.0458)  time: 1.3862  data: 0.0283  max mem: 2470
Epoch: [0]  [ 30/373]  eta: 0:08:21  lr: 0.000421  loss: 1.3447 (1.6918)  loss_classifier: 0.5881 (0.7864)  loss_box_reg: 0.5565 (0.5114)  loss_objectness: 0.1256 (0.3509)  loss_rpn_box_reg: 0.0344 (0.0431)  time: 1.4087  data: 0.0319  max mem: 2470


In [None]:
# Timestamp of the save, used to give the checkpoint a unique file name
now = datetime.now()

# dd:mm:YYYY-HH:MM:SS
dt_string = now.strftime("%d:%m:%Y-%H:%M:%S")

# Make sure the target folder exists; torch.save does not create it
models_dir = os.path.join(dir_prefix, 'trained_models')
os.makedirs(models_dir, exist_ok=True)

# NOTE: this pickles the whole model object (not just a state_dict), which is
# what the loading cell expects. The "epochs-2" suffix is hard-coded.
torch.save(model, os.path.join(models_dir, f'faster-rcnn_model-{dt_string}-epochs-2'))

In [12]:
# Load the model
# NOTE: torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints that you saved yourself.
# map_location remaps the stored tensors to whatever device this runtime has,
# so a checkpoint saved on a GPU can still be loaded on a CPU-only runtime.
model_ld = torch.load(
    os.path.join(dir_prefix, 'trained_models', 'faster-rcnn_model-22:03:2022-20:23:30-epochs-2'),
    map_location=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
model_ld.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [13]:
# run on the GPU, or on the CPU if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# rebuild the seeded split; only the held-out (test) subset is needed here
_, dataset_test = get_train_test_datasets(len_train=186)
# pick one image from the test set
img, _ = dataset_test[0]
# make sure the model lives on the same device as the input it is about to
# receive (the input is moved to `device` below)
model_ld.to(device)
# put the model in evaluation mode
model_ld.eval()
with torch.no_grad():
    prediction = model_ld([img.to(device)])

len(df_some_yes): 8790, len(df_all_no): 0
len(df_some_yes): 8790, len(df_all_no): 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [14]:
# Display the raw model output for the sampled test image
prediction

[{'boxes': tensor([[2099.2134,  520.3198, 2264.6233,  820.1273],
          [ 984.1761,  503.8928, 1141.6080,  813.8677],
          [1011.7909,  806.8479, 1222.1090, 1136.9667],
          [2248.0393,  484.1809, 2411.9785,  808.9571],
          [2190.9768,  786.5609, 2398.6370, 1116.7639],
          [ 841.1324,  475.6088,  998.7358,  809.8297],
          [2029.6498,  802.4694, 2203.7354, 1133.8635],
          [ 859.9561,  797.8242, 1054.0912, 1123.4205],
          [2019.9210,  487.7935, 2120.2544,  825.5541],
          [1128.1813,  487.4929, 1232.4130,  827.8107],
          [1083.3447,  823.5870, 1267.8374, 1158.1135],
          [1137.3422,  498.2512, 1235.1154,  825.1070],
          [2399.5425,  470.4774, 2532.5181,  773.0168],
          [1214.4362,  480.2794, 1325.6039,  825.3911],
          [1171.7881,  819.8394, 1290.2227, 1166.7391],
          [ 704.8169,  473.9034,  851.9825,  778.1369],
          [2147.8040,  489.6821, 2350.7756,  826.4849],
          [2361.5330,  756.0206, 2545.5

In [28]:
# Convert the CHW float tensor back into an HWC uint8 PIL image for drawing
img_pred = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())

img1 = ImageDraw.Draw(img_pred)

# Draw the first detection of the current `prediction` instead of coordinates
# and a label pasted from an earlier run's output, so the picture always
# matches the model output above.
top_result = prediction[0]
if len(top_result['boxes']) > 0:
    box = top_result['boxes'][0].tolist()
    label_text = str(top_result['labels'][0].item())
    try:
        font = ImageFont.truetype("helvetica.ttf", size=15)
    except OSError:
        # font file not available on this runtime; fall back to PIL's built-in font
        font = ImageFont.load_default()
    img1.rectangle(box, outline ="blue", width=4)
    img1.text((box[0], box[1]), label_text, font=font, align ="left")
img_pred