### Train the model for 10 epochs and save it as faster_rcnn1125_10_epoch

In [2]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import pandas as pd
import torchvision
from engine import train_one_epoch, evaluate
import utils
from model_utils import get_detection_model, get_transform
from datetime import datetime


class SVHDDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by a CSV with one row per bounding box.

    The CSV must provide the columns ``filename``, ``x0``, ``y0``, ``x1``,
    ``y1`` and ``label``. Each distinct ``filename`` value is one sample, and
    its image is read from ``<image_dir>/<filename>.png``.

    Args:
        df_dir: Path of the annotation CSV file.
        image_dir: Directory that contains the ``.png`` images.
        transforms: Optional callable invoked as ``transforms(img, target)``
            and expected to return the (possibly modified) pair.
    """

    def __init__(self, df_dir, image_dir, transforms=None):
        self.df = pd.read_csv(df_dir)
        # One sample per distinct filename, in order of first appearance.
        self.image_ids = self.df['filename'].unique()
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the ``idx``-th distinct filename.

        ``target`` is a dict with the keys ``boxes`` (float32, one
        ``[x0, y0, x1, y1]`` row per annotation), ``labels`` (int64),
        ``area`` (box width * height), ``iscrowd`` (all zeros, int64) and
        ``image_id`` (``tensor([idx])``).
        """
        image_id = self.image_ids[idx]
        records = self.df[self.df['filename'] == image_id]

        # os.path.join works whether or not image_dir ends with a separator.
        img_path = os.path.join(self.image_dir, str(image_id) + ".png")
        img = Image.open(img_path).convert("RGB")

        # torchvision detection models document boxes as FloatTensor[N, 4];
        # float32 also avoids truncating fractional coordinates.
        boxes = torch.tensor(records[['x0', 'y0', 'x1', 'y1']].values,
                             dtype=torch.float32)
        labels = torch.tensor(records['label'].values.reshape(-1),
                              dtype=torch.int64)
        iscrowd = torch.zeros(labels.shape, dtype=torch.int64)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            "area": area,
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "iscrowd": iscrowd,
        }
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Number of distinct filenames in the annotation file."""
        return self.image_ids.shape[0]


In [3]:
# Training configuration. Paths are absolute and assume the data lives under /Main.
train_dir = "/Main/train/"
train_annotations = "/Main/train_ann.csv"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 11  # 10 digit classes + background

# Build two views of the SAME annotated images: one with the training
# transform, one with the evaluation transform.
# NOTE(review): get_transform(train=True) presumably enables augmentation -
# confirm in model_utils.
dataset = SVHDDataset(train_annotations, train_dir, get_transform(train=True))
dataset_test = SVHDDataset(train_annotations, train_dir,
                           get_transform(train=False))
# Split into 90% train / 10% held-out validation using one seeded permutation,
# so the two subsets never share an image.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:int(len(indices) * 0.9)])
dataset_test = torch.utils.data.Subset(dataset_test,
                                       indices[int(len(indices) * 0.9):])
# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(dataset,
                                          batch_size=4,
                                          shuffle=True,
                                          collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                               batch_size=4,
                                               shuffle=False,
                                               collate_fn=utils.collate_fn)

model = get_detection_model(num_classes)
model.to(device)
# Optimise only the parameters that are not frozen.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params,
                            lr=0.005,
                            momentum=0.9,
                            weight_decay=0.0005)
# Multiply the learning rate by 0.1 after every 3 scheduler steps (one step per epoch below).
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 1000 iterations
    train_one_epoch(model,
                    optimizer,
                    data_loader,
                    device,
                    epoch,
                    print_freq=1000)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the held-out 10% split
    evaluate(model, data_loader_test, device=device)


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Epoch: [0]  [   0/7516]  eta: 1:23:04  lr: 0.000010  loss: 2.9382 (2.9382)  loss_classifier: 2.6293 (2.6293)  loss_box_reg: 0.2195 (0.2195)  loss_objectness: 0.0784 (0.0784)  loss_rpn_box_reg: 0.0110 (0.0110)  time: 0.6632  data: 0.0082  max mem: 3939
Epoch: [0]  [1000/7516]  eta: 1:33:44  lr: 0.005000  loss: 0.2922 (0.4704)  loss_classifier: 0.1272 (0.2477)  loss_box_reg: 0.1430 (0.2062)  loss_objectness: 0.0028 (0.0084)  loss_rpn_box_reg: 0.0059 (0.0080)  time: 0.9160  data: 0.0056  max mem: 5318
Epoch: [0]  [2000/7516]  eta: 1:22:21  lr: 0.005000  loss: 0.3203 (0.3867)  loss_classifier: 0.1351 (0.1876)  loss_box_reg: 0.1644 (0.1851)  loss_objectness: 0.0027 (0.0066)  loss_rpn_box_reg: 0.0059 (0.0075)  time: 0.9138  data: 0.0056  max mem: 5318
Epoch: [0]  [3000/7516]  eta: 1:08:07  lr: 0.005000  loss: 0.2826 (0.3516)  loss_classifier: 0.1081 (0.1620)  loss_box_reg: 0.1649 (0.1763)  loss_objectness: 0.0017 (0.0061)  loss_rpn_box_reg: 0.0057 (0.0072)  time: 0.9361  data: 0.0054  max me

Test:  [800/836]  eta: 0:00:12  model_time: 0.3593 (0.3443)  evaluator_time: 0.0070 (0.0074)  time: 0.3719  data: 0.0051  max mem: 5685
Test:  [835/836]  eta: 0:00:00  model_time: 0.3542 (0.3445)  evaluator_time: 0.0069 (0.0074)  time: 0.3536  data: 0.0051  max mem: 5685
Test: Total time: 0:04:59 (0.3579 s / it)
Averaged stats: model_time: 0.3542 (0.3445)  evaluator_time: 0.0069 (0.0074)
Accumulating evaluation results...
DONE (t=0.68s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.426
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.882
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.342
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.467
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.589
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | max

Epoch: [3]  [7515/7516]  eta: 0:00:00  lr: 0.000500  loss: 0.1953 (0.2238)  loss_classifier: 0.0672 (0.0752)  loss_box_reg: 0.1203 (0.1406)  loss_objectness: 0.0011 (0.0026)  loss_rpn_box_reg: 0.0035 (0.0053)  time: 0.8681  data: 0.0054  max mem: 5685
Epoch: [3] Total time: 1:52:34 (0.8987 s / it)
creating index...
index created!
Test:  [  0/836]  eta: 0:03:15  model_time: 0.2026 (0.2026)  evaluator_time: 0.0069 (0.0069)  time: 0.2336  data: 0.0078  max mem: 5685
Test:  [100/836]  eta: 0:03:12  model_time: 0.3278 (0.2476)  evaluator_time: 0.0073 (0.0080)  time: 0.3377  data: 0.0051  max mem: 5685
Test:  [200/836]  eta: 0:03:15  model_time: 0.3401 (0.2932)  evaluator_time: 0.0074 (0.0076)  time: 0.3555  data: 0.0053  max mem: 5685
Test:  [300/836]  eta: 0:02:54  model_time: 0.3583 (0.3118)  evaluator_time: 0.0069 (0.0075)  time: 0.3719  data: 0.0051  max mem: 5685
Test:  [400/836]  eta: 0:02:26  model_time: 0.3642 (0.3221)  evaluator_time: 0.0070 (0.0074)  time: 0.3693  data: 0.0056  ma

Epoch: [5]  [2000/7516]  eta: 1:21:57  lr: 0.000500  loss: 0.2022 (0.2199)  loss_classifier: 0.0571 (0.0728)  loss_box_reg: 0.1274 (0.1397)  loss_objectness: 0.0006 (0.0022)  loss_rpn_box_reg: 0.0032 (0.0051)  time: 0.8937  data: 0.0056  max mem: 5685
Epoch: [5]  [3000/7516]  eta: 1:07:05  lr: 0.000500  loss: 0.1770 (0.2214)  loss_classifier: 0.0613 (0.0739)  loss_box_reg: 0.1128 (0.1401)  loss_objectness: 0.0007 (0.0023)  loss_rpn_box_reg: 0.0040 (0.0052)  time: 0.8959  data: 0.0053  max mem: 5685
Epoch: [5]  [4000/7516]  eta: 0:52:15  lr: 0.000500  loss: 0.2044 (0.2215)  loss_classifier: 0.0722 (0.0736)  loss_box_reg: 0.1264 (0.1403)  loss_objectness: 0.0012 (0.0024)  loss_rpn_box_reg: 0.0048 (0.0052)  time: 0.9080  data: 0.0054  max mem: 5685
Epoch: [5]  [5000/7516]  eta: 0:37:24  lr: 0.000500  loss: 0.2289 (0.2218)  loss_classifier: 0.0764 (0.0737)  loss_box_reg: 0.1367 (0.1404)  loss_objectness: 0.0013 (0.0025)  loss_rpn_box_reg: 0.0048 (0.0052)  time: 0.8823  data: 0.0058  max me

DONE (t=0.58s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.477
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.928
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.426
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.468
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.516
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.621
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.526
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.569
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.569
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.565
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.587
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.629
Epoch: 

Test:  [  0/836]  eta: 0:03:15  model_time: 0.2018 (0.2018)  evaluator_time: 0.0077 (0.0077)  time: 0.2337  data: 0.0072  max mem: 5685
Test:  [100/836]  eta: 0:03:09  model_time: 0.3231 (0.2437)  evaluator_time: 0.0071 (0.0071)  time: 0.3317  data: 0.0051  max mem: 5685
Test:  [200/836]  eta: 0:03:12  model_time: 0.3343 (0.2902)  evaluator_time: 0.0074 (0.0071)  time: 0.3545  data: 0.0053  max mem: 5685
Test:  [300/836]  eta: 0:02:52  model_time: 0.3695 (0.3089)  evaluator_time: 0.0068 (0.0071)  time: 0.3682  data: 0.0051  max mem: 5685
Test:  [400/836]  eta: 0:02:24  model_time: 0.3513 (0.3189)  evaluator_time: 0.0069 (0.0071)  time: 0.3664  data: 0.0056  max mem: 5685
Test:  [500/836]  eta: 0:01:53  model_time: 0.3427 (0.3253)  evaluator_time: 0.0070 (0.0071)  time: 0.3591  data: 0.0051  max mem: 5685
Test:  [600/836]  eta: 0:01:20  model_time: 0.3554 (0.3301)  evaluator_time: 0.0068 (0.0071)  time: 0.3654  data: 0.0056  max mem: 5685
Test:  [700/836]  eta: 0:00:47  model_time: 0.35

In [4]:
# Save the weights plus optimizer/scheduler state so the run can be resumed
# or the model reloaded for inference.
torch.save(
    {
        # Taken from the training cell's num_epochs instead of a repeated
        # literal, so the stored value cannot drift from the loop setting.
        "epoch": num_epochs,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "lr_scheduler": lr_scheduler.state_dict()
    }, "/Main/faster_rcnn1125_10_epoch.pth")


### Train the model for 5 epochs and save it as faster_rcnn1124

In [47]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import pandas as pd
import torchvision
from engine import train_one_epoch, evaluate
import utils
from model_utils import get_detection_model, get_transform
from datetime import datetime


class SVHDDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by a CSV with one row per bounding box.

    The CSV must provide the columns ``filename``, ``x0``, ``y0``, ``x1``,
    ``y1`` and ``label``. Each distinct ``filename`` value is one sample, and
    its image is read from ``<image_dir>/<filename>.png``.

    Args:
        df_dir: Path of the annotation CSV file.
        image_dir: Directory that contains the ``.png`` images.
        transforms: Optional callable invoked as ``transforms(img, target)``
            and expected to return the (possibly modified) pair.
    """

    def __init__(self, df_dir, image_dir, transforms=None):
        self.df = pd.read_csv(df_dir)
        # One sample per distinct filename, in order of first appearance.
        self.image_ids = self.df['filename'].unique()
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the ``idx``-th distinct filename.

        ``target`` is a dict with the keys ``boxes`` (float32, one
        ``[x0, y0, x1, y1]`` row per annotation), ``labels`` (int64),
        ``area`` (box width * height), ``iscrowd`` (all zeros, int64) and
        ``image_id`` (``tensor([idx])``).
        """
        image_id = self.image_ids[idx]
        records = self.df[self.df['filename'] == image_id]

        # os.path.join works whether or not image_dir ends with a separator.
        img_path = os.path.join(self.image_dir, str(image_id) + ".png")
        img = Image.open(img_path).convert("RGB")

        # torchvision detection models document boxes as FloatTensor[N, 4];
        # float32 also avoids truncating fractional coordinates.
        boxes = torch.tensor(records[['x0', 'y0', 'x1', 'y1']].values,
                             dtype=torch.float32)
        labels = torch.tensor(records['label'].values.reshape(-1),
                              dtype=torch.int64)
        iscrowd = torch.zeros(labels.shape, dtype=torch.int64)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {
            "area": area,
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "iscrowd": iscrowd,
        }
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        """Number of distinct filenames in the annotation file."""
        return self.image_ids.shape[0]


In [48]:
# Training configuration. Paths are absolute and assume the data lives under /Main.
train_dir = "/Main/train/"
train_annotations = "/Main/train_ann.csv"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 11  # 10 digit classes + background

# Build two views of the SAME annotated images: one with the training
# transform, one with the evaluation transform.
# NOTE(review): get_transform(train=True) presumably enables augmentation -
# confirm in model_utils.
dataset = SVHDDataset(train_annotations, train_dir, get_transform(train=True))
dataset_test = SVHDDataset(train_annotations, train_dir,
                           get_transform(train=False))
# Split into 90% train / 10% held-out validation using one seeded permutation,
# so the two subsets never share an image.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:int(len(indices) * 0.9)])
dataset_test = torch.utils.data.Subset(dataset_test,
                                       indices[int(len(indices) * 0.9):])
# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(dataset,
                                          batch_size=4,
                                          shuffle=True,
                                          collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                               batch_size=4,
                                               shuffle=False,
                                               collate_fn=utils.collate_fn)

model = get_detection_model(num_classes)
model.to(device)
# Optimise only the parameters that are not frozen.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params,
                            lr=0.005,
                            momentum=0.9,
                            weight_decay=0.0005)
# Multiply the learning rate by 0.1 after every 3 scheduler steps (one step per epoch below).
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)
num_epochs = 5

for epoch in range(num_epochs):
    # train for one epoch, printing every 1000 iterations
    train_one_epoch(model,
                    optimizer,
                    data_loader,
                    device,
                    epoch,
                    print_freq=1000)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the held-out 10% split
    evaluate(model, data_loader_test, device=device)


Epoch: [0]  [   0/7516]  eta: 1:17:22  lr: 0.000010  loss: 2.9217 (2.9217)  loss_classifier: 2.6321 (2.6321)  loss_box_reg: 0.1889 (0.1889)  loss_objectness: 0.0885 (0.0885)  loss_rpn_box_reg: 0.0122 (0.0122)  time: 0.6177  data: 0.0069  max mem: 4258
Epoch: [0]  [1000/7516]  eta: 1:49:27  lr: 0.005000  loss: 0.2984 (0.4713)  loss_classifier: 0.1317 (0.2481)  loss_box_reg: 0.1527 (0.2066)  loss_objectness: 0.0025 (0.0086)  loss_rpn_box_reg: 0.0056 (0.0080)  time: 0.9886  data: 0.0056  max mem: 5640
Epoch: [0]  [2000/7516]  eta: 1:32:17  lr: 0.005000  loss: 0.3312 (0.3885)  loss_classifier: 0.1371 (0.1883)  loss_box_reg: 0.1750 (0.1860)  loss_objectness: 0.0022 (0.0067)  loss_rpn_box_reg: 0.0062 (0.0075)  time: 0.9839  data: 0.0057  max mem: 5640
Epoch: [0]  [3000/7516]  eta: 1:15:12  lr: 0.005000  loss: 0.2724 (0.3531)  loss_classifier: 0.1051 (0.1628)  loss_box_reg: 0.1539 (0.1770)  loss_objectness: 0.0017 (0.0061)  loss_rpn_box_reg: 0.0056 (0.0072)  time: 0.9949  data: 0.0054  max me

Test:  [800/836]  eta: 0:00:13  model_time: 0.3726 (0.3565)  evaluator_time: 0.0071 (0.0075)  time: 0.3836  data: 0.0050  max mem: 6008
Test:  [835/836]  eta: 0:00:00  model_time: 0.3698 (0.3569)  evaluator_time: 0.0070 (0.0075)  time: 0.3680  data: 0.0051  max mem: 6008
Test: Total time: 0:05:09 (0.3703 s / it)
Averaged stats: model_time: 0.3698 (0.3569)  evaluator_time: 0.0070 (0.0075)
Accumulating evaluation results...
DONE (t=0.70s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.414
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.871
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.320
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.402
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.464
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.601
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | max

Epoch: [3]  [7515/7516]  eta: 0:00:00  lr: 0.000500  loss: 0.1925 (0.2226)  loss_classifier: 0.0636 (0.0747)  loss_box_reg: 0.1191 (0.1399)  loss_objectness: 0.0015 (0.0026)  loss_rpn_box_reg: 0.0035 (0.0053)  time: 0.8954  data: 0.0054  max mem: 6008
Epoch: [3] Total time: 1:56:10 (0.9275 s / it)
creating index...
index created!
Test:  [  0/836]  eta: 0:03:13  model_time: 0.1996 (0.1996)  evaluator_time: 0.0077 (0.0077)  time: 0.2317  data: 0.0071  max mem: 6008
Test:  [100/836]  eta: 0:03:22  model_time: 0.3401 (0.2617)  evaluator_time: 0.0072 (0.0072)  time: 0.3498  data: 0.0052  max mem: 6008
Test:  [200/836]  eta: 0:03:22  model_time: 0.3466 (0.3047)  evaluator_time: 0.0074 (0.0072)  time: 0.3646  data: 0.0052  max mem: 6008
Test:  [300/836]  eta: 0:02:59  model_time: 0.3661 (0.3219)  evaluator_time: 0.0069 (0.0072)  time: 0.3795  data: 0.0051  max mem: 6008
Test:  [400/836]  eta: 0:02:30  model_time: 0.3647 (0.3314)  evaluator_time: 0.0070 (0.0072)  time: 0.3766  data: 0.0057  ma

In [49]:
# Save the weights plus optimizer/scheduler state so the run can be resumed
# or the model reloaded for inference.
torch.save(
    {
        # Taken from the training cell's num_epochs instead of a repeated
        # literal, so the stored value cannot drift from the loop setting.
        "epoch": num_epochs,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "lr_scheduler": lr_scheduler.state_dict()
    }, "/Main/faster_rcnn1124.pth")


### Predict on the test images with faster_rcnn1124, keep only detections above the score threshold, and save them to a JSON file

In [59]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import pandas as pd
import torchvision
import matplotlib.pyplot as plt
from model_utils import get_detection_model, get_transform
import json

test_dir = "/Main/test/"
# Process the images in numeric order of their file stem ("12.png" -> 12).
dir_root = os.listdir(test_dir)
dir_root.sort(key=lambda x: int(os.path.splitext(x)[0]))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 11
file_name = 'answer1124_with_threshold_change_bbox.json'
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
checkpoint = torch.load('/Main/faster_rcnn1124.pth', map_location=device)

model = get_detection_model(num_classes)
model = model.to(device)
model.load_state_dict(checkpoint["model_state_dict"])
results = []
model.eval()
# Minimum score a detection needs to be written out; None keeps everything.
threshold = 0.5
to_tensor = torchvision.transforms.ToTensor()

for file in dir_root:
    # Convert to RGB exactly like the training dataset does, and close the
    # file handle as soon as the pixels have been copied into the tensor.
    with Image.open(os.path.join(test_dir, file)) as image:
        img = to_tensor(image.convert("RGB"))
    image_id = int(os.path.splitext(file)[0])

    with torch.no_grad():
        prediction = model([img.to(device)])
    for pred in prediction:
        # Move each output tensor to the host once instead of once per box.
        boxes = pred["boxes"].cpu().tolist()
        labels = pred["labels"].cpu().tolist()
        scores = pred["scores"].cpu().tolist()
        for (x0, y0, x1, y1), label, score in zip(boxes, labels, scores):
            if threshold is None or score > threshold:
                # bbox is written as (x0, y0, width, height)
                results.append(dict(image_id=image_id, score=score,
                                    category_id=label,
                                    bbox=[x0, y0, x1 - x0, y1 - y0]))

with open(file_name, 'w') as f:
    json.dump(results, f, indent=4)


### Predict on the test images with faster_rcnn1124 without a score threshold and save the results to a JSON file

In [60]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import pandas as pd
import torchvision
import matplotlib.pyplot as plt
from model_utils import get_detection_model, get_transform
import json

test_dir = "/Main/test/"
# Process the images in numeric order of their file stem ("12.png" -> 12).
dir_root = os.listdir(test_dir)
dir_root.sort(key=lambda x: int(os.path.splitext(x)[0]))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 11
file_name = 'answer1124_change_bbox.json'
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
checkpoint = torch.load('/Main/faster_rcnn1124.pth', map_location=device)

model = get_detection_model(num_classes)
model = model.to(device)
model.load_state_dict(checkpoint["model_state_dict"])
results = []
model.eval()
# No score filtering in this run: None keeps every detection the model returns.
threshold = None
to_tensor = torchvision.transforms.ToTensor()

for file in dir_root:
    # Convert to RGB exactly like the training dataset does, and close the
    # file handle as soon as the pixels have been copied into the tensor.
    with Image.open(os.path.join(test_dir, file)) as image:
        img = to_tensor(image.convert("RGB"))
    image_id = int(os.path.splitext(file)[0])

    with torch.no_grad():
        prediction = model([img.to(device)])
    for pred in prediction:
        # Move each output tensor to the host once instead of once per box.
        boxes = pred["boxes"].cpu().tolist()
        labels = pred["labels"].cpu().tolist()
        scores = pred["scores"].cpu().tolist()
        for (x0, y0, x1, y1), label, score in zip(boxes, labels, scores):
            if threshold is None or score > threshold:
                # bbox is written as (x0, y0, width, height)
                results.append(dict(image_id=image_id, score=score,
                                    category_id=label,
                                    bbox=[x0, y0, x1 - x0, y1 - y0]))

with open(file_name, 'w') as f:
    json.dump(results, f, indent=4)


### Predict on the test images with faster_rcnn1125_10_epoch, keep only detections above the score threshold, and save them to a JSON file

In [5]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import pandas as pd
import torchvision
import matplotlib.pyplot as plt
from model_utils import get_detection_model, get_transform
import json

test_dir = "/Main/test/"
# Process the images in numeric order of their file stem ("12.png" -> 12).
dir_root = os.listdir(test_dir)
dir_root.sort(key=lambda x: int(os.path.splitext(x)[0]))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 11
file_name = 'answer1125_with_threshold_change_bbox.json'
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
checkpoint = torch.load('/Main/faster_rcnn1125_10_epoch.pth',
                        map_location=device)

model = get_detection_model(num_classes)
model = model.to(device)
model.load_state_dict(checkpoint["model_state_dict"])
results = []
model.eval()
# Minimum score a detection needs to be written out; None keeps everything.
threshold = 0.5
to_tensor = torchvision.transforms.ToTensor()

for file in dir_root:
    # Convert to RGB exactly like the training dataset does, and close the
    # file handle as soon as the pixels have been copied into the tensor.
    with Image.open(os.path.join(test_dir, file)) as image:
        img = to_tensor(image.convert("RGB"))
    image_id = int(os.path.splitext(file)[0])

    with torch.no_grad():
        prediction = model([img.to(device)])
    for pred in prediction:
        # Move each output tensor to the host once instead of once per box.
        boxes = pred["boxes"].cpu().tolist()
        labels = pred["labels"].cpu().tolist()
        scores = pred["scores"].cpu().tolist()
        for (x0, y0, x1, y1), label, score in zip(boxes, labels, scores):
            if threshold is None or score > threshold:
                # bbox is written as (x0, y0, width, height).
                # The key must be "category_id" (it was misspelled
                # "ategory_id"), matching the other prediction cells.
                results.append(dict(image_id=image_id, score=score,
                                    category_id=label,
                                    bbox=[x0, y0, x1 - x0, y1 - y0]))

with open(file_name, 'w') as f:
    json.dump(results, f, indent=4)


### Predict on the test images with faster_rcnn1125_10_epoch without a score threshold and save the results to a JSON file

In [6]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import pandas as pd
import torchvision
import matplotlib.pyplot as plt
from model_utils import get_detection_model, get_transform
import json

test_dir = "/Main/test/"
# Process the images in numeric order of their file stem ("12.png" -> 12).
dir_root = os.listdir(test_dir)
dir_root.sort(key=lambda x: int(os.path.splitext(x)[0]))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 11
file_name = 'answer1125_change_bbox.json'
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
checkpoint = torch.load('/Main/faster_rcnn1125_10_epoch.pth',
                        map_location=device)

model = get_detection_model(num_classes)
model = model.to(device)
model.load_state_dict(checkpoint["model_state_dict"])
results = []
model.eval()
# No score filtering in this run: None keeps every detection the model returns.
threshold = None
to_tensor = torchvision.transforms.ToTensor()

for file in dir_root:
    # Convert to RGB exactly like the training dataset does, and close the
    # file handle as soon as the pixels have been copied into the tensor.
    with Image.open(os.path.join(test_dir, file)) as image:
        img = to_tensor(image.convert("RGB"))
    image_id = int(os.path.splitext(file)[0])

    with torch.no_grad():
        prediction = model([img.to(device)])
    for pred in prediction:
        # Move each output tensor to the host once instead of once per box.
        boxes = pred["boxes"].cpu().tolist()
        labels = pred["labels"].cpu().tolist()
        scores = pred["scores"].cpu().tolist()
        for (x0, y0, x1, y1), label, score in zip(boxes, labels, scores):
            if threshold is None or score > threshold:
                # bbox is written as (x0, y0, width, height).
                # The key must be "category_id" (it was misspelled
                # "ategory_id"), matching the other prediction cells.
                results.append(dict(image_id=image_id, score=score,
                                    category_id=label,
                                    bbox=[x0, y0, x1 - x0, y1 - y0]))

with open(file_name, 'w') as f:
    json.dump(results, f, indent=4)
