In [1]:
# Print the version of the CUDA compiler (nvcc) available in this runtime.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [None]:
!pip install matplotlib

**Завантажуємо з пристрою необхідні файли**

In [None]:
# Upload files from the local machine into the Colab session.
# NOTE(review): presumably this is where augmentation.py, dataset.py and main.py
# (imported in the next cell) are provided — confirm.
from google.colab import files
uploaded = files.upload()

**Імпортуємо необхідні бібліотеки**

In [None]:
# Standard library
import warnings

# Third-party
import matplotlib.pyplot as plt
import torch
from matplotlib.patches import Rectangle

# Project-local modules (not defined in this notebook; they must be importable
# from the working directory)
import augmentation
import dataset
import main

# Silence all warnings for the rest of the session. The filter is installed
# after the imports, so warnings raised while importing are still shown.
# NOTE(review): a blanket 'ignore' also hides deprecation and DataLoader
# warnings — consider narrowing it by category.
warnings.filterwarnings('ignore')

**Конфігурація**

In [None]:
# Per-channel mean / std handed to the Normalize transform and to DEMO_config.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Size bounds handed to the Resize transform (min_size is also used by DEMO_config).
min_size = 600
max_size = 1000

# Region Proposal Network settings.
RPN_config = {
    # 3 scales x 3 aspect ratios = 9, which matches 'num_anchors' below
    # (presumably anchors per feature-map location — confirm in main.py).
    'anchor_scale': (128, 256, 512),
    'anchor_aspect_ratio': (0.5, 1.0, 2.0),
    'downsample': 16,
    'in_channels': 512,
    'num_anchors': 9,
    'bbox_reg_weights': (1., 1., 1., 1.),
    'iou_positive_thresh': 0.7,
    'iou_negative_high': 0.3,
    'iou_negative_low': 0,
    'batch_size_per_image': 256,
    'positive_fraction': 0.5,
    'min_size': 16,
    'nms_thresh': 0.7,
    'top_n_train': 2000,
    'top_n_test': 300,
}

# Fast R-CNN detection head settings.
FastRCNN_config = {
    'output_size': 7,
    'downsample': 16,
    'out_channels': 4096,
    # 21 equals the number of entries in VOC_LABELS (background + 20 classes).
    'num_classes': 21,
    'bbox_reg_weights': (10., 10., 5., 5.),
    'iou_positive_thresh': 0.5,
    'iou_negative_high': 0.5,
    'iou_negative_low': 0.1,
    'batch_size_per_image': 128,
    'positive_fraction': 0.25,
    'min_size': 1,
    'nms_thresh': 0.3,
    'score_thresh': 0.05,
    'top_n': 50,
}

# Optimisation / checkpointing settings.
# NOTE(review): with 'epochs' = 3 the single milestone at 10 lies beyond the
# last epoch, and the results section loads './epoch_013.pt' — confirm that
# these values are meant to go together.
TRAIN_config = {
    'epochs': 3,
    'lr': 0.001,
    'momentum': 0.9,
    'weight_decay': 0.0005,
    'milestones': [10],
    'clip': 10,
    'epoch_freq': 1,
    'print_freq': 1,
    'save': True,
    'SAVE_PATH': './',
}

# Evaluation settings.
TEST_config = {
    'num_classes': 21,
    'iou_thresh': 0.5,
    'use_07_metric': True,
}

# Single-image demo settings; reuses the preprocessing values defined above.
DEMO_config = {
    'min_size': min_size,
    'mean': imagenet_mean,
    'std': imagenet_std,
    'score_thresh': 0.7,
}

# Passed as the last argument to main.FasterRCNN.
gpu_id = 0

**Попередня обробка даних**

In [None]:
# Mount Google Drive at /content/drive; the dataset directory configured in the
# next cell lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Batch size used by the training DataLoader.
batch_size = 1

# Dataset root passed to dataset.VOC_Detection; located under the Google Drive mount point.
data_dir = '/content/drive/MyDrive'

In [None]:
# ---- Training split: resize -> flip -> tensor -> normalise -----------------
train_transform = augmentation.Compose([
    augmentation.Resize(min_size, max_size),
    augmentation.Flip(),
    augmentation.ToTensor(),
    augmentation.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# ---- Test split: same steps as training, without the flip ------------------
test_transform = augmentation.Compose([
    augmentation.Resize(min_size, max_size),
    augmentation.ToTensor(),
    augmentation.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# VOC 2007 datasets rooted at data_dir.
# NOTE(review): download=True presumably fetches the archive into data_dir when
# it is missing — confirm in dataset.py.
train_dataset = dataset.VOC_Detection(
    root=data_dir,
    year='2007',
    image_set='trainval',
    download=True,
    transforms=train_transform,
    use_diff=False,
)
test_dataset = dataset.VOC_Detection(
    root=data_dir,
    year='2007',
    image_set='test',
    download=True,
    transforms=test_transform,
    use_diff=False,
)

# Loaders: training is shuffled and uses batch_size; testing is sequential, one
# sample per batch.
# NOTE(review): num_workers=8 may exceed the CPU count of the runtime — confirm.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=8,
)

**Тренування моделі**

In [None]:
# Build the detector from the configuration dictionaries defined in the
# configuration cell; gpu_id is passed through as the final argument.
FasterRCNN = main.FasterRCNN(RPN_config, FastRCNN_config, TRAIN_config, TEST_config, DEMO_config, gpu_id)

In [None]:
# Start training. The test loader is passed alongside the training loader
# (presumably for evaluation during training — confirm in main.py).
FasterRCNN.train(train_loader, test_loader)

**Результат**

In [None]:
# Rebuild the detector and restore saved weights, so this section can be run
# without executing the training cell first.
FasterRCNN = main.FasterRCNN(RPN_config, FastRCNN_config, TRAIN_config, TEST_config, DEMO_config, gpu_id)

# NOTE(review): TRAIN_config sets 'epochs' to 3 while this file is named
# epoch_013 — confirm the checkpoint exists under this name.
checkpoint_path = './epoch_013.pt'

# map_location='cpu' deserialises the tensors on the CPU regardless of the
# device they were saved from, so loading does not fail when that device is
# unavailable; load_state_dict then copies the values into the model's own
# parameters.
state_dict = torch.load(checkpoint_path, map_location='cpu')
FasterRCNN.model.load_state_dict(state_dict)

In [None]:
# Class names indexed by label id; entry 0 is the background placeholder,
# followed by the 20 object classes.
VOC_LABELS = (
    '__background__',
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
)

# Path of the image to run the detector on.
demo_img = './sample.png'

# Run the demo on that image. The result is unpacked as the image to display
# followed by per-detection labels, scores and boxes (the plotting cell indexes
# all three by detection and unpacks each box into four coordinates).
img, pred_labels, pred_scores, pred_detections = FasterRCNN.demo(demo_img)

In [None]:
# One large figure holding a 3x2 grid, one panel per detection.
plt.figure(figsize=(20, 20))
plt.rcParams['axes.titlesize'] = 20
plt.axis('off')

# Show at most six detections (the grid has 3 * 2 = 6 panels).
for panel in range(min(6, pred_labels.shape[0])):
    ax = plt.subplot(3, 2, panel + 1)
    label = pred_labels[panel]
    score = pred_scores[panel]
    box = pred_detections[panel]

    # Draw the image and hide both axes of this panel.
    ax.imshow(img)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Title: class name followed by the score as a percentage rounded to 2 places.
    class_name = VOC_LABELS[int(label)]
    percent_text = str(round(100 * score, 2))
    ax.set_title(class_name + ' ' + percent_text + '%')

    # Outline the detection: the box is unpacked as (min_x, min_y, max_x, max_y).
    x0, y0, x1, y1 = box
    outline = Rectangle((x0, y0), x1 - x0, y1 - y0, edgecolor='r', facecolor='none')
    ax.add_patch(outline)