In [2]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as T
import os
import numpy as np
import pandas as pd

In [50]:
from torchvision import datasets, models
from torchvision.transforms import functional as FT
from torch.utils.data import DataLoader, sampler, random_split, Dataset
import copy
import cv2
from pycocotools.coco import COCO
class CustomDataset(datasets.VisionDataset):
    def __init__(self, root, transforms=None):
        # the 3 transform parameters are reuqired for datasets.VisionDataset
        super().__init__(root, transforms)
        self.coco = COCO(os.path.join(root, "_annotations.coco.json")) # annotatiosn stored here
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.ids = [id for id in self.ids if (len(self._load_target(id)) > 0)]
    
    def _load_image(self, id: int):
        path = self.coco.loadImgs(id)[0]['file_name']
        image = cv2.imread(os.path.join(self.root, path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image
    def _load_target(self, id):
        return self.coco.loadAnns(self.coco.getAnnIds(id))
    
    def __getitem__(self, index):
        id = self.ids[index]
        image = self._load_image(id)
        target = self._load_target(id)
        target = copy.deepcopy(self._load_target(id))
        
        boxes = [t['bbox'] + [t['category_id']] for t in target] # required annotation format for albumentations
        if self.transforms is not None:
            transformed = self.transforms(image=image, bboxes=boxes)
        
        image = transformed['image']
        boxes = transformed['bboxes']
        
        new_boxes = [] # convert from xywh to xyxy
        for box in boxes:
            xmin = box[0]
            xmax = xmin + box[2]
            ymin = box[1]
            ymax = ymin + box[3]
            new_boxes.append([xmin, ymin, xmax, ymax])
        
        boxes = torch.tensor(new_boxes, dtype=torch.float32)
        
        targ = {} # here is our transformed target
        targ['boxes'] = boxes
        targ['labels'] = torch.tensor([t['category_id'] for t in target], dtype=torch.int64)
        targ['image_id'] = torch.tensor([t['image_id'] for t in target])
        targ['area'] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) # we have a different area
        targ['iscrowd'] = torch.tensor([t['iscrowd'] for t in target], dtype=torch.int64)
        return image.div(255), targ # scale images
    def __len__(self):
        return len(self.ids)

In [51]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
def get_transforms(train=False):
    if train:
        transform = A.Compose([
            A.Resize(600, 600), # our input size can be 600px
            A.HorizontalFlip(p=0.3),
            A.VerticalFlip(p=0.3),
            A.RandomBrightnessContrast(p=0.1),
            A.ColorJitter(p=0.1),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='coco'))
    else:
        transform = A.Compose([
            A.Resize(600, 600), # our input size can be 600px
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='coco'))
    return transform

In [5]:
#A quoi ca sert ??
def collate_fn(batch):
    return tuple(zip(*batch))

In [52]:
train_dir = '../Box-Counting-5/train'
test_dir = '../Box-Counting-5/test'
valid_dir = '../Box-Counting-5/valid'

# Define datasets and dataloaders for train, test, and validation
train_dataset = CustomDataset(root=train_dir, transforms=get_transforms(True))
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=4, collate_fn=collate_fn)

test_dataset = CustomDataset(root=test_dir, transforms=get_transforms())
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=4)

valid_dataset = CustomDataset(root=valid_dir, transforms=get_transforms())
valid_dataloader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=4,)


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [44]:
from functools import partial
from torchvision.models.detection import _utils as det_utils
from torchvision.models.detection.ssdlite import SSDLiteClassificationHead
    
model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(pretrained=True)

in_channels = det_utils.retrieve_out_channels(model.backbone, (320, 320))
num_anchors = model.anchor_generator.num_anchors_per_location()
norm_layer  = partial(torch.nn.BatchNorm2d, eps=0.001, momentum=0.03)
num_classes = 4 + 1
model.head.classification_head = SSDLiteClassificationHead(in_channels, num_anchors, num_classes, norm_layer)


The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=SSDLite320_MobileNet_V3_Large_Weights.COCO_V1`. You can also use `weights=SSDLite320_MobileNet_V3_Large_Weights.DEFAULT` to get the most up-to-date weights.


## Test avec une seul image

In [53]:
images,targets = next(iter(train_dataloader))
images = list(image for image in images)
targets = [{k:v for k, v in t.items()} for t in targets]
output = model(images, targets) # just make sure this runs without error


In [21]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.002, momentum=0.9, nesterov=True, weight_decay=2e-4)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=80, gamma=0.1)

In [None]:
## Inutile car les librairies dessous font la même chose en mieux
from tqdm import tqdm # progress bar

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is {}".format(device))

num_epochs = 3

# Train the model
for epoch in range(num_epochs):
    
    model = model.to(device)
    model.train()

    all_losses = []
    all_losses_dict = []
    
    for images, target in tqdm(train_dataloader):
        optimizer.zero_grad()
        
        images,targets = next(iter(train_dataloader))
        images = list(image for image in images)
        targets = [{k:v for k, v in t.items()} for t in targets]
        
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_dict_append = {k: v.item() for k, v in loss_dict.items()}
        loss_value = losses.item()
        
        all_losses.append(loss_value)
        all_losses_dict.append(loss_dict_append)

        losses.backward()
        optimizer.step()

    all_losses_dict = pd.DataFrame(all_losses_dict) # for printing
    print("Epoch {}, lr: {:.6f}, loss: {:.6f}, loss_classifier: {:.6f}, loss_box: {:.6f}, loss_rpn_box: {:.6f}, loss_object: {:.6f}".format(
            epoch, optimizer.param_groups[0]['lr'],
            np.mean(all_losses),
            all_losses_dict['loss_classifier'].mean(),
            all_losses_dict['loss_box_reg'].mean(),
            all_losses_dict['loss_rpn_box_reg'].mean(),
            all_losses_dict['loss_objectness'].mean()
        ))
        

# Save the trained model
torch.save(model.state_dict(), './Box-Counting-4/fine_tuned_ssd_model.pth')

device is cpu


 12%|█▏        | 6/51 [00:05<00:39,  1.15it/s]


KeyboardInterrupt: 

Le git permet d'obtenir des fonctions utiles pour l'entraînement et l'évaluation

In [None]:
!git clone https://github.com/pytorch/vision.git
!git checkout v0.3.0

!cp vision/references/detection/utils.py ./
!cp vision/references/detection/transforms.py ./
!cp vision/references/detection/coco_eval.py ./
!cp vision/references/detection/engine.py ./
!cp vision/references/detection/coco_utils.py ./

Cloning into 'vision'...
remote: Enumerating objects: 493869, done.[K
remote: Counting objects: 100% (7479/7479), done.[K
remote: Compressing objects: 100% (459/459), done.[K
remote: Total 493869 (delta 7039), reused 7431 (delta 7006), pack-reused 486390[K
Receiving objects: 100% (493869/493869), 960.83 MiB | 31.04 MiB/s, done.
Resolving deltas: 100% (460283/460283), done.
error: pathspec 'v0.3.0' did not match any file(s) known to git


In [54]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is {}".format(device))

# training for 10 epochs
num_epochs = 10

for epoch in range(num_epochs):
    # training for one epoch
    train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, valid_dataloader, device=device)

device is cpu
Epoch: [0]  [ 0/26]  eta: 0:00:14  lr: 0.000052  loss: 13.3083 (13.3083)  bbox_regression: 5.0525 (5.0525)  classification: 8.2558 (8.2558)  time: 0.5456  data: 0.1221
Epoch: [0]  [10/26]  eta: 0:00:04  lr: 0.000564  loss: 11.1804 (11.6134)  bbox_regression: 3.2791 (3.4144)  classification: 8.2558 (8.1989)  time: 0.3093  data: 0.0134
Epoch: [0]  [20/26]  eta: 0:00:01  lr: 0.001076  loss: 10.9916 (11.5121)  bbox_regression: 2.9488 (3.3239)  classification: 8.1286 (8.1882)  time: 0.2853  data: 0.0029


ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 64, 1, 1])

In [None]:
evaluate(model, test_dataloader, device=device)