In [9]:
import torch
import torchvision
from torch.utils.data import DataLoader
import supervision #used to annotate frame and manage vision
import transformers #load detr model
import pytorch_lightning as pl #manage data training 
import os #access GPU
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
import numpy as np
from PIL import Image, ImageDraw
from transformers import DetrImageProcessor, DetrForObjectDetection  # the first is the utilities used in preprocessing the image and post processing the detection, and the second command is the actual model
import matplotlib
import cv2
import random
import timm #load model backbone
import matplotlib.pyplot as plt
%load_ext tensorboard
%tensorboard --logdir lightning_logs/
%matplotlib inline
from pytorch_lightning import Trainer #Trigger the training
#References
#https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/train-huggingface-detr-on-custom-dataset.ipynb#scrollTo=pzOJhEPa39dZ


#step 1: Load dataset
model_dataset = r"C:\Users\49152\Downloads\Aquarium Combined.v2-raw-1024.coco"
train_dataset = os.path.join(model_dataset, 'train')
test_dataset = os.path.join(model_dataset, 'test')
valid_dataset = os.path.join(model_dataset, 'valid')
annotation_file_name = "_annotations.coco.json"

#step 2: using the coco class to load the dataset 
class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(self,
                 image_dir_path:str,
                 image_processor,
                 train: bool=True):
        annotation_file_path = os.path.join(image_dir_path, annotation_file_name)
        super(CocoDetection, self).__init__(image_dir_path, annotation_file_path)
        #preprocess images before parsing through neural network
        self.image_processor = image_processor

    def __getitem__(self, idx):
        # read in PIL image and target in COCO format
        # feel free to add data augmentation here before passing them to the next step
        img, target = super(CocoDetection, self).__getitem__(idx)

        # preprocess image and target (converting target to DETR format, resizing + normalization of both image and target)
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        encoding = self.image_processor(images=img, annotations=target, return_tensors="pt")
        pixel_values = encoding["pixel_values"].squeeze() # remove batch dimension
        target = encoding["labels"][0] # remove batch dimension

        return pixel_values, target
# step 3: define image_processor(for postprocessing)
image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")


#step 4: create CocoDetection class for our data
train_cocoData = CocoDetection(image_dir_path = train_dataset, image_processor=image_processor, train=True)
test_cocoData = CocoDetection(image_dir_path = test_dataset, image_processor=image_processor, train=False)  
valid_cocoData = CocoDetection(image_dir_path = valid_dataset, image_processor=image_processor, train=False)

# To confirm data available for training , testing and validation
print("Number of training examples:", len(train_cocoData))
print("Number of testing examples:", len(test_cocoData))
print("Number of validation examples:", len(valid_cocoData))

#Step 5: To test that the data loaded properly, we load a random dataset and visualize from our model
image_ids = train_cocoData.coco.getImgIds()
#get ids of all images in train dataset and randomly select an image
image_id = random.choice(image_ids)
print('image = #{}'.format(image_id))

#step 6: load image and annotation
image= train_cocoData.coco.loadImgs(image_id)[0]
annotation = train_cocoData.coco.imgToAnns[image_id]
image_path = os.path.join(train_cocoData.root, image['file_name'])
image= cv2.imread(image_path)

# step 7: annotation : use supervision to convert coco annotations into detection objects used in annotating bounding boxes on the source image
detections = supervision.Detections.from_coco_annotations(coco_annotation=annotation)

#training
categories = train_cocoData.coco.cats
id2label = {k: v['name'] for k,v in categories.items()}

labels = [
    f"{id2label[class_id]}"
    for _, _, class_id, _
    in detections
]
box_annotator = supervision.BoxAnnotator()
frame = box_annotator.annotate(scene=image, detections=detections, labels=labels)

#step 8 visualizing our random image
# #get_ipython().run_line_magic('matplotlib', 'inline')
# supervision.show_frame_in_notebook(image, (8,8))

#responsible for easing training and conducting inferencd in the neural network
# step 9: images are padded when training images of different sizes in a batch. 
#The images are padded to the biggest resolution in the batch, creating a corresponinding binary_pixel which is used to indicate which pixel are/are not real

def collate_fn(batch):
    pixel_values = [item[0] for item in batch]
    encoding = image_processor.pad(pixel_values, return_tensors="pt")
    labels = [item[1] for item in batch]
    return{
        'pixel_values': encoding['pixel_values'],
        'pixel_mask': encoding['pixel_mask'], #stores only 1's and 0's and determines if the pixel was originally in the image or not
        'labels': labels
    }
#selecting a batch size has a direct impact on the speed of the training and memory allocation
DataLoader_Train= DataLoader(dataset= train_cocoData, collate_fn = collate_fn, batch_size = 4, shuffle=True)
DataLoader_Test= DataLoader(dataset = test_cocoData, collate_fn = collate_fn)
DataLoader_Valid= DataLoader(dataset = valid_cocoData, collate_fn = collate_fn)

#step 10: Model Training: For every data model trainig, we do a loss calculation 
#to infer predicted value from the actual value. An optimizer is used to give optimized solutions on how to better the predicted value to move closer to the actual/desired output
# Then we perform backward propagation i.e tweaking the weight to values to move the predicted value closer to the actual output
# We use pytorch-lightening library to achieve this

class Detr(pl.LightningModule):
     def __init__(self, lr, lr_backbone, weight_decay):
         #initialize the model with image_processor i.e. resnet
         super().__init__()
         
         # replace COCO classification head with custom head
         # we specify the "no_timm" variant here to not rely on the timm library
         # for the convolutional backbone
         
         self.model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50",
                                                             revision="no_timm",
                                                             num_labels=len(id2label),
                                                             ignore_mismatched_sizes=True)
         
        # see https://github.com/PyTorchLightning/pytorch-lightning/pull/1896
        # DeTR uses the backbone learning rate() to apply first to the whole network and the only to the backbone
        # This has direct influence on the optimizer result
         # the lr decides the size of the step taken during training iteration to reach the deired output
         self.lr = lr
         self.lr_backbone = lr_backbone
         self.weight_decay = weight_decay

     def forward(self, pixel_values, pixel_mask):
       outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask)

       return outputs

     def common_step(self, batch, batch_idx):
       pixel_values = batch["pixel_values"]
       pixel_mask = batch["pixel_mask"]
       labels = [{k: v.to(self.device) for k, v in t.items()} for t in batch["labels"]]

       outputs = self.model(pixel_values=pixel_values, pixel_mask=pixel_mask, labels=labels)

       loss = outputs.loss
       loss_dict = outputs.loss_dict

       return loss, loss_dict

     def training_step(self, batch, batch_idx):
        loss, loss_dict = self.common_step(batch, batch_idx)
        # logs metrics , and average across each epoc for each training step
        self.log("training_loss", loss)
        for k,v in loss_dict.items():
          self.log("train_" + k, v.item())

        return loss

     def validation_step(self, batch, batch_idx):
        loss, loss_dict = self.common_step(batch, batch_idx)
        self.log("validation_loss", loss)
        for k,v in loss_dict.items():
          self.log("valid_" + k, v.item())

        return loss

     def configure_optimizers(self):
        param_dicts = [
              {"params": [p for n, p in self.named_parameters() if "backbone" not in n and p.requires_grad]},
              {
                  "params": [p for n, p in self.named_parameters() if "backbone" in n and p.requires_grad],
                  "lr": self.lr_backbone,
              },
        ]
        optimizer = torch.optim.AdamW(param_dicts, lr=self.lr,
                                  weight_decay=self.weight_decay)

        return optimizer

    #pass train and validation data loaders
     def DataLoader_Train(self):
        return DataLoader_Train

     def DataLoader_Valid(self):
        return DataLoader_Valid

# Start training.
    #Tensor boards are used in tracking the key matrix during training
    #create an instance of pytorch module 
    # Start tensorboard.
%load_ext tensorboard
%tensorboard --logdir lightning_logs/



model = Detr(lr=1e-4, lr_backbone=1e-5, weight_decay=1e-4)

 #pass one batch through the module
batch = next(iter(DataLoader_Train))

outputs = model(pixel_values=batch['pixel_values'], pixel_mask=batch['pixel_mask'])

#Step 11: Trigger the training
    #set EPOCHS
MAXEPOCHS = 200
#trainer = Trainer(max_epochs=MAXEPOCHS, gradient_clip_val=0.1, accumulate_grad_batches=8, log_every_n_steps=5)
trainer = Trainer(devices=1, accumulate_grad_batches= 8, max_epochs= MAXEPOCHS, accelerator="gpu", gradient_clip_algorithm=0.1, log_every_n_steps=5)

#trainer = Trainer(max_steps=300, gradient_clip_val=0.1)
trainer.fit(model)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 19112), started 2 days, 20:21:52 ago. (Use '!kill 19112' to kill it.)

loading annotations into memory...
Done (t=0.04s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Number of training examples: 448
Number of testing examples: 63
Number of validation examples: 127
image = #95
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 19112), started 2 days, 20:21:52 ago. (Use '!kill 19112' to kill it.)

Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([9, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([9]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


MisconfigurationException: No supported gpu backend found!

In [None]:
# import tqdm as notebook_tqdm
pip install ipywidgets

In [7]:
pip install https://github.com/Lightning-AI/lightning/archive/refs/heads/master.zip -U

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.

Collecting https://github.com/Lightning-AI/lightning/archive/refs/heads/master.zip
  Downloading https://github.com/Lightning-AI/lightning/archive/refs/heads/master.zip
     - 0 bytes ? 0:00:00
     - 79.7 kB 2.5 MB/s 0:00:00
     - 398.8 kB 5.1 MB/s 0:00:00
     \ 529.7 kB 4.2 MB/s 0:00:00
     \ 529.7 kB 4.2 MB/s 0:00:00
     | 550.2 kB 2.9 MB/s 0:00:00
     / 560.5 kB 2.4 MB/s 0:00:00
     / 577.8 kB 2.0 MB/s 0:00:00
     - 770.3 kB 2.1 MB/s 0:00:00
     - 770.3 kB 2.1 MB/s 0:00:00
     \ 770.3 kB 2.1 MB/s 0:00:00
     \ 770.3 kB 2.1 MB/s 0:00:00
     | 770.3 kB 2.1 MB/s 0:00:00
     | 770.3 kB 2.1 MB/s 0:00:00
     / 842.8 kB 1.4 MB/s 0:00:00
     / 863.3 kB 1.3 MB/s 0:00:00
     - 863.3 kB 1.3 MB/s 0:00:00
     \ 863.3 kB 1.3 MB/s 0:00:00
     \ 863.3 kB 1.3 MB/s 0:00:00
     | 863.3 kB 1.3 MB/s 0:00:00
     | 863.3 kB 1.3 MB/s 0:00:00
   

In [10]:
pip install PyTorch 1.10


Defaulting to user installation because normal site-packages is not writeable
Collecting PyTorch
  Downloading pytorch-1.0.2.tar.gz (689 bytes)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement 1.10 (from versions: none)
ERROR: No matching distribution found for 1.10


In [11]:
torch.cuda.is_available()

False

In [None]:
pip uninstall torch


In [None]:
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118