In [None]:
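# Pin torch/torchvision to the CUDA 11.1 builds up front; this saves a lot of time
# (presumably because the mmcv-full install below can then use a matching prebuilt wheel).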
!pip install torch==1.10.0+cu111 torchvision==0.11.0+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html

In [None]:
# !rm mmdetection -r
!pip install -U sahi
!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html #check the current version of torch and cuda
                                                                                                # NOTE: this wheel index only works with torch 1.x.0 releases
!pip install mmdet

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as T
import numpy as np
from PIL import Image
import cv2

!pip install "opencv-python-headless<4.3"
from sahi.model import MmdetDetectionModel
from sahi.predict import get_sliced_prediction
from sahi.utils.cv import read_image_as_pil
from sahi.prediction import ObjectPrediction

from sahi.utils.torch import is_torch_cuda_available, empty_cuda_cache

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# SAHI

In [None]:
# Run these cells to use them below
# Class from the official code on the SAHI github
class DetectionModel:
    """
    Base class for SAHI detection-model adapters (adapted from the official SAHI code).

    Subclasses are expected to override load_model(), perform_inference() and
    _create_object_prediction_list_from_original_predictions(); the versions
    defined here raise NotImplementedError.
    """

    def __init__(
        self,
        model_path=None,
        config_path=None,
        device=None,
        mask_threshold: float = 0.5,
        prediction_score_threshold: float = 0.3,
        category_mapping=None,
        category_remapping=None,
        load_at_init: bool = True,
    ):
        """
        Init object detection/instance segmentation model.
        Args:
            model_path: str
                Path for the instance segmentation model weight
            config_path: str
                Path for the mmdetection instance segmentation model config file
            device: str
                Torch device, "cpu" or "cuda"
            mask_threshold: float
                Value to threshold mask pixels, should be between 0 and 1
            prediction_score_threshold: float
                All predictions with score < prediction_score_threshold will be discarded
            category_mapping: dict: str to str
                Mapping from category id (str) to category name (str) e.g. {"1": "pedestrian"}
            category_remapping: dict: str to int
                Remap category ids after performing inference; looked up by str(category id)
            load_at_init: bool
                If True, automatically loads the model at initialization
        """
        self.model_path = model_path
        self.config_path = config_path
        self.model = None
        self.device = device
        self.mask_threshold = mask_threshold
        self.prediction_score_threshold = prediction_score_threshold
        self.category_mapping = category_mapping
        self.category_remapping = category_remapping
        self._original_predictions = None
        self._object_prediction_list = None

        # automatically set device if it is None (or empty)
        if not self.device:
            self.device = "cuda:0" if is_torch_cuda_available() else "cpu"

        # automatically load model if load_at_init is True
        if load_at_init:
            self.load_model()

    def load_model(self):
        """
        This function should be implemented in a way that detection model
        should be initialized and set to self.model.
        (self.model_path, self.config_path, and self.device should be utilized)
        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError()

    def unload_model(self):
        """
        Unloads the model from CPU/GPU.
        """
        self.model = None
        empty_cuda_cache()

    def perform_inference(self, image: np.ndarray):
        """
        This function should be implemented in a way that prediction should be
        performed using self.model and the prediction result should be set to self._original_predictions.
        Args:
            image: np.ndarray
                A numpy array that contains the image to be predicted.
        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError()

    def _create_object_prediction_list_from_original_predictions(
        self,
        shift_amount=None,
        full_shape=None,
    ):
        """
        This function should be implemented in a way that self._original_predictions should
        be converted to a list of prediction.ObjectPrediction and set to
        self._object_prediction_list. self.mask_threshold can also be utilized.
        Args:
            shift_amount: list
                To shift the box and mask predictions from sliced image to full sized image, should be in the form of [shift_x, shift_y]
            full_shape: list
                Size of the full image after shifting, should be in the form of [height, width]
        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError()

    def _apply_category_remapping(self):
        """
        Applies category remapping based on mapping given in self.category_remapping.
        Each prediction's category id is replaced by
        self.category_remapping[str(old_category_id)].
        """
        # confirm self.category_remapping is not None
        assert (
            self.category_remapping is not None
        ), "self.category_remapping cannot be None"
        # remap categories
        for object_prediction in self._object_prediction_list:
            old_category_id_str = str(object_prediction.category.id)
            new_category_id_int = self.category_remapping[old_category_id_str]
            object_prediction.category.id = new_category_id_int

    def convert_original_predictions(
        self,
        shift_amount=None,
        full_shape=None,
    ):
        """
        Converts original predictions of the detection model to a list of
        prediction.ObjectPrediction object. Should be called after perform_inference().
        Args:
            shift_amount: list
                To shift the box and mask predictions from sliced image to full sized image, should be in the form of [shift_x, shift_y].
                Defaults to [0, 0] when omitted.
            full_shape: list
                Size of the full image after shifting, should be in the form of [height, width]
        """
        # A fresh list per call avoids sharing one mutable default between calls.
        if shift_amount is None:
            shift_amount = [0, 0]
        self._create_object_prediction_list_from_original_predictions(
            shift_amount=shift_amount,
            full_shape=full_shape,
        )
        if self.category_remapping:
            self._apply_category_remapping()

    @property
    def object_prediction_list(self):
        """List of ObjectPrediction built by convert_original_predictions(), or None."""
        return self._object_prediction_list

    @property
    def original_predictions(self):
        """Raw model output stored by perform_inference(), or None."""
        return self._original_predictions

    def _create_predictions_from_object_prediction_list(self, object_prediction_list):
        """
        This function should be implemented in a way that it converts a list of
        prediction.ObjectPrediction instance to detection model's original prediction format.
        Then returns the converted predictions.
        Can be considered as inverse of _create_object_prediction_list_from_original_predictions().
        Args:
            object_prediction_list: a list of prediction.ObjectPrediction
        Returns:
            original_predictions: a list of converted predictions in models original output format
        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError()

class MmdetDetectionModel(DetectionModel):
    """
    SAHI adapter around a torchvision Faster R-CNN (MobileNetV3-Large FPN) bird detector.

    Despite the name (kept so existing cells keep working), no mmdetection model
    is involved: the network is built with torchvision and its weights are read
    from self.model_path.
    """

    # IoU threshold passed to torchvision.ops.nms; with 0.0 any box overlapping a
    # higher-scoring kept box is suppressed.
    NMS_IOU_THRESHOLD = 0.0
    # Minimum score a detection needs to be kept.
    # NOTE(review): self.prediction_score_threshold is not consulted here — confirm
    # whether this fixed value is intentional.
    SCORE_THRESHOLD = 0.5

    def load_model(self):
        """
        Detection model is initialized, loaded from self.model_path, moved to
        self.device and set to self.model (in eval mode).
        """
        # Alternative architectures (the checkpoint must match the one built here):
        #   torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=True)
        #   torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)
        model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)

        # get number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features

        # replace the pre-trained head with a new one (1 class (bird) + background)
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, self.num_categories)

        state_dict = torch.load(self.model_path, map_location=torch.device(self.device))
        model.load_state_dict(state_dict)
        # Without this the network stays on the CPU even when a CUDA device was requested.
        model.to(self.device)
        model.eval()
        self.model = model

        # set category_mapping
        if not self.category_mapping:
            self.category_mapping = {
                str(ind): category_name
                for ind, category_name in enumerate(self.category_names)
            }

    def perform_inference(self, image: np.ndarray):
        """
        Prediction is performed using self.model and the prediction result is set to self._original_predictions.
        Supports only a batch of one image.
        Args:
            image: np.ndarray
                A numpy array that contains the image to be predicted.
        """
        # Confirm model is loaded
        assert (
            self.model is not None
        ), "Model is not loaded, load it by calling .load_model()"

        # Convert to a tensor and add the batch dimension.
        tensorimage = T.ToTensor()(image)[None, ...].to(self.device)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            prediction_result = self.model(tensorimage)

        self._original_predictions = prediction_result

    @property
    def num_categories(self):
        """
        Returns number of categories (background + bird)
        """
        return 2

    @property
    def has_mask(self):
        """
        Returns if model output contains segmentation mask.
        False when the model exposes no `with_mask` attribute (the torchvision
        detector built in load_model() does not set one in this file).
        """
        return bool(getattr(self.model, "with_mask", False))

    @property
    def category_names(self):
        """Category names indexed by label id."""
        return ["NoBird", "Bird"]

    def _create_object_prediction_list_from_original_predictions(
        self,
        shift_amount=None,
        full_shape=None,
    ):
        """
        self._original_predictions is converted to a list of prediction.ObjectPrediction and set to
        self._object_prediction_list.
        Detections are first reduced with NMS (NMS_IOU_THRESHOLD), then those
        scoring below SCORE_THRESHOLD are dropped.
        Args:
            shift_amount: list
                To shift the box and mask predictions from sliced image to full sized image, should be in the form of [shift_x, shift_y].
                Defaults to [0, 0] when omitted.
            full_shape: list
                Size of the full image after shifting, should be in the form of [height, width]
        """
        # A fresh list per call avoids sharing one mutable default between calls.
        if shift_amount is None:
            shift_amount = [0, 0]

        # Only the first (and only) image of the batch is processed.
        prediction = self._original_predictions[0]
        boxes_t = prediction["boxes"].detach()
        scores_t = prediction["scores"].detach()
        labels_t = prediction["labels"].detach()

        keep = torchvision.ops.nms(boxes_t, scores_t, self.NMS_IOU_THRESHOLD)
        keep = keep[scores_t[keep] >= self.SCORE_THRESHOLD]

        # Box coordinates are truncated to integers, as in the original code.
        boxes = boxes_t[keep].cpu().numpy().astype(np.int32)
        scores = scores_t[keep].cpu().numpy()
        labels = labels_t[keep].cpu().numpy().astype(np.int32)

        object_prediction_list = []
        for bbox, label, score in zip(boxes, labels, scores):
            category_id = int(label)
            object_prediction_list.append(
                ObjectPrediction(
                    bbox=bbox.tolist(),
                    category_id=category_id,
                    score=float(score),
                    category_name=self.category_names[category_id],
                    shift_amount=shift_amount,
                    full_shape=full_shape,
                )
            )

        self._object_prediction_list = object_prediction_list

    def _create_original_predictions_from_object_prediction_list(
        self,
        object_prediction_list,
    ):
        """
        Converts a list of prediction.ObjectPrediction instance to a per-category list of arrays.
        Args:
            object_prediction_list: a list of prediction.ObjectPrediction
        Returns:
            original_predictions: one (N, 5) float32 array of
                [xmin, ymin, xmax, ymax, score] rows per category id, or a
                (boxes, masks) tuple when self.has_mask is True
        """
        has_mask = self.has_mask
        category_ids = range(self.num_categories)
        category_id_to_bbox = {category_id: [] for category_id in category_ids}
        category_id_to_mask = {category_id: [] for category_id in category_ids}

        # group boxes (and masks) by category id
        for object_prediction in object_prediction_list:
            category_id = object_prediction.category.id
            # form bbox as 1x5 list [xmin, ymin, xmax, ymax, score]
            bbox = object_prediction.bbox.to_voc_bbox()
            bbox.extend([object_prediction.score.score])
            category_id_to_bbox[category_id].append(np.array(bbox, dtype=np.float32))
            # form 2d bool mask
            if has_mask:
                category_id_to_mask[category_id].append(object_prediction.mask.bool_mask)

        boxes = []
        masks = []
        for category_id, category_boxes in category_id_to_bbox.items():
            if not category_boxes:
                # add 0x5 array to boxes for empty categories
                boxes.append(np.zeros((0, 5), dtype=np.float32))
                if has_mask:
                    masks.append([])
            else:
                boxes.append(np.array(category_boxes))
                if has_mask:
                    masks.append(np.array(category_id_to_mask[category_id]))

        # form final output
        if has_mask:
            return (boxes, masks)
        return boxes

In [None]:
def SAHIalgorithm(
    img,
    detection_model,
    slice_height=224,
    slice_width=224,
    overlap_height_ratio=0.1,
    overlap_width_ratio=0.1,
):
    """Apply the SAHI algorithm on the image and return the boxes.

    Args:
        img: image handed unchanged to sahi's get_sliced_prediction.
        detection_model: detection model handed unchanged to get_sliced_prediction.
        slice_height: height of each slice (default 224).
        slice_width: width of each slice (default 224).
        overlap_height_ratio: vertical overlap ratio between slices (default 0.1).
        overlap_width_ratio: horizontal overlap ratio between slices (default 0.1).

    Returns:
        A list of boxes in Pascal VOC order [xmin, ymin, xmax, ymax], one per
        prediction, converted from the [x, y, width, height] "bbox" entries of
        result.to_coco_predictions().
    """
    result = get_sliced_prediction(
        img,
        detection_model,
        slice_height=slice_height,
        slice_width=slice_width,
        overlap_height_ratio=overlap_height_ratio,
        overlap_width_ratio=overlap_width_ratio,
    )

    # Transform the boxes into Pascal VOC format
    boxes = []
    for prediction in result.to_coco_predictions():
        x_min, y_min, width, height = prediction["bbox"][:4]
        boxes.append([x_min, y_min, x_min + width, y_min + height])

    return boxes

# Video Detection With SAHI (version that saves the predictions, not the video)

In [None]:
import pickle as cPickle

from google.colab.patches import cv2_imshow
#################################################################################
# Video Detection With SAHI (version that saves the predictions, not the video) #
#################################################################################
devicec = 'cuda' if torch.cuda.is_available() else 'cpu'
print(devicec)

ROOT_DIR_SAVING = "/content/drive/MyDrive/Thesis/savedmodel/v1/"
MODEL_NAME = "bestfasterrcnnv1_MobileNetv3_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
checkpoint = ROOT_DIR_SAVING + MODEL_NAME
model = MmdetDetectionModel(
    model_path=checkpoint,
    device=devicec
)

VIDEO_PATH = "/content/drive/MyDrive/Thesis/videos/presentationvideos/starlings4.mov"
# Intermediate file rewritten every CHECKPOINT_EVERY frames, in case the session dies.
CHECKPOINT_PATH = "/content/drive/MyDrive/Thesis/videos/presentationvideos/starlings4.npy"
CHECKPOINT_EVERY = 5
# Final result written once the whole video has been processed.
FINAL_PATH = "/content/drive/MyDrive/Thesis/videos/presentationvideos/predstarlings4.npy"


def save_predictions(path, bird_counts, frame_boxes):
    """Pickle [bird_counts, frame_boxes] to `path`, overwriting any existing file.

    The two lists are stored in a 2-element object array so that index 0 is the
    per-frame bird count and index 1 the per-frame list of boxes. Building the
    array explicitly avoids np.asarray() on ragged input, which newer NumPy
    versions reject.
    """
    data = np.empty(2, dtype=object)
    data[0] = list(bird_counts)
    data[1] = list(frame_boxes)
    with open(path, 'wb') as outp:  # Overwrites any existing file.
        cPickle.dump(data, outp, cPickle.HIGHEST_PROTOCOL)


print("------------Load------------")
vid = cv2.VideoCapture(VIDEO_PATH)

count = 0
# NOTE: every annotated frame is kept in memory; long videos can exhaust RAM.
all_frames = []
all_boxes = []
all_birdamt = []
try:
    while vid.isOpened():
        ret, orig_frame = vid.read()
        if not ret:
            break

        orig_shape = orig_frame.shape
        print(orig_shape)

        # OpenCV decodes frames as BGR; convert before handing the image over as RGB.
        rgb_frame = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2RGB)
        pil_frame = read_image_as_pil(Image.fromarray(rgb_frame))

        boxes = SAHIalgorithm(pil_frame, model)

        # draw the boxes on the (BGR) frame
        for box in boxes:
            cv2.rectangle(orig_frame,
                          (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])),
                          (255, 0, 0), 1)
        # Bird count, drawn once per frame (only when something was detected, as before).
        if boxes:
            cv2.putText(orig_frame, str(len(boxes)), (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)

        size = (orig_shape[0], orig_shape[1])
        all_frames.append(orig_frame)
        all_boxes.append(boxes)
        all_birdamt.append(len(boxes))

        imS = cv2.resize(orig_frame, (960, 540))
        cv2_imshow(imS)

        count += 1
        if (count % CHECKPOINT_EVERY) == 0:  # Saving for security
            print("Saved")
            PSAVE_PATH = CHECKPOINT_PATH
            save_predictions(PSAVE_PATH, all_birdamt, all_boxes)
finally:
    vid.release()

print("----------Prediction Done----------")
print("Saving")
PSAVE_PATH = FINAL_PATH
save_predictions(PSAVE_PATH, all_birdamt, all_boxes)
print("--------------Saved----------------")

Overlay the saved predictions on the video frames, reading them back from the pickled predictions file

In [None]:
##################################
# Add saved information on video #
##################################
!pip3 install pickle5
import pickle5 as cPickle
# import pickle as cPickle
import cv2

PSAVE_PATH = "/content/drive/MyDrive/Thesis/videos/presentationvideos/finalresult/starlings.npy"
file = open(PSAVE_PATH,'rb')
dataPickle = file.read()
file.close()
res = cPickle.loads(dataPickle)

all_boxes = res[1]

vid = cv2.VideoCapture("/content/drive/MyDrive/Thesis/videos/presentationvideos/starlings.mov")
# vid = cv2.VideoCapture("/content/drive/MyDrive/Thesis/videos/A4.mp4")

print("------Putting infos on frames------")
frameidx = 0
all_frames = []
while vid.isOpened():
    # print("Enter")
    ret, orig_frame = vid.read()
    # print(ret)
    if ret == True:
        orig_shape = orig_frame.shape

        if len(all_boxes) == frameidx:
            break
        
        boxes = all_boxes[frameidx]
        
        # loop over the boxes
        for idx, box in enumerate(boxes):
            cv2.rectangle(orig_frame,
                        (int(box[0]), int(box[1])),
                        (int(box[2]), int(box[3])),
                        (255, 0, 0), 1)
            # cv2.putText(orig_frame, "Bird Amount:"+ str(len(boxes)), (100, 200), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 3)
            cv2.putText(orig_frame, str(len(boxes)), (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 3)
            cv2.putText(orig_frame, str(int((int(box[0])+int(box[1]))/2))+"x"+str(int((int(box[2])+int(box[3]))/2)), (int(box[0])-10, int(box[3])+10), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255,0,0), 1)
            # cv2.putText(orig_frame, str(idx+1), (int(box[0]), int(box[1])-5), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255,0,0), 1)

        size = (orig_shape[0], orig_shape[1])
        all_frames.append(orig_frame)
        frameidx += 1

        if 0xFF == ord('q'):
            break
    else:
        break

print("------Saving the video------")
SAVE_PATH = "/content/drive/MyDrive/Thesis/videos/presentationvideos/predstarlings.mp4"
size = (size[1], size[0])
fps = 30
out = cv2.VideoWriter(SAVE_PATH,cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
print("Video "+str(fps)+" fps.")

for i in range(len(all_frames)):
    out.write(all_frames[i])
out.release()
print("------Finish------")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pickle5
  Using cached pickle5-0.0.12-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (256 kB)
Installing collected packages: pickle5
Successfully installed pickle5-0.0.12
------Putting infos on frames------
------Saving the video------
Video 30 fps.
------Finish------


# Video Detection with SAHI

In [None]:
from google.colab.patches import cv2_imshow
#############################
# Video Detection With SAHI #
#############################
def saveimages(
    all_frames,
    size,
    save_path="/content/drive/MyDrive/Thesis/videos/prediction/predictionACs2_Batch8_Epoch50_224.mp4",
    fps=20,
):
  """Write the frames to an mp4 file, overwriting any existing file.

  Args:
      all_frames: list of frames (arrays as returned by cv2.VideoCapture.read).
      size: frame size as (height, width); it is swapped here because
          cv2.VideoWriter is given (width, height).
      save_path: output file path.
      fps: frame rate of the written video.
  """
  out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (size[1], size[0]))
  try:
      for frame in all_frames:
          out.write(frame)
  finally:
      out.release()

devicec = 'cuda' if torch.cuda.is_available() else 'cpu'
print(devicec)

ROOT_DIR_SAVING = "/content/drive/MyDrive/Thesis/savedmodel/v1/"
# NOTE(review): MmdetDetectionModel.load_model() builds a MobileNetV3 Faster R-CNN, while
# this checkpoint name does not mention MobileNetv3 — confirm the architecture matches,
# otherwise load_state_dict will fail.
MODEL_NAME = "bestfasterrcnnv1_SGD0005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
# MODEL_NAME = "bestfasterrcnnv1_MobileNetv3_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
checkpoint = ROOT_DIR_SAVING + MODEL_NAME
model = MmdetDetectionModel(
    model_path=checkpoint,
    device=devicec
)

# Playable intermediate video rewritten every CHECKPOINT_EVERY frames, in case the session dies.
CHECKPOINT_PATH = "/content/drive/MyDrive/Thesis/videos/prediction/predvid6Mobile224.mp4"
CHECKPOINT_FPS = 10
CHECKPOINT_EVERY = 5

print("------------Load------------")
vid = cv2.VideoCapture("/content/drive/MyDrive/Thesis/videos/A10.mp4")

count = 0
# NOTE: every annotated frame is kept in memory; long videos can exhaust RAM.
all_frames = []
all_boxes = []
try:
    while vid.isOpened():
        ret, orig_frame = vid.read()
        if not ret:
            break

        orig_shape = orig_frame.shape
        print(orig_shape)

        # OpenCV decodes frames as BGR; convert before handing the image over as RGB.
        rgb_frame = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2RGB)
        pil_frame = read_image_as_pil(Image.fromarray(rgb_frame))

        boxes = SAHIalgorithm(pil_frame, model)

        # draw the boxes on the (BGR) frame
        for box in boxes:
            cv2.rectangle(orig_frame,
                          (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])),
                          (255, 0, 0), 1)
        # Bird count, drawn once per frame (only when something was detected, as before).
        if boxes:
            cv2.putText(orig_frame, str(len(boxes)), (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)

        # Always (height, width); saveimages() does the swap for VideoWriter.
        size = (orig_shape[0], orig_shape[1])
        all_frames.append(orig_frame)
        all_boxes.append(boxes)

        imS = cv2.resize(orig_frame, (960, 540))
        cv2_imshow(imS)

        count += 1
        # The full video used to be rewritten after every frame as well; the
        # periodic checkpoint plus the final save below produce the same files.
        if (count % CHECKPOINT_EVERY) == 0:  # Save security
            print("Saved")
            saveimages(all_frames, size, save_path=CHECKPOINT_PATH, fps=CHECKPOINT_FPS)
finally:
    vid.release()

print("----------Prediction Done----------")
if all_frames:
    saveimages(all_frames, size)
    print("--------------Saved----------------")
else:
    print("No frame could be read; no video was written.")

# Image Detection

In [None]:
###################
# Image Detection #
###################
import time
import matplotlib.pyplot as plt

devicec = 'cuda' if torch.cuda.is_available() else 'cpu'
print(devicec)

ROOT_DIR_SAVING = "/content/drive/MyDrive/Thesis/savedmodel/v1/"
# MODEL_NAME = "bestfasterrcnnv1_SGD0005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
MODEL_NAME = "bestfasterrcnnv1_MobileNetv3_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
# MODEL_NAME = "bestfasterrcnnv1_MobileNetv3320_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
checkpoint = ROOT_DIR_SAVING + MODEL_NAME
model = MmdetDetectionModel(
    model_path=checkpoint,
    device=devicec
)

IMAGE_PATH = '/content/drive/MyDrive/Thesis/videos/video2image/v5/v5img12.jpg'
orig_frame = cv2.imread(IMAGE_PATH, 1)
# cv2.imread returns None instead of raising when the file cannot be read.
if orig_frame is None:
    raise FileNotFoundError("Could not read image: " + IMAGE_PATH)
orig_shape = orig_frame.shape
print(orig_shape)

# OpenCV loads images as BGR; convert once and use the RGB version for both
# the model input and the matplotlib display.
rgb_frame = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2RGB)
pil_frame = read_image_as_pil(Image.fromarray(rgb_frame))

start = time.perf_counter()
boxes = SAHIalgorithm(pil_frame, model)
end = time.perf_counter()
print("Time for SAHI on one image", end - start)

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
# Draw on a copy so the array handed to PIL is left untouched.
sample = rgb_frame.copy()
print("Size", sample.shape)
for box in boxes:
    cv2.rectangle(sample,
                  (int(box[0]), int(box[1])),
                  (int(box[2]), int(box[3])),
                  (0, 0, 255), 2)  # blue in RGB order

ax.imshow(sample)
ax.axis('off')
plt.savefig("/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/images/imagerapport/imgv5ResNetSAHI.pdf", bbox_inches='tight')
plt.show()

In [None]:
# Write the frames collected by a previous video-detection cell (`all_frames`) to an mp4 file.
SAVE_PATH = "/content/drive/MyDrive/Thesis/videos/prediction/predvid7ResNet.mp4"

if all_frames:
    # Derive (width, height) from the frames themselves. The previous
    # `size = (size[1], size[0])` swapped the value again on every re-run and
    # depended on whichever cell last assigned `size`.
    frame_height, frame_width = all_frames[0].shape[:2]
    out = cv2.VideoWriter(SAVE_PATH, cv2.VideoWriter_fourcc(*'mp4v'), 20, (frame_width, frame_height))
    try:
        for frame in all_frames:
            out.write(frame)
    finally:
        out.release()
else:
    print("No frames to save; run a video detection cell first.")