In [None]:
import cv2
import imutils
import glob
import matplotlib.pyplot as plt
import numpy as np

import torch
import torchvision
from torchvision.io import read_image
import torchvision.transforms as T
from torchvision.models.detection.rpn import AnchorGenerator
!pip install albumentations==0.4.6
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import json

from google.colab.patches import cv2_imshow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
def get_MobileNetmodel(trained=True, save_path=None):
  """Build a Faster R-CNN (MobileNetV3-Large FPN) detector with a 2-class head.

  The network is created through torchvision with ``pretrained=True`` and its
  box predictor is replaced by a fresh ``FastRCNNPredictor`` for two classes
  (bird + background).

  Args:
    trained: when True, try to load fine-tuned weights from ``save_path``.
    save_path: path to a checkpoint that ``torch.load`` turns into a state
      dict accepted by ``model.load_state_dict``. If it is None while
      ``trained`` is True, a message is printed and the model is returned
      with the freshly initialised head.

  Returns:
    The model, moved to CUDA when available, otherwise left on the CPU.
  """
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)

  num_classes = 2  # 1 class (bird) + background

  # The new head must accept the same feature width as the head it replaces.
  in_features = model.roi_heads.box_predictor.cls_score.in_features
  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

  if trained:
    if save_path is None:
      print("No path to the saved model")
    else:
      # Load on the CPU first so the checkpoint opens on machines without a GPU;
      # the model is moved to the target device afterwards.
      state_dict = torch.load(save_path, map_location=torch.device('cpu'))
      model.load_state_dict(state_dict)

  model.to(device)
  return model

def get_MobileNet320model(trained=True, save_path=None):
  """Build a Faster R-CNN (MobileNetV3-Large 320 FPN) detector with a 2-class head.

  The network is created through torchvision with ``pretrained=True`` and its
  box predictor is replaced by a fresh ``FastRCNNPredictor`` for two classes
  (bird + background).

  Args:
    trained: when True, try to load fine-tuned weights from ``save_path``.
    save_path: path to a checkpoint that ``torch.load`` turns into a state
      dict accepted by ``model.load_state_dict``. If it is None while
      ``trained`` is True, a message is printed and the model is returned
      with the freshly initialised head.

  Returns:
    The model, moved to CUDA when available, otherwise left on the CPU.
  """
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=True)

  num_classes = 2  # 1 class (bird) + background

  # The new head must accept the same feature width as the head it replaces.
  in_features = model.roi_heads.box_predictor.cls_score.in_features
  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

  if trained:
    if save_path is None:
      print("No path to the saved model")
    else:
      # Load on the CPU first so the checkpoint opens on machines without a GPU.
      state_dict = torch.load(save_path, map_location=torch.device('cpu'))
      model.load_state_dict(state_dict)

  # Moved after the weight load so all three model builders follow the same order.
  model.to(device)
  return model

def get_ResNet50model(trained=True, save_path=None):
  """Build a Faster R-CNN (ResNet-50 FPN) detector with a 2-class head.

  The network is created through torchvision with ``pretrained=True`` and its
  box predictor is replaced by a fresh ``FastRCNNPredictor`` for two classes
  (bird + background).

  Args:
    trained: when True, try to load fine-tuned weights from ``save_path``.
    save_path: path to a checkpoint that ``torch.load`` turns into a state
      dict accepted by ``model.load_state_dict``. If it is None while
      ``trained`` is True, a message is printed and the model is returned
      with the freshly initialised head.

  Returns:
    The model, moved to CUDA when available, otherwise left on the CPU.
  """
  device = 'cuda' if torch.cuda.is_available() else 'cpu'

  model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
  # Alternative kept for reference (pretrained backbone only):
  # model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=True)
  num_classes = 2  # 1 class (bird) + background

  # The new head must accept the same feature width as the head it replaces.
  in_features = model.roi_heads.box_predictor.cls_score.in_features
  model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

  if trained:
    if save_path is None:
      print("No path to the saved model")
    else:
      # Load on the CPU first so the checkpoint opens on machines without a GPU;
      # the model is moved to the target device afterwards.
      state_dict = torch.load(save_path, map_location=torch.device('cpu'))
      model.load_state_dict(state_dict)

  model.to(device)
  return model

def transforminput(maxsize):
    """Return an albumentations pipeline that center-crops an image to 576x576.

    ``maxsize`` is accepted but not used by the current pipeline; it was only
    referenced by a padding step that is disabled (padding to ``maxsize`` and
    resizing to 576x576 were earlier variants).
    """
    center_crop = A.CenterCrop(width=576, height=576)
    return A.Compose([center_crop])

def transformback(size):
    """Return an albumentations pipeline that resizes and converts to a tensor.

    ``size[0]`` and ``size[1]`` are handed to ``A.Resize`` as its first and
    second positional arguments. Bounding boxes passed alongside the image are
    declared in ``pascal_voc`` format with their classes in the ``labels`` field.
    """
    first_dim, second_dim = size[0], size[1]
    steps = [
        A.Resize(first_dim, second_dim),
        ToTensorV2(p=1.0),
    ]
    bbox_config = {'format': 'pascal_voc', 'label_fields': ['labels']}
    return A.Compose(steps, bbox_params=bbox_config)

# torchvision pipeline applied to each float32 RGB frame right before it is
# batched and fed to the detector (see its uses in the detection cells below).
toTensor = T.Compose([T.ToTensor()])

In [None]:
###################
# Video Detection #
###################
# Runs the detector on every frame of a video, draws the kept boxes on the
# frame, previews it, and finally writes all annotated frames to an .mp4 file.
#
# Other checkpoints that have been used with this cell:
# ROOT_DIR_SAVING = "/content/drive/MyDrive/Thesis/savedmodel/"
# MODEL_NAME = "bestfasterrcnnv2_SGD0005_SchedulerReduceLR_Size576_Batch8_Epoch50_8020_rc.pth"
# MODEL_NAME = "bestMobileNetv3_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100_8020_rc.pth"
# MODEL_NAME = "bestMobileNetv3320_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100_8020_rc.pth"

ROOT_DIR_SAVING = "/content/drive/MyDrive/Thesis/savedmodel/v1/"
# MODEL_NAME = "bestfasterrcnnv1_SGD0005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
MODEL_NAME = "bestfasterrcnnv1_MobileNetv3_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
# model = get_ResNet50model(trained=True, save_path=ROOT_DIR_SAVING + MODEL_NAME)
model = get_MobileNetmodel(trained=True, save_path=ROOT_DIR_SAVING + MODEL_NAME)
# model = get_MobileNet320model(trained=True, save_path=ROOT_DIR_SAVING + MODEL_NAME)
model.eval()

print("------------Load------------")

vid = cv2.VideoCapture("/content/drive/MyDrive/Thesis/videos/vid7.mp4")
# Renamed from `iter`, which shadowed the builtin for the rest of the kernel session.
frame_count = 0
all_frames = []
all_boxes = []
imgcount = 1
size = None  # (rows, cols) of the most recent frame; stays None if nothing is read

try:
    while vid.isOpened():
        ret, orig_frame = vid.read()
        if not ret:
            # End of stream or read failure.
            break

        orig_shape = orig_frame.shape
        size = (orig_shape[0], orig_shape[1])

        # OpenCV decodes to BGR uint8; the detector is fed RGB floats in [0, 1].
        frame = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2RGB).astype(np.float32)
        frame /= 255.0

        tensorframe = toTensor(frame)
        # Use the computed device instead of a hard-coded .cuda() so the cell
        # also runs on CPU-only runtimes.
        tensorframe = tensorframe[None, ...].to(device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = model(tensorframe)

        score = outputs[0]['scores']
        predboxes = outputs[0]['boxes']
        labels = outputs[0]["labels"].cpu().numpy().astype(np.int32)
        print("Score", score)

        # Non-maximum suppression with an IoU threshold of 0.2.
        keepidx = torchvision.ops.nms(predboxes, score, 0.2).cpu().numpy()
        score = score.cpu().numpy()[keepidx]
        output = predboxes.cpu().numpy().astype(np.int32)[keepidx]
        labels = labels[keepidx]

        # Keep only detections whose confidence is at least 0.1.
        confident = score >= 0.1
        output = output[confident]
        score = score[confident]
        labels = labels[confident]
        print("NMSScore", score)

        sample = {
            'image': frame,
            'bboxes': output,
            'labels': labels
        }
        print(size)
        sample = transformback(size)(**sample)
        boxes = sample['bboxes']

        # Draw every kept box on the original (BGR) frame.
        for box in boxes:
            cv2.rectangle(orig_frame,
                          (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])),
                          (255, 0, 0), 5)

        all_frames.append(orig_frame)
        all_boxes.append(boxes)

        frame_count += 1
        imS = cv2.resize(orig_frame, (960, 540))
        cv2_imshow(imS)
        # The original compared the constant 0xFF with ord('q'), which is never
        # true; test the key code returned by waitKey instead.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always give the capture handle back, even if a frame fails mid-way.
    vid.release()

print("----------Prediction Done----------")
# SAVE_PATH = "/content/drive/MyDrive/Thesis/videos/prediction/predictionA10_Batch8_Epoch50_224.mp4"
SAVE_PATH = "/content/drive/MyDrive/Thesis/videos/prediction/predvid7Mobile.mp4"

if all_frames:
    # VideoWriter takes (width, height); frame shapes are (rows, cols).
    size = (size[1], size[0])
    out = cv2.VideoWriter(SAVE_PATH, cv2.VideoWriter_fourcc(*'mp4v'), 20, size)
    try:
        for annotated_frame in all_frames:
            out.write(annotated_frame)
    finally:
        out.release()
    print("--------------Saved----------------")
else:
    # Previously this path crashed with a NameError on `size`.
    print("No frames were read from the video; nothing was saved")

In [None]:
###################
# Image Detection #
###################
# Configuration for single-image inference: selects the checkpoint, builds the
# detector and switches it to evaluation mode. The module-level `model` and
# `device` defined here are read by `imagedetection` below.
# NOTE(review): `time` is imported here rather than in the top import cell.
import time
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
ROOT_DIR_SAVING = "/content/drive/MyDrive/Thesis/savedmodel/"
# Checkpoint alternatives under savedmodel/ (exactly one MODEL_NAME must be active):
# MODEL_NAME = "bestfasterrcnnv2_SGD0005_SchedulerReduceLR_Size576_Batch8_Epoch50_8020_rc.pth"
MODEL_NAME = "bestMobileNetv3_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100_8020_rc.pth"
# MODEL_NAME = "bestMobileNetv3320_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100_8020_rc.pth"

# Checkpoint alternatives under savedmodel/v1/:
# ROOT_DIR_SAVING = "/content/drive/MyDrive/Thesis/savedmodel/v1/"
# MODEL_NAME = "bestfasterrcnnv1_SGD0005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
# MODEL_NAME = "bestfasterrcnnv1_MobileNetv3_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
# MODEL_NAME = "bestfasterrcnnv1_MobileNetv3320_SGD00005_SchedulerReduceLR_Size576_Batch8_Epoch100.pth"
# The builder must match the architecture the checkpoint was saved from.
# model = get_ResNet50model(trained=True, save_path=ROOT_DIR_SAVING + MODEL_NAME)
model = get_MobileNetmodel(trained=True, save_path=ROOT_DIR_SAVING + MODEL_NAME)
# Evaluation mode: the detector returns predictions instead of training losses.
model.eval()

def imagedetection(orig_frame, crop_origin=(120, 600), crop_size=576):
  """Run the detector on a square crop of a BGR image and plot the detections.

  Relies on the module-level ``model``, ``device`` and ``toTensor``.

  Args:
    orig_frame: BGR image array as returned by ``cv2.imread``.
    crop_origin: ``(row, col)`` of the crop's top-left corner. The default
      reproduces the previously hard-coded "Pigeon" crop. Other origins that
      were used: (1000, 1800) for v3, (700, 1850) for v1, (100, 530) for imgAU1.
    crop_size: side length of the square crop in pixels.

  Raises:
    ValueError: if ``orig_frame`` is None or the crop window is empty.

  Returns:
    None. The crop with the kept boxes drawn on it is shown with matplotlib.
  """
  if orig_frame is None:
    raise ValueError("orig_frame is None; cv2.imread returns None when the file cannot be read")

  top, left = crop_origin
  orig_frame = orig_frame[top:top + crop_size, left:left + crop_size]
  if orig_frame.size == 0:
    raise ValueError("crop window lies outside the image; adjust crop_origin/crop_size")

  orig_shape = orig_frame.shape
  print(orig_shape)

  # OpenCV decodes to BGR uint8; the detector is fed RGB floats in [0, 1].
  frame = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2RGB).astype(np.float32)
  frame /= 255.0
  tensorframe = toTensor(frame)
  tensorframe = tensorframe[None, ...].to(device)

  start = time.time()
  # Inference only: skip autograd bookkeeping.
  with torch.no_grad():
    outputs = model(tensorframe)
  if tensorframe.is_cuda:
    # CUDA kernels run asynchronously; wait for them before reading the clock.
    torch.cuda.synchronize()
  end = time.time()
  print("Time for one image", end - start)

  scores_t = outputs[0]['scores']
  boxes_t = outputs[0]['boxes']

  # Non-maximum suppression with an IoU threshold of 0.2.
  keepidx = torchvision.ops.nms(boxes_t, scores_t, 0.2).cpu().numpy()
  score = scores_t.cpu().numpy()[keepidx]
  output = boxes_t.cpu().numpy().astype(np.int32)[keepidx]

  # Keep only detections whose confidence is at least 0.1.
  output = output[score >= 0.1]

  _, ax = plt.subplots(1, 1, figsize=(16, 8))
  # cvtColor returns a new array, so the rectangles do not touch orig_frame.
  sample = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2RGB)
  print("Size", sample.shape)
  for box in output:
    cv2.rectangle(sample,
                  (int(box[0]), int(box[1])),
                  (int(box[2]), int(box[3])),
                  (0, 0, 255), 2)

  ax.imshow(sample.astype(np.uint8))
  ax.axis('off')
  # plt.savefig("/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/images/imagerapport/imgpigeonv2.pdf", bbox_inches='tight')
  plt.show()

# Other images this cell has been run on (swap IMAGE_PATH to use one):
# '/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/dataset/all_images/96.jpg'
# '/content/drive/MyDrive/Thesis/videos/video2image/v5/v5img12.jpg'
# '/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/images/imgAU1.jpg'
# '/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/images/flyingbirdimg1.jpg'
# '/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/images/COCO1.jpg'
IMAGE_PATH = '/content/drive/MyDrive/Github/BirdDetection-FeaturesExtraction/images/ngbird1.JPG'

# cv2.IMREAD_COLOR is the named form of the flag value 1 used before.
orig_frame = cv2.imread(IMAGE_PATH, cv2.IMREAD_COLOR)
if orig_frame is None:
    # cv2.imread does not raise on a missing/unreadable file; fail here with
    # the path instead of with an opaque error inside imagedetection.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")
imagedetection(orig_frame)