In [1]:
# Connect this Colab notebook to Google Drive so the project files are reachable.
from google.colab import drive

# Mount Google Drive under /content/gdrive.
drive.mount('/content/gdrive')

# Make the project folder the working directory, so the relative paths used in
# later cells (e.g. './deepsort_30sec.mp4') resolve inside it.
%cd /content/gdrive/My Drive/Parallel_score_project

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/My Drive/Parallel_score_project


In [2]:
!pip install torch==1.10.0+cu111 torchvision==0.11.0+cu111 torchaudio==0.10.0 -f https://download.pytorch.org/whl/torch_stable.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.pytorch.org/whl/torch_stable.html


In [3]:
# Load PyTorch, then report which torch release and CUDA build tag are active.
import torch
TORCH_VERSION = '.'.join(torch.__version__.split('.', 2)[:2])  # first two dot-separated fields
CUDA_VERSION = torch.__version__.rpartition('+')[2]  # text after the last '+'
print(f'torch : {TORCH_VERSION}, cuda : {CUDA_VERSION}')

torch : 1.10, cuda : cu111


In [4]:
# install detectron2 with the specific torch and cuda versions
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html


In [5]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
import tqdm

In [23]:
# read video file
video = cv2.VideoCapture('./deepsort_30sec.mp4')

# cv2.VideoCapture does not raise when the file is missing or unreadable; fail
# here instead of silently getting zero-sized frames and no output further down.
if not video.isOpened():
    raise IOError("Could not open video file './deepsort_30sec.mp4'")

In [24]:
import cv2
from google.colab.patches import cv2_imshow

In [25]:
# One model-zoo entry supplies both the config yaml and the checkpoint URL.
MODEL_ZOO_ENTRY = 'COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml'

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MODEL_ZOO_ENTRY))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_ZOO_ENTRY)

# Test-time score threshold for the ROI heads.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6

In [26]:
# Build the predictor from cfg; later cells call it once per frame as predictor(frame).
predictor = DefaultPredictor(cfg)


In [27]:
# get width and height of frames in the video
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

# get frames per second of video and total number of frames
fps = video.get(cv2.CAP_PROP_FPS)
# video.get() returns a float; the frame count is used below as a loop bound and
# as tqdm's total, so store it as an integer
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
print("Video farmes width: ", width, "Video frames Height: ", height, "Frames per second of video: ", fps, "Total number of frames: ", num_frames)

Video farmes width:  1280 Video frames Height:  720 Frames per second of video:  25.0 Total number of frames:  751.0


In [28]:
# Create the writer for the annotated output video; it uses the input video's
# frame size and frame rate and the 'mp4v' codec.
video_writer = cv2.VideoWriter(
    'object_detection_output_persons_class_onl_exp.mp4',
    fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
    fps=float(fps),
    frameSize=(width, height),
    isColor=True,
)

In [29]:
# Create the video visualizer used to draw predictions, with the metadata of the
# first training dataset named in the config.
v = VideoVisualizer(
    metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
    instance_mode=ColorMode.IMAGE,
)

In [13]:
def run_model_on_video(video, maxFrames=None):
    """
    Run the predictor on frames read from ``video`` and yield annotated frames.

    Only detections whose predicted class index is 0 (the person class) are
    drawn. Relies on the notebook-level ``predictor`` and ``v`` objects.

    Args:
        video: an opened ``cv2.VideoCapture`` to read frames from.
        maxFrames: maximum number of frames to process; ``None`` processes
            every frame until the video ends.

    Yields:
        The frame with the kept detections drawn on it, in OpenCV's BGR channel
        order (ready for ``cv2.VideoWriter.write``).
    """

    readFrames = 0
    # Check the budget *before* reading: the previous post-increment
    # `readFrames > maxFrames` test let maxFrames + 1 frames through.
    while maxFrames is None or readFrames < maxFrames:
        hasFrame, frame = video.read()
        if not hasFrame:
            break

        # Get prediction results for this frame
        outputs = predictor(frame)

        # Keep only the person class (class index 0) and move it to the CPU
        instances = outputs["instances"]
        persons = instances[instances.pred_classes == 0].to('cpu')

        # OpenCV decodes frames as BGR; swap the channels to RGB for the visualizer
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Draw a visualization of the predictions using the video visualizer
        visualization = v.draw_instance_predictions(rgb_frame, persons)

        # Convert the visualizer's RGB image back to OpenCV BGR format
        visualization = cv2.cvtColor(visualization.get_image(),
                                     cv2.COLOR_RGB2BGR)

        yield visualization

        readFrames += 1

In [14]:
# Enumerate the frames of the video
try:
    for visualization in tqdm.tqdm(run_model_on_video(video, num_frames),
                                   total=int(num_frames)):

        # Write to video file
        video_writer.write(visualization)
finally:
    # Release resources even if inference or writing fails part-way, so the
    # capture handle is freed and the writer is closed
    video.release()
    video_writer.release()
    cv2.destroyAllWindows()

  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
100%|██████████| 751/751.0 [05:59<00:00,  2.09it/s]


In [32]:
def get_bboxes_and_scores(predictor, num_frames, capture=None):
  """
  Run ``predictor`` on successive video frames and collect boxes with scores.

  Args:
    predictor: callable taking a frame and returning a dict whose
      ``'instances'`` entry exposes ``get_fields()`` with ``'scores'`` and
      ``'pred_boxes'`` (the latter having a ``.tensor``).
    num_frames: maximum number of frames to process.
    capture: object with a ``read()`` method returning ``(hasFrame, frame)``.
      Defaults to the notebook-level ``video`` capture, which is what this
      function always used before the parameter existed.

  Returns:
    A list with one entry per processed frame; each entry is a list of
    detections, and each detection is the box coordinates followed by the score.
  """
  if capture is None:
    capture = video

  output = []
  readFrames = 0
  # Check the budget before reading: the previous post-increment
  # `readFrames > num_frames` test processed num_frames + 1 frames.
  while readFrames < num_frames:
    hasFrame, frame = capture.read()
    if not hasFrame:
      break

    # Get prediction results for this frame
    fields = predictor(frame)['instances'].get_fields()
    scores = fields['scores'].tolist()
    pred_boxes = fields['pred_boxes'].tensor.tolist()

    # One row per detection: box coordinates with the score appended
    output.append([box + [score] for box, score in zip(pred_boxes, scores)])
    readFrames += 1

  return output

In [33]:
# Collect, for every frame, each detection's box coordinates followed by its score.
# NOTE: get_bboxes_and_scores reads frames from the notebook-level `video` capture.
output = get_bboxes_and_scores(predictor=predictor, num_frames=num_frames)

  max_size = (max_size + (stride - 1)) // stride * stride


In [34]:
# Preview the first frame's detections only; displaying `output` in full dumps
# every box of every frame into the notebook.
output[:1]

[[[500.5431823730469,
   256.2647399902344,
   550.6124877929688,
   323.4969177246094,
   0.9971415400505066],
  [1062.6513671875,
   230.12742614746094,
   1094.7100830078125,
   296.6823425292969,
   0.9888440370559692],
  [997.5767822265625,
   256.7901306152344,
   1052.348388671875,
   332.1959533691406,
   0.9853020906448364],
  [948.9379272460938,
   224.263671875,
   981.6719970703125,
   280.420166015625,
   0.9815764427185059],
  [907.723388671875,
   656.9178466796875,
   948.5015869140625,
   719.8865356445312,
   0.9791978597640991],
  [757.7900390625,
   363.6324462890625,
   769.1299438476562,
   374.558837890625,
   0.9679536819458008],
  [1204.80810546875,
   406.248046875,
   1264.0537109375,
   502.7308044433594,
   0.9616699814796448],
  [795.178955078125,
   303.46746826171875,
   819.9342041015625,
   372.8009033203125,
   0.9516909718513489],
  [797.5501098632812,
   334.8853759765625,
   858.9533081054688,
   413.7969055175781,
   0.9481317400932312],
  [1174.4

## Display SORT IDs on detected images

In [13]:
!pip install filterpy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [14]:
# Copy sort.py into the working directory so `from sort import ...` can find it.
# NOTE(review): the first cell already changes into the Drive project folder, so
# source and destination are the same file here and cp copies nothing (see its message).
!cp "/content/gdrive/MyDrive/Parallel_score_project/sort.py" .

cp: '/content/gdrive/MyDrive/Parallel_score_project/sort.py' and './sort.py' are the same file


In [17]:
# import everything from sort. For some reason this cell may need to be run twice before the import succeeds
from sort import *

In [16]:
from PIL import Image

In [20]:
# SORT tracker instance shared by the tracking functions defined below
mot_tracker1 = Sort(
    max_age=3,
    min_hits=1,
    iou_threshold=0.15,
)

In [21]:
def display_sort_id(video, max_frames):
  """
  Detect persons in each frame, track them with SORT and yield annotated frames.

  Relies on the notebook-level ``predictor`` and ``mot_tracker1`` objects.

  Args:
    video: an opened ``cv2.VideoCapture`` to read frames from.
    max_frames: maximum number of frames to process.

  Yields:
    A copy of each frame (same channel order as read from ``video``) with a
    rectangle and the tracker id drawn for every track returned by the tracker.
  """

  readFrames = 0
  # Check the budget before reading: the previous post-increment
  # `readFrames > max_frames` test processed max_frames + 1 frames.
  while readFrames < max_frames:
    hasFrame, frame = video.read()
    if not hasFrame:
      break

    # Get prediction results for this frame, keeping only class index 0 (persons)
    outputs = predictor(frame)
    instances = outputs["instances"][outputs['instances'].pred_classes == 0].to('cpu')
    pred_boxes = instances.get_fields()['pred_boxes'].tensor.tolist()
    scores = instances.get_fields()['scores'].tolist()

    # Draw on a copy so the decoded frame itself stays untouched
    img = frame.copy()

    # One row per detection: box coordinates followed by the score. The reshape
    # keeps a (0, 5) array when nothing was detected, which is the shape SORT
    # requires for empty frames (a plain np.array([]) has shape (0,)).
    dets = np.array([box + [score] for box, score in zip(pred_boxes, scores)],
                    dtype=float).reshape(-1, 5)

    # Update the tracker; each returned row is a box followed by its track id
    track_bbs_ids = mot_tracker1.update(dets)

    # Walk the tracks in reversed order, as the original list.reverse() did
    for track in reversed(track_bbs_ids.tolist()):
      x1, y1, x2, y2 = (int(round(coord)) for coord in track[:4])
      tracker_id = str(round(track[-1]))

      cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,0), 2)
      cv2.putText(img, tracker_id, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255,0,0), 2)

    yield img

    readFrames += 1

In [22]:
# Enumerate the frames of the video
# NOTE(review): an earlier cell releases `video` and `video_writer` after its own
# loop; if that cell ran in this session, both must be re-created before this one.
try:
    for visualization in tqdm.tqdm(display_sort_id(video, num_frames),
                                   total=int(num_frames)):

        # Write to video file
        video_writer.write(visualization)
finally:
    # Release resources even if inference or writing fails part-way, so the
    # capture handle is freed and the writer is closed
    video.release()
    video_writer.release()
    cv2.destroyAllWindows()

  max_size = (max_size + (stride - 1)) // stride * stride
100%|█████████▉| 750/751.0 [04:39<00:00,  2.69it/s]


### Detect the color of the objects in the bounding box

In [39]:
# Leading part of the file name of every crop saved by display_sort_id_color.
path = "/content/gdrive/MyDrive/Parallel_score_project/yellow_players"

In [36]:
def display_sort_id_color(video, max_frames):
  """
  Track persons through the video and save a crop of every tracked box.

  Relies on the notebook-level ``predictor``, ``mot_tracker1`` and ``path``.

  Args:
    video: an opened ``cv2.VideoCapture`` to read frames from.
    max_frames: maximum number of frames to process.

  Returns:
    None. Crops are written to disk as ``{path}_frame{frame index}_{i}.jpg``,
    where ``i`` is the position of the track within that frame.
  """
  # HSV bounds kept exactly as in the original notebook.
  # NOTE(review): the lower hue (64) is above the upper hue (61), so cv2.inRange
  # can never match and the mask is always empty. The values also look like
  # degrees/percent, whereas OpenCV's 8-bit HSV uses H in 0-179 and S, V in
  # 0-255 -- TODO choose real yellow bounds.
  lower_yellow = np.array([64, 24, 100])
  upper_yellow = np.array([61, 99, 100])

  readFrames = 0
  # Check the budget before reading: the previous post-increment
  # `readFrames > max_frames` test processed max_frames + 1 frames.
  while readFrames < max_frames:
    hasFrame, frame = video.read()
    if not hasFrame:
      break

    # Get prediction results for this frame, keeping only class index 0 (persons)
    outputs = predictor(frame)
    instances = outputs["instances"][outputs['instances'].pred_classes == 0].to('cpu')
    pred_boxes = instances.get_fields()['pred_boxes'].tensor.tolist()
    scores = instances.get_fields()['scores'].tolist()

    # Keep a PIL image for cropping/saving: the previous code converted it back
    # to an ndarray, which has no .crop(). OpenCV frames are BGR while PIL
    # expects RGB, so swap the channels first to save correct colours.
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    frame_height, frame_width = frame.shape[:2]

    # One row per detection: box coordinates followed by the score. The reshape
    # keeps a (0, 5) array when nothing was detected, which is the shape SORT
    # requires for empty frames.
    dets = np.array([box + [score] for box, score in zip(pred_boxes, scores)],
                    dtype=float).reshape(-1, 5)

    # Update the tracker; each returned row is a box followed by its track id
    track_bbs_ids = mot_tracker1.update(dets)

    # The colour mask depends only on the frame, so compute it once per frame
    # rather than once per track.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower_yellow, upper_yellow)
    result = cv2.bitwise_and(frame, frame, mask=mask)  # NOTE(review): not used yet

    # Walk the tracks in reversed order, as the original list.reverse() did
    for i, track in enumerate(reversed(track_bbs_ids.tolist())):
      x1, y1, x2, y2 = (int(round(coord)) for coord in track[:4])

      # Clamp the box to the frame and skip boxes that end up empty
      x1, y1 = max(x1, 0), max(y1, 0)
      x2, y2 = min(x2, frame_width), min(y2, frame_height)
      if x2 <= x1 or y2 <= y1:
        continue

      cropped_image = img.crop((x1, y1, x2, y2))
      # Include the frame index: `{path}{i}.jpg` alone reused the same names on
      # every frame, so each frame overwrote the previous frame's crops.
      cropped_image.save(f'{path}_frame{readFrames}_{i}.jpg')

    readFrames += 1

In [37]:
# display_sort_id_color contains no `yield` or `return`, so this call runs the whole
# loop immediately and `a` is None.
a = display_sort_id_color(video, num_frames)

  max_size = (max_size + (stride - 1)) // stride * stride


In [38]:
# Display the value returned by the call above.
a