<a href="https://colab.research.google.com/github/Projectmte2023/tankbot_one/blob/main/Litter_Tracking_and_Counting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **DETECTING, TRACKING AND COUNTING THE LITTER**

Developed with reference to the ‘notebooks/how-to-track-and-count-vehicles-with-yolov8.ipynb’ notebook in the Roboflow notebooks repository on GitHub: https://github.com/roboflow/notebooks

Upload the trained weights (as `weights.zip`) and the video to be processed to the notebook's working directory.

Enable GPU access: Edit -> Notebook settings -> Hardware accelerator -> GPU -> Save

In [None]:
# Check access to the GPU: nvidia-smi prints the GPU(s) visible to this runtime
!nvidia-smi

In [None]:
import os
# HOME is the working directory at the time this cell runs; later cells build
# file paths from it and `%cd` back into it, so run this cell before any `%cd`.
HOME = os.getcwd()
print(HOME)

Set the source path to the name of the uploaded video

In [None]:
SOURCE_VIDEO_PATH = f"{HOME}/can_rolling.mp4" # path of the uploaded video to be processed; change the file name to match your upload

Install YOLOv8

In [None]:
# Install YOLOv8 (ultralytics), pinned to the version this notebook was written against
!pip install ultralytics==8.0.20

# clear the (long) pip output from the cell
from IPython import display
display.clear_output()

# import the package and run its built-in environment check
import ultralytics
ultralytics.checks()

**Install ByteTrack**

In [None]:
# Clone ByteTrack next to the notebook and work from inside the checkout.
# NOTE(review): `git clone` fails if {HOME}/ByteTrack already exists, so this
# cell is not re-runnable within the same session without deleting that folder.
%cd {HOME}
!git clone https://github.com/ifzhang/ByteTrack.git
%cd {HOME}/ByteTrack

# rewrite the onnx pin in ByteTrack's requirements.txt (1.8.1 -> 1.9.0) before installing
!sed -i 's/onnx==1.8.1/onnx==1.9.0/g' requirements.txt

# install ByteTrack's requirements, ByteTrack itself (develop mode) and the extra packages used below
# NOTE(review): cython_bbox, onemetric, yolox and loguru are installed unpinned — consider pinning versions for reproducibility
!pip3 install -q -r requirements.txt
!python3 setup.py -q develop
!pip install -q cython_bbox
!pip install -q onemetric
!pip install yolox
!pip install loguru

# clear the (long) installation output from the cell
from IPython import display
display.clear_output()


# make the ByteTrack checkout importable from this kernel
import sys
sys.path.append(f"{HOME}/ByteTrack")

# sanity check: the import works and the version is printed
import yolox
print("yolox.__version__:", yolox.__version__)

In [None]:
# `lap` is installed here, right before the tracker import
# NOTE(review): presumably required by yolox.tracker.byte_tracker — confirm; unpinned
!pip install lap
from yolox.tracker.byte_tracker import BYTETracker, STrack
from onemetric.cv.utils.iou import box_iou_batch
from dataclasses import dataclass


# Immutable bundle of settings handed to the BYTETracker constructor.
# NOTE(review): the per-field meanings below are inferred from the field names —
# confirm against the ByteTrack source before tuning.
@dataclass(frozen=True)
class BYTETrackerArgs:
    # presumably the detection-confidence threshold used by the tracker — TODO confirm
    track_thresh: float = 0.25
    # presumably how many frames a lost track is kept alive — TODO confirm
    track_buffer: int = 30
    # presumably the matching threshold used when associating boxes with tracks — TODO confirm
    match_thresh: float = 0.8
    # presumably boxes with a larger aspect ratio are discarded — TODO confirm
    aspect_ratio_thresh: float = 3.0
    # presumably the minimum box area that is kept — TODO confirm
    min_box_area: float = 1.0
    # presumably toggles MOT20-specific behaviour inside ByteTrack — TODO confirm
    mot20: bool = False

Install Roboflow Supervision

In [None]:
# Install Roboflow Supervision, pinned to 0.1.0: the import paths used in the
# following cells (supervision.tools.*, supervision.video.*) belong to this release
!pip install supervision==0.1.0


# clear the pip output from the cell
from IPython import display
display.clear_output()


# sanity check: the import works and the installed version is printed
import supervision
print("supervision.__version__:", supervision.__version__)

In [None]:
from supervision.draw.color import ColorPalette
from supervision.geometry.dataclasses import Point
from supervision.video.dataclasses import VideoInfo
from supervision.video.source import get_video_frames_generator
from supervision.video.sink import VideoSink
from supervision.notebook.utils import show_frame_in_notebook
from supervision.tools.detections import Detections, BoxAnnotator
from supervision.tools.line_counter import LineCounter, LineCounterAnnotator

Tracking Utils

In [None]:
from typing import List

import numpy as np


# packs Detections into the array handed to BYTETracker.update as `output_results`
def detections2boxes(detections: Detections) -> np.ndarray:
    """Return the detection boxes with their confidence appended as the last column.

    Each output row is the corresponding row of ``detections.xyxy`` followed by
    that detection's confidence (i.e. ``[x1, y1, x2, y2, confidence]`` assuming
    ``xyxy`` has four columns).
    """
    boxes = detections.xyxy
    # turn the 1-D confidence vector into a column so it can sit next to the boxes
    scores = detections.confidence[:, None]
    return np.hstack([boxes, scores])


# stacks the boxes of all tracks into one array for match_detections_with_tracks
def tracks2boxes(tracks: List[STrack]) -> np.ndarray:
    """Collect ``track.tlbr`` of every track, in order, into a float ndarray.

    One row per track; an empty ``tracks`` list yields an empty array.
    """
    boxes = []
    for track in tracks:
        boxes.append(track.tlbr)
    return np.array(boxes, dtype=float)


# assigns a tracker id to each detection by matching it against the tracked boxes
def match_detections_with_tracks(
    detections: Detections,
    tracks: List[STrack]
) -> list:
    """Pair every detection with the track that overlaps it best.

    Args:
        detections: detections of the current frame; ``len(detections)`` and
            ``detections.xyxy`` are used.
        tracks: tracks to match against; ``tlbr`` and ``track_id`` of each
            track are read.

    Returns:
        A list with exactly ``len(detections)`` entries. Entry ``i`` is the
        ``track_id`` of the track whose best-overlapping detection is ``i``
        (with a non-zero IoU), or ``None`` when no track claimed that
        detection. If several tracks pick the same detection, the last one in
        ``tracks`` wins.
    """
    # One slot per detection, always. The caller turns this list into a boolean
    # mask over the detections, so returning a zero-length result while
    # detections exist (e.g. no tracks yet) would break the filtering step.
    tracker_ids = [None] * len(detections)
    if len(detections) == 0 or len(tracks) == 0:
        return tracker_ids

    tracks_boxes = tracks2boxes(tracks=tracks)
    # iou[t, d] is the overlap between track t and detection d
    iou = box_iou_batch(tracks_boxes, detections.xyxy)
    # for every track, the index of the detection it overlaps most
    track2detection = np.argmax(iou, axis=1)

    for tracker_index, detection_index in enumerate(track2detection):
        # an IoU of exactly 0 means the track touches no detection at all
        if iou[tracker_index, detection_index] != 0:
            tracker_ids[detection_index] = tracks[tracker_index].track_id

    return tracker_ids

Loading the pretrained YOLOv8 model

In [None]:
# Unzip the uploaded weights archive (weights.zip) inside HOME
# NOTE(review): the MODEL path used later implies the archive unpacks to runs/detect/train/weights/ — confirm
%cd {HOME}
!unzip weights.zip

In [None]:
# Path of the weights file to load into the model.
# NOTE(review): this path is relative, so it resolves against the current
# working directory (set to HOME by the `%cd {HOME}` in the unzip cell) — confirm
MODEL = "runs/detect/train/weights/best.pt"

In [None]:
from ultralytics import YOLO

# load the weights at MODEL into a YOLO model object
model = YOLO(MODEL)
# NOTE(review): fuse() presumably merges layers for faster inference — confirm in the ultralytics docs
model.fuse()

Predicting and Annotating a single frame from the video

In [None]:
# mapping class_id to class_name, taken from the loaded model
CLASS_NAMES_DICT = model.model.names
# class_ids of interest - bottles,cans,cigarettes
# NOTE(review): verify that ids 1, 2, 3 really map to these names by inspecting CLASS_NAMES_DICT for the uploaded weights
CLASS_ID = [1, 2, 3]

In [None]:
# frame generator
generator = get_video_frames_generator(SOURCE_VIDEO_PATH)
# BoxAnnotator
box_annotator = BoxAnnotator(color=ColorPalette(), thickness=4, text_thickness=4, text_scale=2)
# acquiring a frame
iterator = iter(generator)
frame = next(iterator)
# predicting a single frame using the model
results = model(frame)
detections = Detections(
    xyxy=results[0].boxes.xyxy.cpu().numpy(),
    confidence=results[0].boxes.conf.cpu().numpy(),
    class_id=results[0].boxes.cls.cpu().numpy().astype(int)
)
# formatting the labels
labels = [
    f"{CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
    for _, confidence, class_id, tracker_id
    in detections
]
# annotating and displaying the frame
frame = box_annotator.annotate(frame=frame, detections=detections, labels=labels)

%matplotlib inline
show_frame_in_notebook(frame, (16, 16))

Predicting and Annotating the entire video

In [None]:
# setting the starting and ending points of the line counter as per the dimensions and the region of interest from the previous frame
# NOTE(review): assuming Point takes (x, y), both points have y = 0, so the
# counting line lies on the top edge of the frame from x = 600 to x = 1000
# (3840-2840) — confirm this is the intended crossing line for the video
LINE_START = Point(600, 0)
LINE_END = Point(3840-2840, 0)

# path of the annotated result video written by the processing cell
TARGET_VIDEO_PATH = f"{HOME}/can-rollv1-result.mp4"

In [None]:
# display the source video's metadata as the cell output (useful when choosing the counting-line coordinates)
VideoInfo.from_video_path(SOURCE_VIDEO_PATH)

In [None]:
from tqdm.notebook import tqdm


# creating the BYTETracker instance
byte_tracker = BYTETracker(BYTETrackerArgs())
# creating the VideoInfo instance
video_info = VideoInfo.from_video_path(SOURCE_VIDEO_PATH)
# creating the frame generator
generator = get_video_frames_generator(SOURCE_VIDEO_PATH)
# creating the LineCounter instance
line_counter = LineCounter(start=LINE_START, end=LINE_END)
# creating instances of BoxAnnotator and LineCounterAnnotator
box_annotator = BoxAnnotator(color=ColorPalette(), thickness=4, text_thickness=4, text_scale=2)
line_annotator = LineCounterAnnotator(thickness=4, text_thickness=4, text_scale=2)

# Open the target video and the detections log ONCE for the whole run.
# Re-opening output.txt in "w" mode inside the loop truncated it on every
# frame, so only the last frame's detections were ever kept.
with VideoSink(TARGET_VIDEO_PATH, video_info) as sink, open("output.txt", "w") as file:

    for frame in tqdm(generator, total=video_info.total_frames):
        # model prediction on a single frame, converted to supervision Detections
        results = model(frame)
        detections = Detections(
            xyxy=results[0].boxes.xyxy.cpu().numpy(),
            confidence=results[0].boxes.conf.cpu().numpy(),
            class_id=results[0].boxes.cls.cpu().numpy().astype(int)
        )
        # filtering out unwanted classes
        mask = np.isin(detections.class_id, CLASS_ID)
        detections.filter(mask=mask, inplace=True)
        # tracking detections
        tracks = byte_tracker.update(
            output_results=detections2boxes(detections=detections),
            img_info=frame.shape,
            img_size=frame.shape
        )
        tracker_ids = match_detections_with_tracks(detections=detections, tracks=tracks)
        detections.tracker_id = np.array(tracker_ids)
        # keeping only the detections that were matched to a track
        mask = np.array([track_id is not None for track_id in detections.tracker_id], dtype=bool)
        detections.filter(mask=mask, inplace=True)
        # formatting the labels: "#<tracker id> <class name> <confidence>"
        labels = [
            f"#{tracker_id} {CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
            for _, confidence, class_id, tracker_id
            in detections
        ]
        # updating the line counter
        line_counter.update(detections=detections)
        # annotating the frame
        frame = box_annotator.annotate(frame=frame, detections=detections, labels=labels)
        line_annotator.annotate(frame=frame, line_counter=line_counter)
        # Write each annotated frame exactly once: writing it twice duplicated
        # every frame in the output video.
        sink.write_frame(frame)
        # log this frame's detections; the file keeps one record per frame
        print(detections, file=file)