## Setup and Dependencies

In [1]:
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
%pip install -qr requirements.txt comet_ml
!pip install av torch opencv-python

import torch
import utils
import av
import cv2
import os
from PIL import Image
display = utils.notebook_init()

YOLOv5 🚀 v7.0-339-g150a1a31 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)


Setup complete ✅ (2 CPUs, 12.7 GB RAM, 30.3/78.2 GB disk)


In [13]:
import json

## Unzipping Dataset


In [4]:
# Extract the dataset archive that sits next to the yolov5 checkout into that same parent
# directory (-q silences the per-file listing, -d selects the destination).
!unzip -q ../logo.zip -d ../

## Training the Model

In [5]:
# Fine-tune from the pretrained yolov5s.pt checkpoint at 640 px, batch size 4, for 400 epochs.
# The log below confirms nosave=True and cache=ram for the --nosave / --cache flags.
# NOTE(review): custom.yaml is not created by any cell shown here; it presumably has to be
# provided where train.py can resolve it before this cell is run - confirm.
!python train.py --img 640 --batch 4 --epochs 400 --data custom.yaml --weights yolov5s.pt --nosave --cache

2024-07-13 11:49:56.573481: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-13 11:49:56.573539: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-13 11:49:56.575531: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data=custom.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=400, batch_size=4, imgsz=640, rect=False, resume=False, nosave=True, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data/hyps, resume_evolve=None, bucket=, cache=ram, image_weights=False, device=, multi_scale=False, single_cls

In [6]:
# Load the trained weights through torch.hub; the output below shows the repo being
# downloaded again and the model being wrapped with AutoShape.
# NOTE(review): the run directory `exp2` is hard-coded - verify it matches the folder the
# training cell above actually wrote to before relying on a fresh Run All.
model = torch.hub.load('ultralytics/yolov5', 'custom', path='runs/train/exp2/weights/last.pt')

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to /root/.cache/torch/hub/master.zip
YOLOv5 🚀 v7.0-339-g150a1a31 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
Model summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


## Downloading Trained Model


In [8]:
# Persist only the parameter tensors (state_dict), not the model object itself; restoring
# from this file requires rebuilding the same architecture and calling load_state_dict().
torch.save(model.state_dict(), 'yolov5_model.pth')

In [9]:
# Colab-only helper: sends the saved weights file to the browser as a download.
from google.colab import files
files.download('yolov5_model.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# NOTE(review): this disabled snippet does not match the save cell above. That cell writes
# 'yolov5_model.pth' containing a state_dict, whereas this loads 'yolov5_entire_model.pth'
# and calls .eval() as if torch.load() returned a full model object. Either save the whole
# model under this filename or rebuild the model and use load_state_dict() before enabling.
# import torch
# model = torch.load('yolov5_entire_model.pth')
# model.eval()

## Logo Detection in Videos

### Extracting Frames and Timestamps


In [24]:
# Function to extract frames and timestamps from video
def extract_frames(video_path):
    """Decode every frame of the first video stream of a file.

    Args:
        video_path: Location of the video, passed straight to ``av.open``.

    Returns:
        A ``(frames, timestamps)`` tuple of two equally long lists, in decode
        order: ``frames`` holds ``frame.to_image()`` for each decoded frame and
        ``timestamps`` holds the matching ``frame.time`` value.

    Note:
        All frames are kept in memory at once, so memory use grows with the
        length of the video.
    """
    frames = []
    timestamps = []
    # The context manager closes the container even if decoding raises part-way
    # through, so the underlying file handle is never leaked.
    with av.open(video_path) as container:
        for frame in container.decode(video=0):
            frames.append(frame.to_image())  # PIL image
            timestamps.append(frame.time)
    return frames, timestamps

### Detecting Logos

In [25]:
# Function to detect logos in frames
def detect_logos(frames, timestamps, model, pepsi_class_id=0, cocacola_class_id=1):
    """Run the detector on each frame and collect the timestamps where each logo appears.

    Args:
        frames: Sequence of frames; each one is passed to ``model`` unchanged.
        timestamps: Sequence parallel to ``frames``; ``timestamps[i]`` belongs
            to ``frames[i]``.
        model: Callable returning an object whose ``xyxy[0]`` is an iterable of
            detections, each with its class id at index 5.
        pepsi_class_id: Class id counted as Pepsi (default 0).
        cocacola_class_id: Class id counted as CocaCola (default 1).

    Returns:
        ``(pepsi_timestamps, cocacola_timestamps)``: two lists in frame order.
        A frame contributes its timestamp at most once per logo, however many
        boxes of that logo were detected in it.
    """
    pepsi_timestamps = []
    cocacola_timestamps = []
    for i, frame in enumerate(frames):
        results = model(frame)
        # Reduce the detections to the set of classes present, so several boxes
        # of the same logo in one frame do not duplicate the timestamp.
        class_ids = {int(detection[5]) for detection in results.xyxy[0]}
        if pepsi_class_id in class_ids:
            pepsi_timestamps.append(timestamps[i])
        if cocacola_class_id in class_ids:
            cocacola_timestamps.append(timestamps[i])
    return pepsi_timestamps, cocacola_timestamps

### Generating JSON Output

In [26]:
# Function to generate JSON output
def generate_json_output(pepsi_timestamps, cocacola_timestamps, output_path):
    """Serialize the two timestamp lists to a JSON file.

    The file at ``output_path`` is (over)written with a single object that maps
    "Pepsi_pts" to ``pepsi_timestamps`` and "CocaCola_pts" to
    ``cocacola_timestamps``, pretty-printed with a 4-space indent.
    """
    report = {}
    report["Pepsi_pts"] = pepsi_timestamps
    report["CocaCola_pts"] = cocacola_timestamps

    with open(output_path, 'w') as sink:
        json.dump(report, sink, indent=4)

### Processing Video and Outputting JSON


In [27]:
# Main function to process the video
def process_video(video_path, output_json_path, detector=None):
    """Extract frames from a video, detect logos in them and write the JSON report.

    Args:
        video_path: Video file handed to ``extract_frames``.
        output_json_path: Destination file handed to ``generate_json_output``.
        detector: Detection model handed to ``detect_logos``. When omitted, the
            notebook-level ``model`` variable is used, so existing two-argument
            calls behave as before.

    Returns:
        None. The result is the JSON file written to ``output_json_path``.
    """
    if detector is None:
        # Fall back to the global only when no model is supplied; passing one
        # explicitly removes the dependency on earlier cells' kernel state.
        detector = model
    frames, timestamps = extract_frames(video_path)
    pepsi_timestamps, cocacola_timestamps = detect_logos(frames, timestamps, detector)
    generate_json_output(pepsi_timestamps, cocacola_timestamps, output_json_path)

In [35]:
# Input video and output report locations. These are absolute Colab paths; the detect.py
# output further down shows ../video_demo.mp4 resolving to this same /content location.
video_path = '/content/video_demo.mp4'
output_json_path = '/content/output.json'

In [38]:
process_video(video_path, output_json_path)

In [39]:
def print_json_contents(json_path):
    """Read the JSON document at ``json_path`` and print it with a 4-space indent."""
    with open(json_path, 'r') as handle:
        parsed = json.load(handle)
    pretty = json.dumps(parsed, indent=4)
    print(pretty)

print_json_contents(output_json_path)

{
    "Pepsi_pts": [
        0.0,
        0.03333333333333333,
        0.06666666666666667,
        0.1,
        0.13333333333333333,
        0.16666666666666666,
        0.2,
        0.23333333333333334,
        0.26666666666666666,
        0.3,
        0.3333333333333333,
        0.36666666666666664,
        0.4,
        0.43333333333333335,
        0.4666666666666667,
        0.5,
        0.5333333333333333,
        0.5666666666666667,
        0.6,
        0.6333333333333333,
        0.6666666666666666,
        0.7,
        0.7333333333333333,
        0.7666666666666667,
        0.8,
        0.8333333333333334,
        0.8666666666666667,
        0.9,
        0.9333333333333333,
        0.9666666666666667,
        1.0,
        1.0333333333333334,
        1.0666666666666667,
        1.1,
        1.1333333333333333,
        1.1666666666666667,
        1.2,
        1.2333333333333334,
        1.2666666666666666,
        1.3,
        1.3333333333333333,
        1.3666666666666667,
     

## Running Detection Script


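Running YOLOv5's own `detect.py` on the same video lets us see the detected logos drawn on the frames themselves; the annotated output is saved under `runs/detect/`.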

In [40]:
# Run YOLOv5's stock inference script on the demo video with the trained weights,
# keeping only detections with confidence >= 0.25.
!python detect.py --weights runs/train/exp2/weights/last.pt --img 640 --conf 0.25 --source ../video_demo.mp4

[34m[1mdetect: [0mweights=['runs/train/exp2/weights/last.pt'], source=../video_demo.mp4, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.25, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=False, save_csv=False, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-339-g150a1a31 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)

Fusing layers... 
Model summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs
video 1/1 (1/1526) /content/video_demo.mp4: 640x384 1 Pepsi, 1 CocaCola, 57.2ms
video 1/1 (2/1526) /content/video_demo.mp4: 640x384 1 Pepsi, 1 CocaCola, 7.6ms
video 1/1 (3/1526) /content/video_demo.mp4: 640x384 1 Pepsi, 1 CocaCola, 8.0ms
video 1/1 (4/1526) /content/video_demo.mp4: 640x384 1 Pepsi, 1 CocaCola, 8.1ms
