In [1]:
import cv2
import torch
import cv2
import os
from pathlib import Path
from IPython.display import display, Image as IPImage, HTML
from PIL import Image
import base64

In [None]:
using_colab = True

if using_colab:

    import torch
    import torchvision

    print("PyTorch version:", torch.__version__)
    print("Torchvision version:", torchvision.__version__)
    print("CUDA is available:", torch.cuda.is_available())

    import sys

    !{sys.executable} -m pip install opencv-python matplotlib
    !{sys.executable} -m pip install 'git+https://github.com/facebookresearch/sam2.git'

    !mkdir -p videos
    !wget -P videos https://dl.fbaipublicfiles.com/segment_anything_2/assets/bedroom.zip
    !unzip -d videos videos/bedroom.zip

    !mkdir -p ../checkpoints/
    !wget -P ../checkpoints/ https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt

PyTorch version: 2.6.0+cu124
Torchvision version: 0.21.0+cu124
CUDA is available: True
Collecting git+https://github.com/facebookresearch/sam2.git
  Cloning https://github.com/facebookresearch/sam2.git to /tmp/pip-req-build-mlchzx_b
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/sam2.git /tmp/pip-req-build-mlchzx_b
  Resolved https://github.com/facebookresearch/sam2.git to commit 2b90b9f5ceec907a1c18123530e92e794ad901a4
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
--2025-07-17 11:02:01--  https://dl.fbaipublicfiles.com/segment_anything_2/assets/bedroom.zip
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 3.169.202.3, 3.169.202.78, 3.169.202.87, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|3.169.202.3|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12688166 (12M

In [None]:
!git clone https://github.com/ultralytics/yolov5

%cd yolov5

!pip install -r requirements.txt

In [None]:
# Source frame directory, destination GIF, and per-frame display time (ms)
folder_path = "/content/videos/bedroom"
output_gif_path = "/content/videos/bedroom.gif"
duration = 100

# Keep only image files (extension match is case-insensitive), in name order
image_files = [
    f
    for f in os.listdir(folder_path)
    if f.lower().endswith(('.jpg', '.png', '.jpeg', '.bmp'))
]
image_files.sort()

# Open every frame and convert it to RGB
frames = []
for image_file in image_files:
    image_path = os.path.join(folder_path, image_file)
    img = Image.open(image_path).convert('RGB')
    frames.append(img)

# Write the animation: the first frame is saved with all the others appended
if not frames:
    print("No images to convert.")
else:
    first_frame, *remaining_frames = frames
    first_frame.save(
        output_gif_path,
        format='GIF',
        save_all=True,
        append_images=remaining_frames,
        duration=duration,
        loop=0,
    )
    print(f"GIF saved at: {output_gif_path}")


In [None]:
# Load the GIF written to `output_gif_path` and base64-encode its bytes
with open(output_gif_path, "rb") as gif_file:
    gif_bytes = gif_file.read()
gif_base64 = base64.b64encode(gif_bytes).decode("utf-8")

# Inline the encoded GIF as a data URI so the output needs no external file
html_code = f'''
<h3>Generated GIF:</h3>
<img src="data:image/gif;base64,{gif_base64}" alt="Generated GIF">
'''

# Render the snippet in the cell output
display(HTML(html_code))

In [None]:
# YOLOv5-small (pretrained on COCO), fetched through torch.hub
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Input frames and the directory that receives the annotated copies
folder_path = "/content/videos/bedroom"
output_path = "/content/videos/yolo_output"
os.makedirs(output_path, exist_ok=True)

# Image files only (case-insensitive extension match), in name order
image_files = [
    f
    for f in os.listdir(folder_path)
    if f.lower().endswith(('.jpg', '.png', '.jpeg', '.bmp'))
]
image_files.sort()

# Run detection frame by frame and save each annotated frame under its
# original file name
for image_file in image_files:
    image_path = os.path.join(folder_path, image_file)
    out_image_path = os.path.join(output_path, image_file)

    results = model(image_path)

    # Draw the bounding boxes, then take the annotated image
    results.render()
    result_img = results.ims[0]

    # Reverse the channel axis (RGB -> BGR) because OpenCV writes BGR
    cv2.imwrite(out_image_path, result_img[:, :, ::-1])

print("YOLO detection complete. Output saved to:", output_path)


In [None]:
# Assemble the annotated frames found in `output_path` (set by the detection
# cell) into an animated GIF.
output_gif_path = "/content/videos/yolo_detected.gif"

# Image files only (case-insensitive extension match), in name order
image_files = sorted([f for f in os.listdir(output_path) if f.lower().endswith(('.jpg', '.png', '.jpeg', '.bmp'))])
frames = [Image.open(os.path.join(output_path, f)).convert("RGB") for f in image_files]

if frames:
    frames[0].save(
        output_gif_path,
        format='GIF',
        append_images=frames[1:],
        save_all=True,
        duration=100,  # milliseconds per frame
        loop=0
    )
    print("YOLO detection GIF saved at:", output_gif_path)
else:
    # Say so explicitly instead of silently writing nothing: without this, the
    # first visible symptom is a later cell failing to open `output_gif_path`.
    print("No images to convert.")


In [None]:

# Load the GIF written to `output_gif_path` and base64-encode its bytes
with open(output_gif_path, "rb") as gif_file:
    gif_bytes = gif_file.read()
gif_base64 = base64.b64encode(gif_bytes).decode("utf-8")

# Inline the encoded GIF as a data URI so the output needs no external file
html_code = f'''
<h3>Generated GIF:</h3>
<img src="data:image/gif;base64,{gif_base64}" alt="Generated GIF">
'''

# Render the snippet in the cell output
display(HTML(html_code))