In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import cv2
import numpy as np

# -----------------------------
# Model Setup (DeepLabV3)
# -----------------------------
# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Fetch the pretrained DeepLabV3/ResNet-50 segmentation network from the
# pinned torchvision hub tag, move it to the chosen device and switch it to
# inference mode (disables dropout / batch-norm statistic updates).
model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', pretrained=True)
model = model.to(device)
model.eval()

# Index of the "person" label in the model's output channels. The pretrained
# torchvision weights use the 21-label Pascal VOC ordering (background + 20
# categories), where person is index 15.
PERSON_CLASS = 15

# -----------------------------
# Video Settings
# -----------------------------
input_video_path = "/content/drive/MyDrive/video/some people walking.mp4"
output_video_path = "/content/drive/MyDrive/video/some_ppl_opt.mp4"

cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Fail loudly: with an unopened capture the processing loop never runs,
    # yet the script would still report that a video was saved.
    raise OSError(f"Could not open input video: {input_video_path}")

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float. Truncating to int turns e.g. 29.97 into 29,
# which makes the output play at a slightly different speed than the input.
fps = cap.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
if not out.isOpened():
    # The writer does not raise on failure (bad path, missing codec, invalid
    # fps/size); it just drops every frame, so check explicitly.
    cap.release()
    raise OSError(f"Could not open output video for writing: {output_video_path}")

# -----------------------------
# Transform for model input
# -----------------------------
# Preprocessing applied to each PIL frame before inference:
#   1. ToTensor   - convert the image to a float tensor scaled to [0, 1]
#   2. Normalize  - per-channel standardisation with the ImageNet mean/std
#                   that the pretrained torchvision models expect
preprocess_steps = [
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocess_steps)

# -----------------------------
# Font Setup
# -----------------------------
# Watermark typeface. If the custom TrueType file cannot be loaded, fall back
# to Pillow's built-in default font so the pipeline still runs.
font_size = 180
font_path = "/content/drive/MyDrive/fonts/Aesthetic Moment  Italic Personal Used.ttf"
try:
    font = ImageFont.truetype(font_path, font_size)
except (OSError, ImportError):
    # OSError: the font file is missing or unreadable.
    # ImportError: Pillow was built without FreeType support.
    # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
    # NOTE: the default font ignores font_size, so the fallback text may be
    # much smaller than intended.
    print("Custom font not found, using default")
    font = ImageFont.load_default()

# -----------------------------
# Frame Processing Loop
# -----------------------------
# For every frame:
#   1. segment people with DeepLabV3,
#   2. lightly blur the whole frame,
#   3. stamp the watermark text onto the blurred copy,
#   4. paste the original (sharp) person pixels back on top,
# so the text appears *behind* the people in the final video.
watermark_text = '''Bold Moves'''
offset = 100        # pixels the text is shifted up from the vertical centre
text_opacity = 128  # 0 = invisible, 255 = solid; strength of the black text

# Greyscale stencil holding the rendered watermark. Its pixel values act as
# per-pixel opacity when pasting black onto a frame. It only depends on the
# frame size, so it is built once (on the first frame) instead of
# re-measuring and re-drawing the text for every frame.
text_mask = None

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or a read error): stop processing.
            break

        # OpenCV delivers BGR arrays; PIL and the model expect RGB.
        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Run segmentation without tracking gradients; 'out' holds the
        # per-class score maps for the single image in the batch.
        input_tensor = transform(pil_image).unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(input_tensor)['out'][0]
        mask = output.argmax(0).byte().cpu()

        # 255 where the winning class is "person", 0 elsewhere.
        binary_mask = (mask == PERSON_CLASS).numpy().astype('uint8') * 255
        binary_mask_image = Image.fromarray(binary_mask)

        # Blurred copy of the frame that becomes the background layer.
        blurred_image = pil_image.filter(ImageFilter.GaussianBlur(2))

        if text_mask is None or text_mask.size != blurred_image.size:
            text_mask = Image.new('L', blurred_image.size, 0)
            mask_draw = ImageDraw.Draw(text_mask)

            text_bbox = mask_draw.textbbox((0, 0), watermark_text, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]

            # Horizontally centred, vertically centred then nudged upwards.
            position = (
                (blurred_image.width - text_width) // 2,
                (blurred_image.height - text_height) // 2 - offset
            )
            mask_draw.text(position, watermark_text, fill=text_opacity, font=font)

        # Blend black through the stencil. Drawing with an RGBA fill directly
        # on an RGB image ignores the alpha component and yields fully opaque
        # text, so the translucency has to come from the paste mask instead.
        blurred_image.paste((0, 0, 0), (0, 0), text_mask)

        # Where the person mask is set take the sharp original, elsewhere the
        # blurred + watermarked background.
        final_image = Image.composite(pil_image, blurred_image, binary_mask_image)

        # Convert back to OpenCV's BGR layout and append to the output video.
        final_frame = cv2.cvtColor(np.array(final_image), cv2.COLOR_RGB2BGR)
        out.write(final_frame)
finally:
    # Always release the handles, even if a frame fails, so the output file
    # is finalised and the input is not left locked.
    cap.release()
    out.release()

print("✅ Video saved to:", output_video_path)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


✅ Video saved to: /content/drive/MyDrive/video/some_ppl_opt.mp4
