 **TASK 2**

In [5]:
# ─── Cell 1: Install dependencies ───────────────────────────
!apt-get update -qq && apt-get install -qq -y ffmpeg
!pip install -q opencv-python torch torchvision


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [11]:
# Cell 2 ▶️ Trim, Subtitle & Extract Frames

import os, subprocess, glob

# ─ Config ─────────────────────────────────────────────────────────────
# Source clip (← your uploaded file).
VIDEO_PATH = "/content/sad story of cats😥catlover cutecats cats animal ai shorts.mp4"

# Trim window, expressed in seconds.
TRIM_START, TRIM_END = 5, 15

# Files and folders produced by the steps below.
TRIMMED, SUBTITLED = "/content/trimmed.mp4", "/content/subtitled.mp4"
FRAMES_DIR = "/content/frames01"

# The frame folder must exist before ffmpeg is asked to write into it.
os.makedirs(FRAMES_DIR, exist_ok=True)

# 1) Precise trim (seek BEFORE input, re-encode to avoid keyframe gaps)
# Validate the inputs up front: without these checks a missing upload or an
# inverted trim window only shows up as an opaque ffmpeg CalledProcessError.
if not os.path.isfile(VIDEO_PATH):
    raise FileNotFoundError(f"Input video not found: {VIDEO_PATH}")
if not 0 <= TRIM_START < TRIM_END:
    raise ValueError(
        f"Invalid trim window: TRIM_START={TRIM_START}, TRIM_END={TRIM_END}"
    )

subprocess.run([
    "ffmpeg", "-y",           # overwrite TRIMMED if it already exists
    "-ss", str(TRIM_START),   # given before -i, so it is an input option
    "-to", str(TRIM_END),     # likewise given before -i
    "-i", VIDEO_PATH,
    "-c:v", "libx264",        # re-encode video
    "-c:a", "copy",           # copy audio
    TRIMMED
], check=True)                # check=True raises if ffmpeg exits non-zero

# 2) Single subtitle line (using re-encoded trimmed clip)
# Build the caption from the config values so it cannot drift from the trim
# window that was actually applied when TRIM_START / TRIM_END are edited.
subtitle_txt = f"Trimmed from {TRIM_START}s to {TRIM_END}s"
draw = (
    "drawtext=fontfile=/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf:"
    f"text='{subtitle_txt}':fontsize=24:fontcolor=white@1:box=1:boxcolor=black@0.6:"
    "x=(w-text_w)/2:y=h-50"   # horizontally centred, 50 px above the bottom edge
)
subprocess.run([
    "ffmpeg", "-y",
    "-i", TRIMMED,
    "-vf", draw,              # burn the caption into the video stream
    "-c:a", "copy",           # audio is passed through unchanged
    SUBTITLED
], check=True)                # check=True raises if ffmpeg exits non-zero

# 3) Extract 1 frame per second from subtitled video
# Remove frames left over from a previous run first: ffmpeg only overwrites
# the frame numbers it produces, so a shorter clip would otherwise leave
# stale higher-numbered frames behind for the later cells to pick up.
for stale_frame in glob.glob(os.path.join(glob.escape(FRAMES_DIR), "frame_*.png")):
    os.remove(stale_frame)

subprocess.run([
    "ffmpeg", "-y",
    "-i", SUBTITLED,
    "-vf", "fps=1",                              # one output frame per second
    os.path.join(FRAMES_DIR, "frame_%03d.png")   # frame_001.png, frame_002.png, ...
], check=True)

print("✅ Trimmed, subtitled and extracted frames to", FRAMES_DIR)


✅ Trimmed, subtitled and extracted frames to /content/frames01


In [15]:
# Cell 3 ▶️ Depth Estimation with MiDaS

import os, cv2, torch, numpy as np
from torchvision import transforms
from google.colab import files

# 1) Fetch the MiDaS network and its preprocessing transforms via torch.hub
_MIDAS_REPO = "intel-isl/MiDaS"
model = torch.hub.load(_MIDAS_REPO, "MiDaS_small")
model.eval()  # switch to evaluation mode
midas_transforms = torch.hub.load(_MIDAS_REPO, "transforms")
small_transform = midas_transforms.small_transform

# 2) Folder that receives the depth maps (created on first use)
DEPTH_DIR = "/content/depth_maps"
os.makedirs(DEPTH_DIR, exist_ok=True)

# 3) Depth estimator that handles batched/unbatched output
def estimate_depth(frame_bgr):
    """Estimate a depth map for a single BGR frame with the loaded MiDaS model.

    Args:
        frame_bgr: Image array in BGR channel order, e.g. the result of
            ``cv2.imread``.

    Returns:
        ``np.uint8`` array containing the model prediction min-max scaled to
        the range 0..255. If the prediction has no usable value range
        (constant or non-finite), an all-zero map of the same shape is
        returned instead.

    Raises:
        ValueError: If ``frame_bgr`` is ``None``.
    """
    if frame_bgr is None:
        # cv2.imread returns None for unreadable files; fail with a clear
        # message instead of an assertion error from inside cv2.cvtColor.
        raise ValueError("frame_bgr is None - the image could not be read")

    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    inp = small_transform(rgb)
    # If the transform already produced a batched (4-D) tensor, use it as is;
    # otherwise add the batch axis the model call expects.
    batch = inp if inp.ndim == 4 else inp.unsqueeze(0)
    with torch.no_grad():
        pred = model(batch).squeeze().cpu().numpy()
    # NOTE(review): the map keeps whatever resolution the model outputs, which
    # presumably differs from the input frame size - confirm whether it should
    # be resized back to the frame.

    lo = pred.min()
    span = pred.max() - lo
    # Guard the division: a flat prediction (span == 0) or a non-finite span
    # would otherwise yield NaNs, and casting NaN to uint8 is undefined.
    if not (np.isfinite(span) and span > 0):
        return np.zeros(pred.shape, dtype=np.uint8)
    norm = (pred - lo) / span
    return (norm * 255).astype(np.uint8)

# 4) Loop through each extracted frame
# Read from FRAMES_DIR (set in the config of the extraction cell) rather than
# repeating the literal path, and keep only PNG entries so stray items such
# as notebook checkpoint folders are never handed to OpenCV.
frames = sorted(f for f in os.listdir(FRAMES_DIR) if f.lower().endswith(".png"))
if not frames:
    raise FileNotFoundError(f"No PNG frames found in {FRAMES_DIR}")
for fname in frames:
    path = os.path.join(FRAMES_DIR, fname)
    frame = cv2.imread(path)
    if frame is None:
        # cv2.imread reports an unreadable file by returning None.
        print("⚠ Skipped unreadable file", fname)
        continue
    depth = estimate_depth(frame)
    out_name = fname.replace("frame", "depth")   # frame_001.png -> depth_001.png
    cv2.imwrite(os.path.join(DEPTH_DIR, out_name), depth)
    print("✔ Processed", fname)

# 5) Zip & download depth maps
!zip -q -r /content/depth_results.zip {DEPTH_DIR}
files.download("/content/depth_results.zip")


Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master
Using cache found in /root/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master


Loading weights:  None


Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


✔ Processed frame_001.png
✔ Processed frame_002.png
✔ Processed frame_003.png
✔ Processed frame_004.png
✔ Processed frame_005.png
✔ Processed frame_006.png
✔ Processed frame_007.png
✔ Processed frame_008.png
✔ Processed frame_009.png
✔ Processed frame_010.png


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>