<a href="https://colab.research.google.com/github/SudiptenduBanerjee/3d-video-project/blob/main/3d_video_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import cv2
import os
import torch
import numpy as np
from PIL import Image
from google.colab import files

# Step 1: Install dependencies
!pip install torch torchvision opencv-python-headless numpy timm

# Step 2: Upload video
uploaded = files.upload()
video_path = list(uploaded.keys())[0]

# Step 3: Extract frames
def extract_frames(video_path, output_dir="frames"):
    """Read every frame of a video, saving each as a PNG and keeping it in memory.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        output_dir: Directory that receives the ``frame_NNNN.png`` files; it is
            created when missing.

    Returns:
        A ``(frames, frame_paths)`` tuple: the frames returned by ``cap.read()``
        and the paths they were written to, both in the order they were read.
        Both lists are empty when no frame could be read.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    frames = []
    frame_paths = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # The running frame count doubles as the zero-padded file index.
            frame_path = os.path.join(output_dir, f"frame_{len(frames):04d}.png")
            cv2.imwrite(frame_path, frame)
            frames.append(frame)
            frame_paths.append(frame_path)
    finally:
        # Release the capture handle even if reading or writing raises.
        cap.release()

    return frames, frame_paths

# Split the uploaded video into frames and report how many were read.
frames, frame_paths = extract_frames(video_path)
print(f"Extracted {len(frames)} frames.")

# Step 4: Depth estimation with MiDaS
# Pick the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fetch the small MiDaS model from torch.hub, switch it to inference mode and
# move it to the selected device.
midas = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
midas = midas.eval().to(device)

# The matching preprocessing transform ships in the same hub repository.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = midas_transforms.small_transform

def generate_depth_map(frame_path):
    """Estimate a depth map for one saved frame with the module-level MiDaS model.

    Args:
        frame_path: Path of an image file to load with ``cv2.imread``.

    Returns:
        A ``uint8`` array resized to the frame's height and width, holding the
        model prediction min-max scaled to the 0-255 range. A constant
        prediction yields an all-zero map.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``frame_path``.
    """
    img = cv2.imread(frame_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read frame image: {frame_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    input_batch = transform(img).to(device)

    with torch.no_grad():
        prediction = midas(input_batch)
        # Resize the prediction back to the frame's (height, width).
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    depth_map = prediction.cpu().numpy()
    depth_min = depth_map.min()
    depth_range = depth_map.max() - depth_min
    if depth_range > 0:
        depth_map = (depth_map - depth_min) / depth_range * 255.0
    else:
        # A flat prediction would divide by zero and fill the map with NaNs.
        depth_map = np.zeros_like(depth_map)
    return depth_map.astype(np.uint8)

# Estimate one depth map per saved frame, keeping the frame order.
depth_maps = list(map(generate_depth_map, frame_paths))
print("Generated depth maps for all frames.")

# Step 5: Create stereoscopic pairs
def _forward_warp(frame, target_x):
    """Move every pixel of ``frame`` to column ``target_x`` within its own row.

    Returns the warped image (unwritten pixels are zero) and a boolean mask of
    the destination pixels that received a value.
    """
    height, width = frame.shape[:2]
    rows, cols = np.indices((height, width))
    in_bounds = (target_x >= 0) & (target_x < width)

    # For each destination pixel keep the largest source column landing on it:
    # that is the pixel a left-to-right scan of the row would write last.
    source_col = np.full((height, width), -1, dtype=np.intp)
    np.maximum.at(
        source_col,
        (rows[in_bounds], target_x[in_bounds]),
        cols[in_bounds],
    )

    filled = source_col >= 0
    warped = np.zeros_like(frame)
    warped[filled] = frame[rows[filled], source_col[filled]]
    return warped, filled


def create_stereo_pair(frame, depth_map, max_shift=20):
    """Synthesise left/right views of a frame by shifting pixels horizontally.

    Each pixel is displaced by ``int(max_shift * (1 - depth / 255))`` columns:
    to the right for the left view and to the left for the right view, so
    larger depth-map values move less. Destination pixels that receive no
    source pixel are filled with ``cv2.inpaint``.

    Args:
        frame: Image array indexed as ``[row, column, ...]``.
        depth_map: Per-pixel values with the same height and width as
            ``frame``; they are divided by 255 before use.
        max_shift: Displacement in pixels applied where the depth map is 0.

    Returns:
        A ``(left_image, right_image)`` tuple of arrays shaped like ``frame``.
    """
    width = frame.shape[1]
    depth = depth_map.astype(np.float32) / 255.0
    # astype truncates toward zero, matching int() on each scalar.
    shift = (max_shift * (1 - depth)).astype(np.intp)
    columns = np.arange(width)

    left_image, left_filled = _forward_warp(frame, columns + shift)
    right_image, right_filled = _forward_warp(frame, columns - shift)

    # Inpaint only the pixels nothing was written to. Deriving the mask from
    # the pixel colour would also repaint genuinely black source pixels.
    left_image = cv2.inpaint(
        left_image, (~left_filled).astype(np.uint8), 3, cv2.INPAINT_NS
    )
    right_image = cv2.inpaint(
        right_image, (~right_filled).astype(np.uint8), 3, cv2.INPAINT_NS
    )

    return left_image, right_image

# Pair each frame with its depth map and build the left/right views.
stereo_pairs = list(map(create_stereo_pair, frames, depth_maps))
print("Created stereoscopic pairs for all frames.")

# Step 6: Create anaglyph images
def create_anaglyph(left_image, right_image):
    """Combine a stereo pair into a red-cyan anaglyph.

    The images are treated as BGR, the channel order OpenCV uses for the
    frames it reads and writes in this pipeline. The red channel (index 2) is
    taken from the left view and the blue and green channels (indices 0 and 1)
    from the right view.

    Args:
        left_image: Left-eye image, indexed ``[row, column, channel]``.
        right_image: Right-eye image of the same shape.

    Returns:
        A new array shaped like ``left_image``; the inputs are not modified.
    """
    anaglyph = np.zeros_like(left_image)
    anaglyph[:, :, 0] = right_image[:, :, 0]  # blue from the right eye
    anaglyph[:, :, 1] = right_image[:, :, 1]  # green from the right eye
    anaglyph[:, :, 2] = left_image[:, :, 2]   # red from the left eye
    return anaglyph

# Merge each (left, right) pair into a single anaglyph frame.
anaglyph_frames = [create_anaglyph(*pair) for pair in stereo_pairs]
print("Generated anaglyph frames.")

# Step 7: Reconstruct video
def create_video(frames, output_path="output_3d_video.mp4", fps=30):
    """Encode a sequence of frames into an ``mp4v`` video file.

    Args:
        frames: Non-empty sequence of image arrays; the first frame's height
            and width set the video size.
        output_path: Destination file path.
        fps: Frames per second. A missing, zero, negative or NaN value falls
            back to 30.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If ``frames`` is empty.
        OSError: If the video writer cannot be opened for ``output_path``.
    """
    if len(frames) == 0:
        raise ValueError("Cannot create a video from an empty frame sequence.")
    if not (fps and fps > 0):
        # An unusable frame rate is replaced by the default rather than passed on.
        fps = 30

    height, width = frames[0].shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        if not out.isOpened():
            raise OSError(f"Could not open video writer for {output_path}")
        for frame in frames:
            out.write(frame)
    finally:
        # Always release the writer so the handle is not leaked on errors.
        out.release()

    return output_path

# Read the source frame rate so the output can be written at the same rate.
cap = cv2.VideoCapture(video_path)
try:
    fps = cap.get(cv2.CAP_PROP_FPS)
finally:
    cap.release()

if not fps > 0:
    # The capture reported no usable frame rate (0 or NaN); use 30 instead.
    fps = 30

output_video_path = create_video(anaglyph_frames, fps=fps)
print(f"3D video saved as {output_video_path}")

# Step 8: Download the output video
files.download(output_video_path)



Saving 140637-775595899_medium.mp4 to 140637-775595899_medium.mp4
Extracted 136 frames.


Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


Loading weights:  None


Using cache found in /root/.cache/torch/hub/rwightman_gen-efficientnet-pytorch_master
Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


Generated depth maps for all frames.
Created stereoscopic pairs for all frames.
Generated anaglyph frames.
3D video saved as output_3d_video.mp4


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>