
# SBS 3D Video Generation Pipeline

This notebook outlines the process of converting a monocular video into a side-by-side (SBS) 3D video.



## Setup and Preparation

Import necessary libraries and define the input video path.


In [1]:
import timeit
import os
import subprocess

# Define the path to the input video
input_video_path = 'data_in/input.mp4'



## Extract Frames from Video

Use ffmpeg to extract frames from the input video.


In [None]:

!ffmpeg -i {input_video_path} -q:v 2 datasets/data_in/frame%d.jpg



## Generate Depth Images

(Placeholder: Run the depth estimation model/tool here.)



## Image Preprocessing

Rename and pair images as needed.


In [None]:
# Example for renaming images (adjust according to your script)
!python sbs_rename_directory.py

In [2]:
import os

source_dir = "./datasets/data_in/depth"
target_prefix = "depth"

def get_frame_number(filename):
    return int(filename.split("depth")[1].split(".")[0])


file_list = os.listdir(source_dir)
frame_files = sorted([f for f in file_list if f.startswith("depth") and f.endswith(".png")], key=get_frame_number)
counter = 0

for filename in frame_files:
    new_name = f"{target_prefix}{counter}.png"
    os.rename(os.path.join(source_dir, filename), os.path.join(source_dir, new_name))
    counter += 1


## Generate Stereo Views

Run the script to generate left and right eye views.


In [None]:
!python sbs_generate_stereoviews.py data_in/ data_out/  

In [None]:
%%time

import cv2
import numpy as np
import os

def process_images(input_dir, output_dir, scale_factor):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    color_images = sorted([f for f in os.listdir(input_dir) if f.startswith('color')])
    depth_images = sorted([f for f in os.listdir(input_dir) if f.startswith('depth')])

    for color_image_path, depth_image_path in zip(color_images, depth_images):
        color_image = cv2.imread(os.path.join(input_dir, color_image_path))
        depth_map = cv2.imread(os.path.join(input_dir, depth_image_path), cv2.IMREAD_GRAYSCALE)

        if color_image is None:
            print(f"Error: Color image not found at {os.path.join(input_dir, color_image_path)}")
            continue

        if depth_map is None:
            print(f"Error: Depth map not found at {os.path.join(input_dir, depth_image_path)}")
            continue

        # Function to shift pixels based on depth map
        def shift_pixels(image, depth_map, direction):
            shifted_image = np.zeros_like(image)
            for y in range(image.shape[0]):
                for x in range(image.shape[1]):
                    disparity = calculate_disparity(depth_map[y, x])
                    new_x = x + disparity * direction
                    if 0 <= new_x < image.shape[1]:
                        shifted_image[y, new_x] = image[y, x]
            return shifted_image

        # Calculate disparity (example function, adjust as needed)
        def calculate_disparity(depth_value):
            # Simple linear mapping, adjust the scale factor as needed
            return int(depth_value * scale_factor)

        # Create left and right eye images
        left_eye_image = shift_pixels(color_image, depth_map, 1)
        right_eye_image = shift_pixels(color_image, depth_map, -1)

        frame_number = color_image_path.split('color')[1].split('.')[0]
        left_eye_output_path = os.path.join(output_dir, f'leftEye/leftEye{frame_number}.jpg')
        right_eye_output_path = os.path.join(output_dir, f'rightEye/rightEye{frame_number}.jpg')

        # Save the left and right eye images
        cv2.imwrite(left_eye_output_path, left_eye_image)
        cv2.imwrite(right_eye_output_path, right_eye_image)

        print(f"Processed frame {frame_number}.")

# Example usage
process_images('./datasets/data_in/', './datasets/data_out/', 0.05)

Processed frame 0.
Processed frame 1.
Processed frame 10.
Processed frame 100.
Processed frame 101.
Processed frame 102.
Processed frame 103.
Processed frame 104.
Processed frame 105.
Processed frame 106.
Processed frame 11.
Processed frame 12.
Processed frame 13.
Processed frame 14.
Processed frame 15.
Processed frame 16.
Processed frame 17.
Processed frame 18.
Processed frame 19.
Processed frame 2.
Processed frame 20.
Processed frame 21.
Processed frame 22.
Processed frame 23.
Processed frame 24.
Processed frame 25.
Processed frame 26.
Processed frame 27.
Processed frame 28.
Processed frame 29.
Processed frame 3.
Processed frame 30.
Processed frame 31.
Processed frame 32.
Processed frame 33.
Processed frame 34.
Processed frame 35.
Processed frame 36.
Processed frame 37.
Processed frame 38.
Processed frame 39.
Processed frame 4.
Processed frame 40.
Processed frame 41.
Processed frame 42.
Processed frame 43.
Processed frame 44.
Processed frame 45.
Processed frame 46.
Processed frame 47

In [None]:
%time

import cv2
import numpy as np
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

def shift_pixels(image, depth_map, direction, scale_factor):
    shifted_image = np.zeros_like(image)
    for y in range(image.shape[0]):
        for x in range(image.shape[1]):
            disparity = int(depth_map[y, x] * scale_factor)
            new_x = x + disparity * direction
            if 0 <= new_x < image.shape[1]:
                shifted_image[y, new_x] = image[y, x]
    return shifted_image

def process_single_image_pair(color_image_path, depth_image_path, input_dir, output_dir, scale_factor):
    try:
        color_image = cv2.imread(os.path.join(input_dir, color_image_path))
        depth_map = cv2.imread(os.path.join(input_dir, depth_image_path), cv2.IMREAD_GRAYSCALE)

        if color_image is None or depth_map is None:
            return f"Error: Image not found at {os.path.join(input_dir, color_image_path)} or {os.path.join(input_dir, depth_image_path)}"

        left_eye_image = shift_pixels(color_image, depth_map, 1, scale_factor)
        right_eye_image = shift_pixels(color_image, depth_map, -1, scale_factor)

        frame_number = color_image_path.split('color')[1].split('.')[0]
        left_eye_output_path = os.path.join(output_dir, f'leftEye{frame_number}.jpg')
        right_eye_output_path = os.path.join(output_dir, f'rightEye{frame_number}.jpg')

        cv2.imwrite(left_eye_output_path, left_eye_image)
        cv2.imwrite(right_eye_output_path, right_eye_image)

        return f"Processed frame {frame_number}."
    except Exception as e:
        return f"Error processing {color_image_path}: {e}"

def process_images(input_dir, output_dir, scale_factor):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    color_images = sorted([f for f in os.listdir(input_dir) if f.startswith('color')])
    depth_images = sorted([f for f in os.listdir(input_dir) if f.startswith('depth')])

    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = [executor.submit(process_single_image_pair, color_image_path, depth_image_path, input_dir, output_dir, scale_factor) for color_image_path, depth_image_path in zip(color_images, depth_images)]

    for future in as_completed(futures):
        print(future.result())

# Example usage
process_images('./datasets/data_in/', './datasets/data_out_fast/', 0.05)



## Inpainting Process

Run the script for inpainting left and right eye images.


In [None]:
!python sbs_inpaint_stereoviews.py data_out/ data_out_final/

In [12]:
import cv2
import numpy as np
import os

def create_mask_for_black_streaks(image):
    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Use adaptive thresholding to better capture the black streaks
    mask = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 3, 8)
    
    # Dilate the mask to include the edges of the black streaks
    kernel = np.ones((5,5), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=1)
    
    return mask

def inpaint_black_streaks(image, mask):
    # Inpaint the black streaks in the image
    inpainted_image = cv2.inpaint(image, mask, 5, cv2.INPAINT_TELEA)
    
    return inpainted_image

def process_images(input_dir, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    left_path = input_dir + "leftEye/"
    right_path = input_dir + "rightEye/"
    left_eye_images = sorted([f for f in os.listdir(left_path) if f.startswith('leftEye')])
    right_eye_images = sorted([f for f in os.listdir(right_path) if f.startswith('rightEye')])

    for left_eye_image_path, right_eye_image_path in zip(left_eye_images, right_eye_images):
        left_eye_image = cv2.imread(os.path.join(left_path, left_eye_image_path))
        right_eye_image = cv2.imread(os.path.join(right_path, right_eye_image_path))

        if left_eye_image is None or right_eye_image is None:
            print(f"Error: Image not found at {os.path.join(input_dir, left_eye_image_path)} or {os.path.join(input_dir, right_eye_image_path)}")
            continue

        # Create masks for the black streaks in both left and right eye images
        left_eye_mask = create_mask_for_black_streaks(left_eye_image)
        right_eye_mask = create_mask_for_black_streaks(right_eye_image)

        # Inpaint the black streaks in both left and right eye images
        left_eye_post = inpaint_black_streaks(left_eye_image, left_eye_mask)
        right_eye_post = inpaint_black_streaks(right_eye_image, right_eye_mask)

        frame_number = left_eye_image_path.split('leftEye')[1].split('.')[0]
        left_eye_post_output_path = os.path.join(output_dir + "leftEye/", f'leftEyePost{frame_number}.jpg')
        right_eye_post_output_path = os.path.join(output_dir + "rightEye/", f'rightEyePost{frame_number}.jpg')

        # Save the processed images and masks
        cv2.imwrite(left_eye_post_output_path, left_eye_post)
        cv2.imwrite(right_eye_post_output_path, right_eye_post)

        print(f"Processed frame {frame_number}.")

# Example usage
process_images('./datasets/data_out/', './datasets/data_out_post/')


Processed frame 0.
Processed frame 1.
Processed frame 10.
Processed frame 100.
Processed frame 101.
Processed frame 102.
Processed frame 103.
Processed frame 104.
Processed frame 105.
Processed frame 106.
Processed frame 11.
Processed frame 12.
Processed frame 13.
Processed frame 14.
Processed frame 15.
Processed frame 16.
Processed frame 17.
Processed frame 18.
Processed frame 19.
Processed frame 2.
Processed frame 20.
Processed frame 21.
Processed frame 22.
Processed frame 23.
Processed frame 24.
Processed frame 25.
Processed frame 26.
Processed frame 27.
Processed frame 28.
Processed frame 29.
Processed frame 3.
Processed frame 30.
Processed frame 31.
Processed frame 32.
Processed frame 33.
Processed frame 34.
Processed frame 35.
Processed frame 36.
Processed frame 37.
Processed frame 38.
Processed frame 39.
Processed frame 4.
Processed frame 40.
Processed frame 41.
Processed frame 42.
Processed frame 43.
Processed frame 44.
Processed frame 45.
Processed frame 46.
Processed frame 47


## Create Videos from Images

Use ffmpeg to create left and right eye videos.


In [14]:

!ffmpeg -framerate 30 -i './datasets/data_out_post/leftEye/leftEyePost%d.jpg' -c:v libx264 -pix_fmt yuv420p -vf "fps=30" left_eye.mp4
!ffmpeg -framerate 30 -i './datasets/data_out_post/rightEye/rightEyePost%d.jpg' -c:v libx264 -pix_fmt yuv420p -vf "fps=30" right_eye.mp4


ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.0.40.1)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.0_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enabl

frame=    0 fps=0.0 q=0.0 size=       0kB time=-577014:32:22.77 bitrate=  -0.0kbits/s speed=N/A    frame=  107 fps=0.0 q=-1.0 Lsize=     803kB time=00:00:03.46 bitrate=1898.2kbits/s speed=7.15x    
video:801kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.263294%
[1;36m[libx264 @ 0x148706ae0] [0mframe I:3     Avg QP:21.59  size: 34824
[1;36m[libx264 @ 0x148706ae0] [0mframe P:28    Avg QP:23.36  size: 12306
[1;36m[libx264 @ 0x148706ae0] [0mframe B:76    Avg QP:25.14  size:  4877
[1;36m[libx264 @ 0x148706ae0] [0mconsecutive B-frames:  4.7%  1.9%  0.0% 93.5%
[1;36m[libx264 @ 0x148706ae0] [0mmb I  I16..4: 14.6% 75.3% 10.1%
[1;36m[libx264 @ 0x148706ae0] [0mmb P  I16..4:  7.3% 13.5%  0.5%  P16..4: 40.9% 11.8%  3.7%  0.0%  0.0%    skip:22.2%
[1;36m[libx264 @ 0x148706ae0] [0mmb B  I16..4:  0.6%  0.8%  0.0%  B16..8: 43.1%  4.7%  0.6%  direct: 1.2%  skip:49.0%  L0:47.5% L1:50.6% BI: 1.8%
[1;36m[libx264 @ 0x148706ae0] [0m8x8 transform int


## Merge Videos and Inject Metadata

Combine the left and right eye videos into an SBS video and inject 3D metadata.


In [15]:

!ffmpeg -i left_eye.mp4 -i right_eye.mp4 -filter_complex "[0:v][1:v]hstack=inputs=2[v]" -map "[v]" output.SBS.mp4
!ffmpeg -i output.SBS.mp4 -vf "scale=2*iw:ih" -c:v libx264 -x264opts "frame-packing=3" -aspect 2:1 output_final_sbs.mp4


ffmpeg version 6.0 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.0.40.1)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.0_1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enabl

frame=  107 fps= 85 q=-1.0 Lsize=    2218kB time=00:00:03.46 bitrate=5240.9kbits/s speed=2.76x     
video:2216kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.096701%
[1;36m[libx264 @ 0x14b106320] [0mframe I:3     Avg QP:20.01  size: 95943
[1;36m[libx264 @ 0x14b106320] [0mframe P:29    Avg QP:22.13  size: 35473
[1;36m[libx264 @ 0x14b106320] [0mframe B:75    Avg QP:24.16  size: 12688
[1;36m[libx264 @ 0x14b106320] [0mconsecutive B-frames:  4.7%  5.6%  0.0% 89.7%
[1;36m[libx264 @ 0x14b106320] [0mmb I  I16..4: 20.7% 73.0%  6.2%
[1;36m[libx264 @ 0x14b106320] [0mmb P  I16..4:  9.7% 13.2%  0.3%  P16..4: 44.0%  7.1%  1.0%  0.0%  0.0%    skip:24.7%
[1;36m[libx264 @ 0x14b106320] [0mmb B  I16..4:  0.5%  0.5%  0.0%  B16..8: 37.4%  2.3%  0.1%  direct: 0.8%  skip:58.3%  L0:45.3% L1:52.6% BI: 2.1%
[1;36m[libx264 @ 0x14b106320] [0m8x8 transform intra:61.1% inter:90.3%
[1;36m[libx264 @ 0x14b106320] [0mcoded y,uvDC,uvAC intra: 28.3% 40.1% 2.0% inter: 5.


## Cleanup and Finalization

(Optional) Cleanup temporary files and display/export the final video path.


In [None]:

# Example cleanup (adjust as needed)
# !rm -rf video_images/
# !rm left_eye.mp4 right_eye.mp4

# Display the final video path
final_video_path = 'outputv2-3D.mp4'
final_video_path
