<a href="https://colab.research.google.com/github/TUIlmenauAMS/Videocoding/blob/main/DCVC_FM_Record_Video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Colab Example to run the DCVC Video Coder

This notebook must be run with a GPU selected as the runtime type.

The recorded video must have at least 96 frames (about 4 seconds at a frame rate of 25 fps).

The recorded video must have a resolution > 640x480

If desired, change the resolution wherever indicated (the dataset configuration and the ffmpeg command that converts the reconstructed videos).

In [None]:
#@title Video Recorder Setup

# Written by Github user emilyxxie https://github.com/emilyxxie
# https://github.com/emilyxxie/colab_utils_and_snippets/blob/master/video_webcam_snippets.ipynb

from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode

def record_video1(filename='video.mp4'):
  """Record a webcam clip in the browser and write it to ``filename``.

  Displays a JavaScript snippet that defines ``recordVideo()``, evaluates it
  through ``eval_js`` and base64-decodes the returned string into ``filename``
  (relative paths resolve against the current working directory).

  The JavaScript shows a live webcam preview with a "Start Recording" button,
  which is swapped for a "Stop Recording" button once recording has started.

  Args:
    filename: Path of the file the decoded recording is written to.

  Returns:
    ``filename``, unchanged. It is returned even when an exception was caught,
    in which case only the error text is printed and the file may not have
    been (re)written.

  NOTE(review): the MediaRecorder is created with
  ``video/webm; codecs=vp9``, so the saved bytes are presumably WebM even
  though the default filename ends in ``.mp4`` - confirm before relying on
  the extension.
  """
  js = Javascript("""
    async function recordVideo() {
      // mashes together the advanced_outputs.ipynb function provided by Colab,
      // a bunch of stuff from Stack overflow, and some sample code from:
      // https://developer.mozilla.org/en-US/docs/Web/API/MediaStream_Recording_API

      // Optional frames per second argument.
      const options = { mimeType: "video/webm; codecs=vp9" };
      const div = document.createElement('div');
      const capture = document.createElement('button');
      const stopCapture = document.createElement("button");
      capture.textContent = "Start Recording";
      capture.style.background = "green";
      capture.style.color = "white";

      stopCapture.textContent = "Stop Recording";
      stopCapture.style.background = "red";
      stopCapture.style.color = "white";
      div.appendChild(capture);

      const video = document.createElement('video');
      const recordingVid = document.createElement("video");
      video.style.display = 'block';

      const stream = await navigator.mediaDevices.getUserMedia({video: true});
      // create a media recorder instance, which is an object
      // that will let you record what you stream.
      let recorder = new MediaRecorder(stream, options);
      document.body.appendChild(div);
      div.appendChild(video);
      // Video is a media element.  This line here sets the object which serves
      // as the source of the media associated with the HTMLMediaElement
      // Here, we'll set it equal to the stream.
      video.srcObject = stream;
      // We're inside an async function, so this await will fire off the playing
      // of a video. It returns a Promise which is resolved when playback has
      // been successfully started. Since this is async, the function will be
      // paused until this has started playing.
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
      // and now, just wait for the capture button to get clicked in order to
      // start recording
      await new Promise((resolve) => {
        capture.onclick = resolve;
      });
      recorder.start();
      capture.replaceWith(stopCapture);
      // use a promise to tell it to stop recording
      await new Promise((resolve) => stopCapture.onclick = resolve);
      recorder.stop();

      let recData = await new Promise((resolve) => recorder.ondataavailable = resolve);
      let arrBuff = await recData.data.arrayBuffer();

      // stop the stream and remove the video element
      stream.getVideoTracks()[0].stop();
      div.remove();

      let binaryString = "";
      let bytes = new Uint8Array(arrBuff);
      bytes.forEach((byte) => {
        binaryString += String.fromCharCode(byte);
      })
      return btoa(binaryString);
    }
    """)
  try:
    # Make the recordVideo() definition available in the cell output.
    display(js)
    # recordVideo() ends with btoa(...), so `data` is the base64-encoded recording.
    data = eval_js('recordVideo({})')
    binary = b64decode(data)
    # Write the decoded bytes verbatim; no transcoding happens here.
    with open(filename, "wb") as video_file:
      video_file.write(binary)
    print(
        f"Finished recording video. Saved binary under filename in current working directory: {filename}"
    )
  except Exception as err:
      # In case any exceptions arise
      # Only the message is printed; execution continues to the return below.
      print(str(err))
  # Reached on both the success and the failure path.
  return filename

In [None]:
#@title Record a Video (with 96 frames minimum)
%cd /content/
!mkdir data
%cd data

# Run the function, get the video path as saved in your notebook, and play it back here.
from IPython.display import HTML
from base64 import b64encode

video_width = 300

video_path = record_video1()
video_file = open(video_path, "r+b").read()

# Count the number of frames
import imageio
vid=imageio.get_reader(video_path,  'ffmpeg')
nFrames = vid.count_frames()
print("Number of frames = ", nFrames)
assert nFrames >= 96, "Video is too short. Try again with a longer video!"

# Display video
video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")

/content
mkdir: cannot create directory ‘data’: File exists
/content/data


<IPython.core.display.Javascript object>

Finished recording video. Saved binary under filename in current working directory: video.mp4
Number of frames =  256


In [None]:
# Convert the .mp4 video to YUV420 format
%cd /content/data
! ffmpeg -i video.mp4 videoYUV.yuv -y

/content/data
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable

In [None]:
# Clone the DCVC Git repository
%cd /content/
! git clone https://github.com/microsoft/DCVC.git
%cd /content/DCVC/DCVC-FM

/content
Cloning into 'DCVC'...
remote: Enumerating objects: 355, done.[K
remote: Counting objects: 100% (124/124), done.[K
remote: Compressing objects: 100% (87/87), done.[K
remote: Total 355 (delta 48), reused 79 (delta 36), pack-reused 231[K
Receiving objects: 100% (355/355), 6.43 MiB | 26.13 MiB/s, done.
Resolving deltas: 100% (128/128), done.
/content/DCVC/DCVC-FM


In [None]:
# Edit the YUV dataset file to only include the video data
# (add more data or change the video resolution if required)
import json

# Resolution written into the config; change it here if the video differs.
videoWidth, videoHeight = 640, 480

print(nFrames)

# Build the configuration as data instead of patching a text template:
# replacing the substring "96" would also rewrite any other "96" in the
# template (for example inside a width such as 960).
datasetConfigDict = {
    "root_path": "/content/",
    "test_classes": {
        "Webcam": {
            "test": 1,
            "base_path": "data",
            "src_type": "yuv420",
            "sequences": {
                "videoYUV": {
                    "width": videoWidth,
                    "height": videoHeight,
                    # int() guards against non-builtin integer types that
                    # json cannot serialise.
                    "frames": int(nFrames),
                    "intra_period": -1,
                },
            },
        },
    },
}
datasetConfig = json.dumps(datasetConfigDict, indent=4)

# Overwrite the config file; the context manager closes it even on error.
with open("/content/DCVC/DCVC-FM/dataset_config_example_yuv420.json", "wt") as datasetConfigFile:
    datasetConfigFile.write(datasetConfig)

256
{
    "root_path": "/content/",
    "test_classes": {
        "Webcam": {
            "test": 1,
            "base_path": "data",
            "src_type": "yuv420",
            "sequences": {
                "videoYUV":      {"width": 640, "height": 480, "frames": 256, "intra_period": -1}
            }
        }
    }
}


In [None]:
# Build the arithmetic codec
%cd src
! mkdir build
%cd build
! cmake ../cpp -DCMAKE_BUILD_TYPE=Release
! make -j
%cd ..
%cd ..

! apt-get install ninja-build
%cd ./src/models/extensions/
! python setup.py build_ext --inplace
%cd /content/DCVC/DCVC-FM

# Download checkpoints
%cd checkpoints/
! python download.py
%cd ..

/content/DCVC/DCVC-FM/src
/content/DCVC/DCVC-FM/src/build
-- The C compiler identification is GNU 11.4.0
-- The CXX compiler identification is GNU 11.4.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Configuring done (0.1s)
-- Generating done (0.0s)
-- Build files have been written to: /content/DCVC/DCVC-FM/src/build/3rdparty/pybind11/pybind11-download
[ 11%] Creating directories for 'pybind11'
[ 22%] Performing download step (git clone) for 'pybind11'
Cloning into 'pybind11-src'...
HEAD is now at 5b0a6fc chore: bump version to 3.10.4
[ 33%] No update step for 'pybind11'
[ 44%] No patch step for 'pybind11'
[ 55%] No configure 

In [None]:
# Install required packages
! python -m pip install -r /content/DCVC/DCVC-FM/requirements.txt
! pip install torch torchvision torchaudio

Collecting bd-metric (from -r /content/DCVC/DCVC-FM/requirements.txt (line 7))
  Downloading bd_metric-0.9.0-py3-none-any.whl (3.2 kB)
Collecting ptflops (from -r /content/DCVC/DCVC-FM/requirements.txt (line 8))
  Downloading ptflops-0.7.3-py3-none-any.whl (18 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2.0.0->-r /content/DCVC/DCVC-FM/requirements.txt (line 4))
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2.0.0->-r /content/DCVC/DCVC-FM/requirements.txt (line 4))
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=2.0.0->-r /content/DCVC/DCVC-FM/requirements.txt (line 4))
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=2.0.0->-r /content/DCVC/DCVC-FM/requirements.txt (line 4))
  Using c

`--rate_num`: number of bitrates for compression (4); must be >= 2. The cell below uses `--rate_num 2`.

In [None]:
# Encode and decode the dataset in YUV format
%cd /content/DCVC/DCVC-FM
! python test_video.py --model_path_i checkpoints/cvpr2024_image.pth.tar --model_path_p checkpoints/cvpr2024_video.pth.tar --rate_num 2 --test_config dataset_config_example_yuv420.json --cuda 1 --worker 1 --write_stream 1 --output_path output.json --force_intra_period 9999 --save_decoded_frame 1

/content/DCVC/DCVC-FM
testing 2 rates, using q_indexes: 0, 63, 
  return F.conv2d(input, weight, bias, self.stride,
100% 2/2 [06:04<00:00, 182.37s/it]
Test finished
Tested 512 frames from 1 sequences
Total elapsed time: 6.1 min


In [None]:
# Convert the reconstructed videos from YUV to mp4
# (correct the dimensions if needed)
! ffmpeg -s 640x480 -i /content/DCVC/DCVC-FM/out_bin/Webcam/videoYUV_q0.yuv /content/DCVC/DCVC-FM/out_bin/Webcam/video_q0_reconstructed.mp4 -y
! ffmpeg -s 640x480 -i /content/DCVC/DCVC-FM/out_bin/Webcam/videoYUV_q63.yuv /content/DCVC/DCVC-FM/out_bin/Webcam/video_q63_reconstructed.mp4 -y

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [None]:
# Play the original video
video_path = "/content/data/video.mp4"
# Read-only access is enough for playback, and the context manager closes the
# handle instead of leaving it open for the lifetime of the kernel.
with open(video_path, "rb") as video_handle:
  video_file = video_handle.read()
video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")

In [None]:
# Play the reconstructed video - low quality, low bitrate, high compression
video_path = "/content/DCVC/DCVC-FM/out_bin/Webcam/video_q0_reconstructed.mp4"
# Read-only access is enough for playback, and the context manager closes the
# handle instead of leaving it open for the lifetime of the kernel.
with open(video_path, "rb") as video_handle:
  video_file = video_handle.read()
video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")

In [None]:
# Play the reconstructed video - high quality, high bitrate, low compression
video_path = "/content/DCVC/DCVC-FM/out_bin/Webcam/video_q63_reconstructed.mp4"
# Read-only access is enough for playback, and the context manager closes the
# handle instead of leaving it open for the lifetime of the kernel.
with open(video_path, "rb") as video_handle:
  video_file = video_handle.read()
video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")

In [None]:
# Compression ratios: size of the raw YUV input divided by each compressed size
import os

# File sizes in bytes: the two DCVC bitstreams, the raw YUV and the recording
sizecompr    = os.path.getsize("/content/DCVC/DCVC-FM/out_bin/Webcam/videoYUV_q0.bin")
sizecompr63  = os.path.getsize("/content/DCVC/DCVC-FM/out_bin/Webcam/videoYUV_q63.bin")
sizeuncompr  = os.path.getsize("/content/data/videoYUV.yuv")
sizecomprmp4 = os.path.getsize("/content/data/video.mp4")

# One report line per codec, in the same order as before
for ratio_label, compressed_size in (
    ("MP4  compression ratio     = ", sizecomprmp4),
    ("DCVC compression ratio q0  = ", sizecompr),
    ("DCVC compression ratio q63 = ", sizecompr63),
):
  print(ratio_label, sizeuncompr/compressed_size)

MP4  compression ratio     =  57.42556154230345
DCVC compression ratio q0  =  10706.665470530188
DCVC compression ratio q63 =  648.9082209656373
