## Compression of a Video using a Color Transform, Chroma Downsampling, Block-DCT with Block-Sampling, and Motion Compensation


- To improve compression performance, it also includes motion compensation, for instance using the nearest neighbour approach.

- For simplicity, for a frame of size (480, 640), omit the boundaries, e.g. just use the blocks 10 to 50 vertically and 10 to 60 horizontally for motion compensation.  
- To optimize compression rate and visual quality, for the nearest neighbour approach, find a suitable threshold (for the MAE or MSE) above which motion is detected, and a suitable search neighbourhood (limit the length of the motion vectors).
- Use a Group of Pictures (GoP) sequence of I,P,I, meaning after each intra coded frame follows a predicted frame.
- For simplicity for the predicted frames, just store the motion vectors and not the residual, and decode just from them and the previous I frame.
- Store these in a pickle file "encoded.pickle" as encoded file.  
- Provide a corresponding decoder function "decoder", which takes the pickle file "encoded.pickle" as input and writes the decoded video to the file "decoded.mp4".

In [None]:
#sudo apt-get install libopencv-dev python3-opencv
#!pip3 install opencv-python
#!pip3 install pickle5

import cv2
import numpy as np
import pickle5 as pickle
import scipy.signal
import scipy.fftpack as sft
import numpy as np

def mean_squared_error(Previous_frame, Current_frame):
    """Return the mean squared difference between two equally shaped arrays.

    Both inputs are converted to float64 before subtracting, so unsigned
    integer images (for example uint8 video frames) neither wrap around on
    negative differences nor overflow when the difference is squared.

    Args:
        Previous_frame: reference array of any numeric dtype.
        Current_frame: array compared against the reference.

    Returns:
        The mean over all elements of the squared difference (float64).
    """
    previous = np.asarray(Previous_frame, dtype=np.float64)
    current = np.asarray(Current_frame, dtype=np.float64)
    return ((current - previous) ** 2).mean(axis=None)

def psnr_image(encoded_plane, reference_plane):
    """Compute the peak signal-to-noise ratio (in dB) between two planes.

    The peak value is fixed at 255. When the mean squared error between the
    planes is exactly zero the function returns ``np.inf``.

    Args:
        encoded_plane: plane under test.
        reference_plane: plane it is compared against.

    Returns:
        ``20*log10(255) - 10*log10(mse)``, or ``np.inf`` for identical planes.
    """
    peak = 255
    error = mean_squared_error(reference_plane, encoded_plane)
    # Identical planes: avoid log10(0) and report an infinite PSNR instead.
    if error == 0:
        return np.inf
    return 20 * np.log10(peak) - 10 * np.log10(error)

def view_as_block(img, blocksize=(8, 8), color = False):
    """Yield every complete, non-overlapping block of ``img`` in row-major order.

    Blocks that would extend past the bottom or right edge are skipped, so
    only full ``blocksize`` tiles are produced. Colour and grayscale images
    are tiled identically: the slice only touches the first two axes, so any
    trailing channel axis is carried along unchanged.

    Args:
        img: array whose first two axes are (rows, columns).
        blocksize: ``(block_rows, block_cols)`` of each tile.
        color: kept for backward compatibility; the tiling no longer depends
            on it (the colour path used to drop the last row and column of
            complete blocks).

    Yields:
        ``(i, j, block)`` where ``(i, j)`` is the top-left pixel of the tile
        and ``block`` is a view into ``img``.
    """
    block_h, block_w = blocksize
    height, width = img.shape[0:2]
    # The "+ 1" keeps the last tile when the image size is an exact multiple
    # of the block size; partial tiles are excluded by the range bound itself.
    for i in range(0, height - block_h + 1, block_h):
        for j in range(0, width - block_w + 1, block_w):
            yield i, j, img[i:i + block_h, j:j + block_w]


def search_range(i, max_i, max_distance, blocksize):
    """Return the candidate start positions to test along one axis.

    The window starts ``max_distance`` before ``i`` (clamped at 0) and ends
    ``max_distance + blocksize // 2`` after it. The end is additionally
    clamped so that a block of ``blocksize`` starting at any returned
    position still lies completely inside ``[0, max_i)``.

    Args:
        i: start position of the current block along this axis.
        max_i: frame extent along this axis.
        max_distance: search radius in pixels.
        blocksize: block extent along this axis.

    Returns:
        A ``range`` of start positions (possibly empty).
    """
    half_block = blocksize // 2
    # Last valid start is max_i - blocksize; range() excludes its stop value.
    upper = min(i + max_distance + half_block, max_i - blocksize + 1)
    lower = max(i - max_distance, 0)
    return range(lower, upper)


def compress(image):
    """Keep the 2x2 lowest-frequency DCT coefficients of every 8x8 block.

    Each complete 8x8 block of ``image`` is transformed with an orthonormal
    2-D DCT; its top-left 2x2 coefficients are stored in the output at the
    block position divided by 4, so the result has one quarter of the input
    size along each axis.

    Args:
        image: 2-D array (one image plane).

    Returns:
        Float array of shape ``(rows // 4, cols // 4)`` holding the retained
        coefficients.
    """
    factor = 4  # 8-pixel block edge / 2 retained coefficients per axis
    size = 2    # retained coefficients per axis
    subband = np.zeros((image.shape[0] // factor, image.shape[1] // factor))
    for i, j, b in view_as_block(image, (8, 8)):
        coeffs = sft.dctn(b, norm='ortho')
        k = i // factor
        l = j // factor
        subband[k:k + size, l:l + size] = coeffs[:size, :size]
    return subband


def decompress(image):
    """Rebuild an image plane from 2x2 groups of retained DCT coefficients.

    Every 2x2 group of ``image`` is placed in the top-left corner of an
    otherwise zero 8x8 coefficient block, which is then inverse-transformed
    with an orthonormal 2-D DCT and written to the output at four times the
    group's position.

    Args:
        image: 2-D array of retained coefficients.

    Returns:
        Float array with four times as many rows and columns as ``image``.
    """
    block_edge = 8
    kept = 2
    upscale = 4
    n_rows, n_cols = image.shape[0], image.shape[1]
    restored = np.zeros((n_rows * upscale, n_cols * upscale))

    for row in range(0, n_rows, kept):
        top = row * upscale
        for col in range(0, n_cols, kept):
            left = col * upscale
            # All coefficients outside the retained corner are zero.
            coeffs = np.zeros((block_edge, block_edge))
            coeffs[:kept, :kept] = image[row:row + kept, col:col + kept]
            restored[top:top + block_edge, left:left + block_edge] = sft.idctn(coeffs, norm='ortho')
    return restored


def exhaustive_search(prev, curr, blocksize=(8, 8), max_distance=10,
                      block_range=((10, 50), (10, 60)), match_psnr=85.5):
    """Estimate one motion vector per block of ``curr`` by searching ``prev``.

    For every block of ``curr`` whose block index lies inside ``block_range``
    the candidate positions given by ``search_range`` are scanned in
    row-major order. The similarity of a candidate is the sum of the
    per-channel PSNR over three channels. The scan stops at the first
    candidate whose similarity reaches ``match_psnr`` (an exact match gives
    an infinite PSNR and therefore also stops the scan); otherwise the first
    candidate with the highest similarity is used.

    Args:
        prev: previous frame, indexed as (rows, cols, channel).
        curr: current frame, same layout as ``prev``.
        blocksize: ``(block_rows, block_cols)``.
        max_distance: search radius passed to ``search_range``.
        block_range: ``((row_start, row_stop), (col_start, col_stop))``
            half-open ranges of block indices that are searched; all other
            blocks keep a zero vector.
        match_psnr: summed PSNR at or above which a candidate is accepted
            immediately.

    Returns:
        Integer array of shape ``(rows // block_rows, cols // block_cols, 2)``.
    """
    block_h, block_w = blocksize
    (row_start, row_stop), (col_start, col_stop) = block_range
    motion_vectors = np.zeros(
        (curr.shape[0] // block_h, curr.shape[1] // block_w, 2), dtype=int)

    for i, j, b in view_as_block(curr, blocksize=blocksize, color=True):
        bi = i // block_h
        bj = j // block_w
        if not (row_start <= bi < row_stop and col_start <= bj < col_stop):
            continue

        best = None  # (ti, tj, similarity) of the best candidate so far
        matched = False
        # (ti, tj) is the top-left pixel of the candidate block in prev.
        for ti in search_range(i, curr.shape[0], max_distance, block_h):
            for tj in search_range(j, curr.shape[1], max_distance, block_w):
                candidate = prev[ti:ti + block_h, tj:tj + block_w]
                sim = sum(psnr_image(candidate[:, :, c], b[:, :, c])
                          for c in range(3))
                # Strict ">" keeps the first of several equally good candidates.
                if best is None or sim > best[2]:
                    best = (ti, tj, sim)
                if sim >= match_psnr:
                    matched = True
                    break
            if matched:
                break

        if best is None:
            # Empty search window: keep the zero vector rather than reusing
            # values left over from a previously processed block.
            continue

        ti, tj, _ = best
        # NOTE(review): the pixel displacement is divided by the block size
        # and truncated toward zero, whereas predict_frame adds the stored
        # value to a pixel coordinate. Confirm which unit is intended before
        # changing either side.
        motion_vectors[bi, bj, 0] = int((i - ti) / block_h)
        motion_vectors[bi, bj, 1] = int((j - tj) / block_w)
    return motion_vectors

def predict_frame(prev, mv, blocksize=(8, 8)):
    """Build a predicted frame by shifting blocks of ``prev`` by ``mv``.

    The prediction starts as a copy of ``prev``. For every block with a
    non-zero vector, the block at grid position ``(i, j)`` of ``prev`` is
    copied to that position offset by ``mv[i, j]`` pixels. Source pixels are
    always read from the unmodified ``prev``, and ``prev`` itself is never
    written to.

    Args:
        prev: reference frame; the first two axes are (rows, cols).
        mv: array of shape ``(block_rows, block_cols, 2)`` holding the
            (row, col) offset of every block.
        blocksize: ``(block_rows, block_cols)`` in pixels.

    Returns:
        A new array with the same shape and dtype as ``prev``.
    """
    block_h, block_w = blocksize
    height, width = prev.shape[0], prev.shape[1]
    pred = np.array(prev)  # independent copy: the caller's frame stays intact
    for i in range(mv.shape[0]):
        for j in range(mv.shape[1]):
            dy = int(mv[i, j, 0])
            dx = int(mv[i, j, 1])
            if dy == 0 and dx == 0:
                # Unmoved blocks are already present in the copy; skipping
                # them also keeps them from overwriting shifted blocks.
                continue
            bi = i * block_h
            bj = j * block_w
            mi = bi + dy
            mj = bj + dx
            source_inside = bi + block_h <= height and bj + block_w <= width
            target_inside = (0 <= mi <= height - block_h
                             and 0 <= mj <= width - block_w)
            if not (source_inside and target_inside):
                # A block that would leave the frame is left where it is.
                continue
            pred[mi:mi + block_h, mj:mj + block_w] = prev[bi:bi + block_h, bj:bj + block_w]
    return pred



#####################################################Encoder#####################################################



def encoder(video_path='videorec.mp4', encoded_path='encoded.pickle'):
    """Encode a video into a pickle stream of alternating I- and P-frames.

    Even-numbered frames are intra coded: the BGR frame is converted to
    Y/Cb/Cr, both chroma planes are decimated by two in each direction, every
    plane is reduced with ``compress`` and the three planes are pickled as
    int16 arrays in the order Y, Cb, Cr. Odd-numbered frames are predicted:
    only the motion vectors found by ``exhaustive_search`` against the
    preceding original frame are stored, offset by +50 and cast to uint8.

    Args:
        video_path: input video file read with OpenCV.
        encoded_path: output pickle file.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # The context manager guarantees the stream is flushed and closed
        # before anything tries to read it back.
        with open(encoded_path, 'wb') as g:
            n = 0  # frame counter
            prev = None  # most recent I-frame, reference for the next P-frame
            while cap.isOpened():
                ret, frame0 = cap.read()
                if not ret:
                    break

                if n % 2 == 0:
                    (B, G, R) = cv2.split(frame0)

                    # Luma weights sum to one (0.299 + 0.587 + 0.114),
                    # matching the inverse transform applied in decoder().
                    Y = 0.299 * R + 0.587 * G + 0.114 * B
                    C_b = - 0.16864 * R - 0.33107 * G + 0.49970 * B
                    C_r = 0.499813 * R - 0.418531 * G - 0.081282 * B

                    # 2x chroma subsampling by plain decimation.
                    planes = (Y, C_b[::2, ::2], C_r[::2, ::2])
                    for plane in planes:
                        pickle.dump(np.int16(compress(plane)), g)
                    prev = frame0
                else:
                    # Shift by +50 so that negative vectors fit into uint8;
                    # decoder() subtracts the same offset.
                    mv = exhaustive_search(prev, frame0) + 50
                    pickle.dump(np.uint8(mv), g)
                print(n)
                n += 1
    finally:
        cap.release()
    return

##Decoder:#####################
def decoder(encoded_path='encoded.pickle', video_path='decoded.mp4'):
    """Decode a pickle stream written by ``encoder`` into an mp4 video.

    Frames alternate between intra frames (three pickled coefficient planes
    Y, Cb, Cr) and predicted frames (one pickled motion-vector array stored
    with a +50 offset). Intra frames are rebuilt with ``decompress``, chroma
    is upsampled by pixel repetition and the planes are converted back to
    BGR. Predicted frames are produced by ``predict_frame`` from the last
    decoded intra frame. Decoding stops at the end of the stream.

    NOTE: ``pickle.load`` must only be used on trusted files; only decode
    streams produced by ``encoder``.

    Args:
        encoded_path: input pickle file.
        video_path: output video file (mp4v codec, 20 frames per second).
    """

    def to_uint8(plane):
        # Reconstructed values can fall outside [0, 255]; a bare uint8 cast
        # would wrap them around instead of saturating.
        return np.uint8(np.clip(np.rint(plane), 0, 255))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = None   # created once the decoded frame size is known
    prev = None  # last decoded I-frame
    n = 0        # frame counter
    try:
        with open(encoded_path, 'rb') as g:
            while True:
                print("Frame= ", n)
                # every 2nd frame is an I-frame
                iframe = n % 2 == 0

                if iframe:
                    try:
                        Ydct = pickle.load(g)
                    except EOFError:
                        break
                    if n == 0:
                        (rows, cols) = Ydct.shape  # stored coefficient-plane size
                        print("(rows, cols)=", (rows, cols))
                    DCb = pickle.load(g)
                    DCr = pickle.load(g)

                    Y = decompress(Ydct)
                    # Undo the 2x chroma subsampling by repeating pixels.
                    C_b = np.repeat(np.repeat(decompress(DCb), 2, axis=0), 2, axis=1)
                    C_r = np.repeat(np.repeat(decompress(DCr), 2, axis=0), 2, axis=1)

                    R = Y + 1.4025 * C_r
                    G = Y - 0.34434 * C_b - 0.7144 * C_r
                    B = Y + 1.7731 * C_b

                    framedec = cv2.merge([to_uint8(B), to_uint8(G), to_uint8(R)])
                    prev = framedec
                else:
                    try:
                        MV = pickle.load(g)
                    except EOFError:
                        break
                    MV = MV.astype(int) - 50  # remove the encoder's offset
                    framedec = np.uint8(predict_frame(prev, MV))

                if out is None:
                    # Size the writer from the decoded frame rather than a
                    # fixed 640x480, so other resolutions are written too.
                    frame_rows, frame_cols = framedec.shape[:2]
                    out = cv2.VideoWriter(video_path, fourcc, 20.0,
                                          (frame_cols, frame_rows))

                print("framedec.shape=", framedec.shape)
                out.write(framedec)
                n += 1
    finally:
        if out is not None:
            out.release()
    return
    
# Printed when this cell has run to the end, i.e. all definitions above were executed.
print('All Done')

All Done


- Run the next cell to evaluate your code.

In [None]:
#the cell will pass if no errors are raised by "assert", and fail otherwise
import scipy.signal as sp
import numpy as np
import matplotlib.pyplot as plt
import os
import sys


# Run the full pipeline: encoder() writes 'encoded.pickle', decoder() writes 'decoded.mp4'.
encoder()
decoder()

print("Ruinning submission done. Even if the Validade Notebook passes all tests that doesn't mean that your answer is correct!")
#print("Playing Videos...")
#!python play_video.py 'videorec.mp4' 'decoded.mp4'

# Compression score: natural log of (decoded video file size / encoded pickle file size).
compsize=os.path.getsize('encoded.pickle')
decsize=os.path.getsize('decoded.mp4')
logcompression=np.log(decsize/compsize)
print("Log Compression ratio=", logcompression)



#"""
#os.system('ffmpeg -i videorec.mp4 -i decoded.mp4 -filter_complex "ssim" -f null /dev/null &> out.txt')
#!pwd
#!ffmpeg -i videorec.mp4 -i decoded.mp4 -filter_complex "ssim" -f null /dev/null &> out.txt
# Quality score: sys.stdout is temporarily replaced by a handle on 'out.txt'
# while ffmpeg's "ssim" filter compares the original and the decoded video.
# NOTE(review): presumably the redirect is what routes the output of the
# IPython "!" shell escape into out.txt - confirm in the target notebook
# environment. The handle opened here is never closed explicitly.
old_stdout = sys.stdout
sys.stdout = open('out.txt', 'w')
!ffmpeg -i videorec.mp4 -i decoded.mp4 -filter_complex "ssim" -f null /dev/null 
# Extract the number following "All:" from out.txt into ssim.txt and load it.
os.system("grep -oP '(?<=All:)[0-9.]+' out.txt > ssim.txt")
ssimval=np.loadtxt('ssim.txt')
sys.stdout = old_stdout
print("ssim value=", ssimval)

# Final score: log compression ratio plus 30 times the SSIM value.
#performance=logcompression+0.05/(1-ssimval)
performance=logcompression+30*ssimval
print("performance=", performance)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
Frame=  0
(rows, cols)= (120, 160)
framedec.shape= (480, 640, 3)
Frame=  1
framedec.shape= (480, 640, 3)
Frame=  2
framedec.shape= (480, 640, 3)
Frame=  3
framedec.shape= (480, 640, 3)
Frame=  4
framedec.shape= (480, 640, 3)
Frame=  5
framedec.shape= (480, 640, 3)
Frame=  6
framedec.shape= (480, 640, 3)
Frame=  7
framedec.shape= (480, 640, 3)
Frame=  8
framedec.shape= (480, 640, 3)
Frame=  9
framedec.shape= (480, 640, 3)
Frame=  10
framedec.shape= (480, 640, 3)
Frame=  11
framedec.shape= (480, 640, 3)
Frame=  12
framedec.shape= (480, 640, 3)
Frame=  13
framedec.shape= (480, 640, 3)
Frame=  14
framedec.shape= (480, 640, 3)
Frame=  15
framedec.shape= (480, 640, 3)
Frame=  16
framedec.shape= (480, 640, 3)
Frame=  17
framedec.shape= (480, 640, 3)
Frame=  18
framedec.shape= (480, 640, 3)
Frame=  19
framedec.shape= (480, 640, 3)
Frame=  20

In [None]:
# Show the score computed by the evaluation cell (log compression ratio + 30 * SSIM).
print(performance)

26.680417538855423
