In [1]:
"""
                                            Source code Summary

This code in this file is used to compress the video file obtained from static cameras like CCTV.
The 1st step is to load the target video and extract the frames from it. For video loading and frame extraction, the open-CV library is used. The 
1st frame of every video that is needed to be compressed is used as the reference frame and saved as it is in the output file. After storing
the 1st frame the upcoming frame represented as frame_N is extracted from the video stream. To find the non-redundant segment in the frame_N, frame_N
and reference frame has been passed through a frame segmentation process to conver it into segments of desired size. In our case we used a segment size
of 16*16. This segment size is adjustable and can be selected as per requirement of the user. A user define function "frame_segmentor" and "segmentor" 
has been used for segmenting 3D (BGR frame) and 2D frame respectively. The corresponding segments from reference_frame and frame_N are then fed into a
difference filter to remove the common section/pixels from the frame_N segment by converting its value to 0. The result of the difference filter is
saved in seg_diff variable. In order to remove noise from the seg_diff a threshold (pct=pixel correction threshold) has been applied to it. Any
value below this threshold has been converted to zero to remove the noise if any exist.
The clean seg_diff is then checked whether it’s a redundant or non-redundant segment by counting its non-zero value. If the count %age is higher than
the set threshold (missmatch_threshold), it is considered as the non-redundant segment and will be stored otherwise discarded. To store the 
non-redundant segments a empty frame (encoded_frame_segmented) of the same dimension as that of the reference frame has been created and then converted
 to segmented similar to that of refernce_frame in order to store the non-redundant segments. The index location of this segment has also been saved in 
 a ".bin" file which will be used in the reconstruction of frames in decoding process. This process will continue untill all the segment in the
refrence_frame has been processed.
When the segments count in the encoded_frame_segmented becomes equal to that of the segmented count in reference frame it has been desegmentized by
using user define "cell2mat" function and then stored it in the output video file by using cv library command. To store the frame in output video file
 1st a video object has been created "v_enc=cv.VideoWriter()" and then the output frame is stored in it using "v_enc.write(output_frame)" command.
 If a whole frame is redundant then a specil code (255 0) has been added to .bin file indicating that in the reconstruction process the reference_frame
 will be inserted in the decoded file. When the whole process completes, another special code (255 255) is added to the .bin file. This help in the
 reconstruction process, indicating that all frames has been reconstructed and stop further processing.

 For running this code the required dependencies are;
 1. Pip install numpy
 2. Pip install opencv-python
 3. Pip install scikit-image

 The section wise description is available in each section




"""
# Some well known libraries to load video files and process frames. (each frame is a 3D matric)

import numpy as np
import cv2 as cv
import struct


In [2]:
# imshow is a user define function and used to display image if needed. It is mostly used for debugging purpose. This function uses open-Cv library
#  to display frames.
def imshow(frame) :
    cv.imshow('Figure', frame)
    cv.waitKey(0)
    #video.release()
    cv.destroyAllWindows()

#===========================================================================

# This user define function is used to convert a 3D frame like reference_frame and frame_N into desired segments (16*16). It take two argumnts as
# an input one is the frame itself and the other is the segment size which is 16*16 in our case. This function uses numpy library to handle
# farme segments.

# 3D frame segmentor
def frame_segmenter (frame,segment_size):
    height, width, channels = frame.shape
    
    rows = height // segment_size
    columns = width // segment_size

    # Initialize an empty array to store the segments
    segments = np.empty((rows, columns, segment_size, segment_size, channels), dtype=frame.dtype)

    # Split the image into segments
    for i in range(rows):
        for j in range(columns):
            segment = frame[i*segment_size:(i+1)*segment_size, j*segment_size:(j+1)*segment_size, :]
            segments[i, j] = segment
    return segments


#========================================================================================================================
# This user define function is also used for segmenting a 2D frame into desired segments. It takes two argumnets as an inputs in which one is the
# frame and the 2nd one is the segment size. This function uses numpy library to handle farme segments.

# 2D frame segmentor
def segmentor(original_list,segment_size):

    shp = original_list.shape
    M=shp[0]
    N=shp[1]
    
    # Calculate the number of segments in rows and columns
    num_segments_m = M // segment_size
    num_segments_n = N // segment_size

    # Create an empty array to store the segmented data
    segmented_list = np.empty((num_segments_m, num_segments_n, segment_size, segment_size))

    for i in range(num_segments_m):
        for j in range(num_segments_n):
            segment = original_list[i*segment_size:(i+1)*segment_size, j*segment_size:(j+1)*segment_size]
            segmented_list[i, j] = segment

    print(segmented_list.shape)
    return segmented_list
#=========================================================================================================================

# This function is used to convert the frame containg the non-redundant segment (encoded_frame_segmented) into a continuous frame once completely
# filled to store it in the encoded file.
#This function make use of the "cat" funtion which is also a user define function. It concatinates the segments row wise and coloumn wise also
# ensuring correct allignmnet.

def cell2mat(segmentedFrame):
    tmp_rslt=[]
    resultFrame=[]
    shape= segmentedFrame.shape
    rows=shape[0]
    columns=shape[1]

    for ii in range(rows):
        for jj in range(columns):
            tmp_rslt=cat((tmp_rslt,segmentedFrame[ii, jj]),axis=1)

        resultFrame=cat((resultFrame,tmp_rslt),axis=0)
        tmp_rslt=[]
    
    return resultFrame
#========================================================================================================================

#This user define function is used to cocatenate the segments row wise and column wise also ensurimg correct allignmnet. This function is used 
#as a sub-function of "cell2mat" function. This function uses numpy library to handle tuple of arrays.

def cat(tupleOfArrays, axis=0):
    # deals with problems of concating empty arrays
    # also gives better error massages

    # first check that the input is correct
    assert isinstance(tupleOfArrays, tuple), 'first var should be tuple of arrays'

    firstFlag = True
    res = np.array([])

    # run over each element in tuple
    for i in range(len(tupleOfArrays)):
        x = tupleOfArrays[i]
        if len(x) > 0:  # if an empty array\list - skip
            if isinstance(x, list):  # all should be ndarray
                x = np.array(x)
            if x.ndim == 1:  # easier to concat 2d arrays
                x = x.reshape((1, -1))
            if firstFlag:  # for the first non empty array, just swich the empty res array with it
                res = x
                firstFlag = False
            else:  # actual concatination

                # first check that concat dims are good
                if axis == 0:
                    assert res.shape[1] == x.shape[1], "Error concating vertically element index " + str(i) + \
                                                       " with prior elements: given mat shapes are " + \
                                                       str(res.shape) + " & " + str(x.shape)
                else:  # axis == 1:
                    assert res.shape[0] == x.shape[0], "Error concating horizontally element index " + str(i) + \
                                                       " with prior elements: given mat shapes are " + \
                                                       str(res.shape) + " & " + str(x.shape)

                res = np.concatenate((res, x), axis=axis)
    return res

#====================================================================================================================

#This is a user define function to calculte the PSNR and SSIM values of the frames. It take two frames (reference_frame,decoded_frame) in the
# input arguments to calculate the above mentioned values. 
#This function uses open-cv and "scikit images" library for SSIM and PSNR value calculation.

def eval_matric(reference_frame,decoded_frame):
    import cv2
    from skimage.metrics import peak_signal_noise_ratio, structural_similarity

    # Convert frames to grayscale (assuming frames are color images)
    gray_frame1_reference = cv2.cvtColor(reference_frame, cv2.COLOR_BGR2GRAY)
    gray_frame2_decoded = cv2.cvtColor(decoded_frame, cv2.COLOR_BGR2GRAY)

    # Calculate PSNR
    psnr_value = peak_signal_noise_ratio(gray_frame1_reference, gray_frame2_decoded)

    # Calculate SSIM
    ssim_value, _ = structural_similarity(gray_frame1_reference, gray_frame2_decoded, full=True)


    return psnr_value, ssim_value

In [None]:
"""
The flow of the code execution is
1. Open the video file
2. Extract the 1st frame and keep it as reference
3. Extract the dimension (Height,Width,Channels) of the frames 
4. Conver the frame into segments
5. Extract the next frame as frame_N.
6. Conver it into segments
7. Take the 1st segment of both the frames and pass it through the difference filter.
8. Apply threshold value to the result of the difference filter to remove any noise.
9. Calculte the %age of non zero value in clean segment.
10. If the %age is  higher than the missmatch_threshold store the segment in an "empty segmented frame" else discard it.
11. Repeat the process from 7 to 10 fro the all the segments.
12. Whenever the segment count in the empty segmented frame reaches to that of in reference frame apply the cell2mate function to it and store it
in the encoded video.

"""
#  Open the video file
video = cv.VideoCapture('s1.avi')

# # Read the first frame from vido file it will be used as reference frmae and will be store direcly in the encoded video file
ret, frame_C = video.read()    

segment_size=16  # Initializing the segement size by 16. It will be used to segment the frames in 16*16 segments accross all channels
height, width, channels = frame_C.shape # Extracting the height width and channel value 
rows = height // segment_size # Calculating the number of segemnts in height of the frame.
columns = width // segment_size # Calculating the number of segemnts in width of the frame.
frame_C=frame_C[0:rows*segment_size,0:columns*segment_size,:] # Converting the frame to a regular size frame 
frame_C_segmented=frame_segmenter(frame_C,segment_size) # Segmenting frame_C into 16*16 segments accross all channels(BRG)

#Creating object for storing the encoded video using H.264 Codec
v_enc = cv.VideoWriter('enc_s1_1.mp4', cv.VideoWriter_fourcc(*'x264'), video.get(cv.CAP_PROP_FPS), (columns*segment_size, rows*segment_size))
v_enc.set(cv.VIDEOWRITER_PROP_QUALITY, 100) # Setting the quality of the video object to 100.

p=v_enc.write(frame_C)  # storing 1st frame as reference frame in the encoded video
missmatch_threshold=2 # Initializing missmatch threshold value in %. If a segment missmatch is higher than this, it will be store else discarded.
pct=50 # Its pixel correction threshold and is used for removing noise from the segments coming from the difference filter
f_count=200 # No of frame cout to be processed during the experiment
f_countt=0 # A counter used for counting the processed frame during execution
mmsc=0  # This counter count the missmatched segments in the "encoded_frame_segmented". 

evf=1  # encoded video frame counter, it counts the frames in the encoded file

# Creating an empty frame and segmenting it for storing non-redundant segments
encoded_frame_segmented = np.empty((rows, columns, segment_size, segment_size, channels)) 
tmp_encoded_frame = encoded_frame_segmented.copy()


with open("data_s1.bin", "wb") as file:  # The .bin file is used to store the index location of the non-redundant segments and
    #                                       will be used in the reconstruction of decoded frames. The decoded frames will be
    #                                         used in calculating PSNR and SSIM of the decoded video obtained from proposed algorithm

    # while (video.isOpened()): #will run till the end of file
    while (f_count): #will run till the value of f_count become 0
        f_count-=1 # Decrementing the frame count so that the while loop terminate upon reaching it value to zero
        f_countt+=1 # Counting the frames that has been processed during execution

        print('frames processed=',f_countt )

#         reading Next frame frame_N
        ret, frame_N = video.read()

        


        # Check if the frame was successfully read
        if ret==True:

            # Converting the frame_N to a regular size frame
            frame_N=frame_N[0:rows*segment_size,0:columns*segment_size,:]
            frame_N_segmented=[] # Clearing frame_N_segmented for new values
            frame_N_segmented=frame_segmenter(frame_N,segment_size) # segmenting frame_N using frame_segmenter function

            red_status=1   # Its a flag used for indicating a frame which is completely redundant.

            # Nested for loop is used to access all the segments of the segmented frame.
            for ii in range(rows):
            
                for jj in range(columns):
                    segment_diff=np.abs(np.int16(frame_N_segmented[ii,jj])-np.int16(frame_C_segmented[ii,jj])) # Difference filter 
                    segment_diff[segment_diff<=pct]=0 # Applying threshold to remove noise
                    miss_match_count=np.count_nonzero(segment_diff) # to count the non zero values in the segment
                    miss_match=miss_match_count/np.size(segment_diff)*100 # calculating the missmatch in percentage


                    # In this "if" function the missmatch value is checked. If its above the missmatch threshold then the segemnt will be stored in the
                    # encoded_frame_segmented and its index information will be stored in the .bin file and the red_status flag will be converted to zero
                    # showing that the whole frame is not redundant.
                    
                    if (miss_match>=missmatch_threshold):
                        ir,jc=divmod(mmsc,columns)
                        mmsc+=1 # mismatched segments counter
                        mm_segment=frame_N_segmented[ii,jj].copy()
                        red_status=0
                        encoded_frame_segmented[ir,jc]=mm_segment
                
                        num = [ii, jj]
                        binary_data = struct.pack("BB", *num)
                        file.write(binary_data)
    
                        # This "if" statemnt block is used to check whether the encoded_frame_segemnted is full or not. If yes then it will be converted
                        # to a continuous frame using cell2mat function and will be saved in the encoded video file. The mmsc (missmatch segment counter)
                        #will be set to 0 and the evf(encoded video frame) counter will be incremented by 1.
                        if mmsc==(rows*columns):
                            
                            de_f=np.uint8(cell2mat(encoded_frame_segmented))
                            v_enc.write(de_f)
                            cv.imwrite('encode.jpg',de_f)
                            encoded_frame_segmented=tmp_encoded_frame.copy()  
                            mmsc=0
                            evf+=1       # encoded video frames counter
                            print('encoded frame filled' )
                                    
            # This "if" statement block will execute after the above nested "for loop" is done. it is used to check if a whole frame is redundant
            # or not. If red_status is high it means that whole frame is redundant and a special code (255 0) is added to the .bin file.
            if red_status==1:   #  a whole redundent frame has been found
                num = [255, 0]
                binary_data = struct.pack("BB", *num)
                file.write(binary_data)
                mmsc+=1

            # When the nested "for loop" is fully executed  and the red_status is 0 it means that all the frames has been processed and no futher frames are
            # available to process and a special code (255 255) will be added to the .bin file indicating the completion of the process. 
            else:       # frame processing completed
                num = [255, 255]
                binary_data = struct.pack("BB", *num)
                file.write(binary_data)
  
                mmsc+=1

        else:
            break
    print("Process completed")
    de_f=np.uint8(cell2mat(encoded_frame_segmented))
    v_enc.write(de_f)
    evf+=1
    video.release()
    v_enc.release()
    print('total frames in encoded video =',evf)
    cv.destroyAllWindows()



In [3]:
"""
Decoder and SSIM, PSNR evaluation source code
                                                Code Summary

This part of the source code is used for decoding the encoded video, frame by frame and calculating PSNR and SSIM for each decoded frame.

The encoded video file contains a reference frame which is the 1st frame of the said video sequence and all other farmes are the encoded frames
containg the non-redundant segemnts. To reconstruct the frames the referece frame and the subsequent encoeded frame is converted into segments 
of same segment size (16*16) which is used during the encoding process using frame_segmenter, a user define function. The .bin file is also 
loaded to get the index value of the 1st segment of the segmented encoded frame. This 1st segment will be palced in the segmented reference frame 
at the location indicated by the index value obtained .bin file. This process will continue till a special code (255 0) is detected in .bin file
indicating that the frame has completed. This frame will be then evaluated for the PSNR and SSIM values. If (255 255) code is detected in the .bin
file values then the reference frame will be used ad the output frame and will be evaluated for the SSIM and PSNR value.

Note:The SSIM and PSNR values of the output frames will be calculated against their corresponding frame of the original video frames


"""
dec_frame_count=0   # This counter variable is used to count the number of decoded frames
psnr_val=[]          # This array is used to store the PSNR values of the frames and then the average value is calculated at the end.
ssim_val=[]         # This array is used to store the SSIM values of the frames and then the average value is calculated at the end.

# Opening the encoded video using open-cv command 
enc_video = cv.VideoCapture('enc_s1.mp4') # Proposed algorithm encoded video is used as input argumnet.

#Opening original video using open-cv command. Its frames will be used for caculating the PSNR and SSIN values
ref_video=cv.VideoCapture('s1.avi')  # Original video

#  Read the first frame
ret, frame_ref = enc_video.read() # Extracting 1st frame which is the reference frame

ret1, reference_frame = ref_video.read() # Extracting 1st frame from the original video.


segment_size=16 # Initializing the segement size by 16. It will be used to segment the frames in 16*16 segments accross all channels
height, width, channels = frame_ref.shape # Extracting the height width and channel value
rows = height // segment_size # Calculating the number of segemnts in height of the frame.
columns = width // segment_size # Calculating the number of segemnts in width of the frame.


# Converting the reference_frame to a regular size frame
reference_frame=reference_frame[0:rows*segment_size,0:columns*segment_size,:]

# calculating the PSNR and SSIM value of the 1st frame (reference frame) with its coreecponding frame in the original video
psnr_value,ssim_value=eval_matric(reference_frame,frame_ref)  # eval_matric(reference_frame,decoded_frame)


psnr_val.append(psnr_value)
ssim_val.append(ssim_value)
print(f'PSNR: {psnr_value:.2f} dB')
print(f'SSIM: {ssim_value:.4f}')

if ret==True: # Checking the status of the reference frame whether successfuly extarcted are not and then display the staus using "print" commands 
   
   # Segmenting the 1st frame of the encode video which is the reference frame into a 16*16 segments accross all channels(BRG)
    frame_ref_segmented=frame_segmenter(frame_ref,segment_size)
    tmp_ref_segmented=frame_ref_segmented.copy()
    
    
    print('reference frame successfuly extracted')
else:
    print('reference frame extraction failed')
    
with open("data_s1.bin", "rb") as file:  # opening .bin file
    
    while (enc_video.isOpened()): # This loop will run until all frames has been processed.
        ret, frame_enc = enc_video.read() # Reading frame from the encoded video 

        ret1, reference_frame = ref_video.read() # Reading corresponding frame from the original vidoe used as reference 
        reference_frame=reference_frame[0:rows*segment_size,0:columns*segment_size,:] # Converting the reference_frame to a regular size frame
        
        if ret==True: # Checking the status of the encoded frame whether successfuly extarcted are not.

            print('encoded frame successfuly extracted') # Printing the successful extraction of the encded frame
            frame_enc_segmented=frame_segmenter(frame_enc,segment_size) # Segemnting the encoded frame into 16*16 segments
            
            # Nested for loop is used to access all segemnts of both (encoded and reference) segmented frames
            for ii_dec in range(rows):

                for jj_dec in range(columns):
                                      
                    binary_data = file.read(2)  # Reading index data of segemnts from the .bin file
                    
                    if binary_data: # Checking if the data is available or not

                        i_d,j_d = struct.unpack("BB", binary_data) #extracting index data of segemnts from the .bin file

                        # Ths "if" statement block is used to detect that the reconstruction of a frame has been completed and is readyfor PSNR
                        # and ISSM value calculation. The above mentioned values were calculated and then added to their corresponding arrays.
                        if (i_d==255) & (j_d==255):
                            dec_frame_count+=1  # decoded frames count incerented
                            dec_frame=np.uint8(cell2mat(frame_ref_segmented))
                            print('frame successfully decoded',dec_frame_count)

                            psnr_value,ssim_value=eval_matric(reference_frame,frame_ref)  # eval_matric(reference_frame,decoded_frame)
                            print(f'PSNR: {psnr_value:.2f} dB')
                            print(f'SSIM: {ssim_value:.4f}')
                            psnr_val.append(psnr_value)
                            ssim_val.append(ssim_value)

                            frame_ref_segmented=tmp_ref_segmented.copy()   
                                             

                        # This "else" statement is used to identify a special code (255 0) in the .bin file value which indicate that no non-redundant
                        #  segment were found during encoding processs and whole frame was redundant so the refrence farme as it is will be used as the
                        #  output frame and its PSNR and SSIM values are calculated and then will be added to their corresponding arrays.
                        elif (i_d==255) & (j_d==0):
                            dec_frame_count+=1  # decoded frames count incremented
                            print('frame successfully decoded',dec_frame_count)
                            psnr_value,ssim_value=eval_matric(reference_frame,frame_ref)  # eval_matric(reference_frame,decoded_frame)
                            print(f'PSNR: {psnr_value:.2f} dB')
                            print(f'SSIM: {ssim_value:.4f}')
                            psnr_val.append(psnr_value)
                            ssim_val.append(ssim_value)
                        
                        else:
                            # extracting segemnt from the "segmented encoded frame" 
                            dec_seg=frame_enc_segmented[ii_dec,jj_dec] #---------------
                            # Placing the extracted segment in the reference frame using index information obtained from the .bon file
                            frame_ref_segmented[i_d,j_d]=dec_seg.copy()  #==========================


            dec_frame=np.uint8(cell2mat(frame_ref_segmented)) #Converting the reconstructed frame to a continuous frame
        else:
            print('decoding process completed')
            print('Proposed algorithm')
            print(f'SSIN ={np.mean(ssim_val):.4f}','\n',f'PSNR ={np.mean(psnr_val):.2f} dB')
            break
            
enc_video.release()
ref_video.release()
cv.destroyAllWindows()

PSNR: 44.07 dB
SSIM: 0.9978
reference frame successfuly extracted
encoded frame successfuly extracted
frame successfully decoded 1
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 2
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 3
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 4
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 5
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 6
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 7
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 8
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 9
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 10
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 11
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 12
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 13
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 14
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 15
PSNR: 44.07 dB
SSIM: 0.9978
frame successfully decoded 16
PSNR: 4