In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from scipy.fftpack import dct,idct

# The Basics of Codecs from a DSP Standpoint

##### Darren Ramsook
<img src="./Files/TTLAB.png" alt="Drawing" style="width: 200px;"/>


#### Presentation Overview

* Foundation Knowledge
    * Image Representation
    * Bit Depth
* Video Codecs
    * Intro. & History to Codecs
    * Generic Codec Practices
* Slides based on content from: https://github.com/leandromoreira/digital_video_introduction

#### Image Representation
* Digital images are represented as either a single matrix (e.g. black & white) or a stack of matrices (e.g. RGB images)

<img src="./Files/imageRep.png" alt="Drawing" style="width: 600px;"/>


In [None]:
# 5x5 binary bitmap laid out row by row: 1 marks a drawn pixel, 0 is background.
smileFace = np.array([
    [0, 0, 0, 0, 0],
    [0, 1, 0, 1, 0],
    [0, 0, 1, 0, 0],
    [1, 0, 0, 0, 1],
    [0, 1, 1, 1, 0],
])
print(smileFace)
# Optional: uncomment the two lines below to render the matrix as an image.
#plt.imshow(smileFace, cmap="Greys", vmin=0, vmax=1)
#plt.colorbar()

In [None]:
# Load the demo picture and show it next to each of its three colour planes.
rgbDemo = mpimg.imread("./Files/RGBImg.jpeg")
print(rgbDemo.shape)
plt.rcParams['figure.figsize'] = [10, 5]
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1,4)

# The planes are sliced from the last axis; for an RGB image the order is
# R, G, B, so index 1 is green and index 2 is blue.
rgbPanels = [
    (ax1, rgbDemo, "Full colour"),
    (ax2, rgbDemo[:,:,0], "Red intensities"),
    (ax3, rgbDemo[:,:,1], "Green intensities"),
    (ax4, rgbDemo[:,:,2], "Blue intensities"),
]
for panelAx, panelImg, panelTitle in rgbPanels:
    panelAx.axis('off')  # hide ticks/frames; the title stays visible
    panelAx.set_title(panelTitle)
    panelAx.imshow(panelImg)

#### Bit-depth

* Quantifies the color intensity being stored in an image channel.
* "True Color" = 16,777,216
    * 8 bits per pixel in a specific channel
    * 24 bits per pixel
<img src="./Files/bitdepth.png" alt="Drawing" style="width: 200px;"/>


In [None]:
# Read the sample photograph, display it, then report its array dimensions.
tobago = mpimg.imread("./Files/tobago.jpg")
plt.imshow(tobago)
print(np.shape(tobago))

In [None]:
def bitDepthConv(i, levels=4):
    """Quantise a single 0-255 intensity onto a coarser grid.

    The value is scaled to ``[0, levels]``, truncated to an integer step,
    and scaled back to the 0-255 range (truncating again).

    Parameters
    ----------
    i : number
        Intensity in the range 0-255.
    levels : int, optional
        Number of quantisation steps across the range (default 4).

    Returns
    -------
    int
        The quantised intensity.
    """
    return int((int((i / 255) * levels) / levels) * 255)


def vectorizedbitDepthConv(img, levels=4):
    """Apply the same quantisation as ``bitDepthConv`` to a whole array.

    Uses array arithmetic instead of ``np.vectorize`` (which calls the Python
    function once per element). ``np.trunc`` mirrors the ``int()`` truncation
    of the scalar version.

    Parameters
    ----------
    img : array_like
        Intensities in the range 0-255.
    levels : int, optional
        Number of quantisation steps across the range (default 4).

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``img``.
    """
    scaled = np.asarray(img, dtype=np.float64) / 255 * levels
    return np.trunc(np.trunc(scaled) / levels * 255).astype(int)


tobago12BitDepth = vectorizedbitDepthConv(tobago)
# With levels=4 every channel is reduced to the values 0, 63, 127, 191 and 255
# (255 is only produced by a fully saturated input).
# NOTE(review): the earlier note here said "Total colours = 2^8 = 256" and the
# variable name says 12 bit; neither matches levels=4 - confirm the intended depth.
# vmin/vmax are not passed: imshow ignores them for RGB input.
plt.imshow(tobago12BitDepth)

#### Video Codecs
* Without compression a 1hr30mins, 30fps, 3840 x 2160 (4k), 24 bits per pixel movie would need disk space:

$4031.08\ \text{GB} = \frac{5400\ \text{(seconds)} \times 30\ \text{(fps)} \times 8294400\ \text{(pixels)} \times 24\ \text{(bits per pixel)}}{8\ \text{(conversion to bytes)}}$

* Streaming a 1280 x 720, 30fps, 24 bits per pixel video would require a bandwidth of 663.552 Mbps

* Codec is a portmanteau of Encoder-Decoder
* Codec timeline:
<img src="./Files/historyCodec.png" alt="Drawing" style="width: 800px;"/>


## Generic Codec Practices:

* Picture Partitioning
* Predictions 
* Transform
* Quantization
* Entropy Coding
* Bitstream Format

#### Picture Partitioning

* Dividing the frame into small partitions and even sub partitions
* By splitting, predictions can be better focused
    * Small partitions for smaller moving parts
    * Bigger partitions for background
<img src="./Files/partioning.png" alt="Drawing" style="width: 400px;"/>


#### Predictions
* Inter-prediction (Temporal Redundancy) vs. Intra-prediction (Spatial Redundancy)
* Inter-Prediction
<img src="./Files/interpred.png" alt="Drawing" style="width: 500px;"/>

* Intra-Prediction
<img src="./Files/intrapred.png" alt="Drawing" style="width: 300px;"/>

#### Transform
* After getting the residual blocks (Predicted Blocks - Real Blocks), they can be transformed to signify which pixels can be discarded while maximizing quality.
* One such transformation is the Discrete Cosine Transform.
    * Change of basis from its natural representation to a sum of cosine functions 

In [None]:
# Reload the photo and collapse its colour channels into one plane by taking
# a weighted sum over the last axis.
tobago = mpimg.imread("./Files/tobago.jpg")
rgb_weights = [0.2989, 0.5870, 0.1140]
tobago_gs = tobago.dot(rgb_weights)
print(tobago_gs)
# Passing the colormap by name is equivalent to plt.get_cmap("gray").
plt.imshow(tobago_gs, cmap="gray")
plt.colorbar()

In [None]:
# Cut a patch out of the grayscale image (rows 300-399, columns 700-799)
# to use as the working block for the transform demo.
bagoSlice = tobago_gs[slice(300, 400), slice(700, 800)]
plt.imshow(bagoSlice, cmap="gray")
plt.colorbar()

In [None]:
# Type-II DCT taken along the last axis (each row is transformed on its own);
# these are the defaults of scipy.fftpack.dct, spelled out for the reader.
bagoSliceDCT = dct(bagoSlice, type=2, axis=-1)
plt.imshow(bagoSliceDCT)
plt.colorbar()

In [None]:
# Build the keep-mask from the coefficient array's own shape instead of a
# hard-coded 100x100, so the cell still works if the slice size changes.
# The flipped lower triangle keeps entries with row + col <= n_rows - 2.
dctRows, dctCols = bagoSliceDCT.shape
keepMask = np.flip(np.tri(dctRows, dctCols, -1), 0)
truncatedDCT = np.multiply(bagoSliceDCT, keepMask)

# The unnormalised fftpack transforms satisfy idct(dct(x)) == 2*N*x, where N is
# the length of the transformed (last) axis. Dividing by 2*N puts the
# reconstruction back on the same intensity scale as bagoSlice.
reconst = idct(truncatedDCT) / (2 * dctCols)

# Original patch on the left, reconstruction from the kept coefficients on the right.
fig, (bagoOriginal, bagoReconst) = plt.subplots(1,2)
bagoOriginal.imshow(bagoSlice,cmap=plt.get_cmap("gray"))
bagoReconst.imshow(reconst,cmap=plt.get_cmap("gray"))

#### Quantization

* Removing some coefficients was a form of quantization. 

<img src="./Files/quant1.png" alt="Drawing" style="width: 400px;"/>
<img src="./Files/quant2.png" alt="Drawing" style="width: 400px;"/>

#### Entropy Coding
* Many algos that compress data.
    * Look at VLC Coding

##### VLC Coding
Consider needing to stream symbols e,a,t and assuming that you would spend 8 bits for each symbol, therefore 24 bits without compression.
<img src="./Files/vlccoding.png" alt="Drawing" style="width: 200px;"/>

This can be encoded as [10][0][1110] or 1001110 which requires 7 bits for the stream.

* Both encoder and decoder must have the symbol/binary table.

#### Bitstream Format

* The compressed frames and context have to be sent to inform the decoder about all the decisions taken by the encoder (bit depth, color space, resolution, pred. info (motion vectors etc.)) and many more.

* E.g. the H.264 standard defines that info must be sent in macro frames called the Network Abstraction Layer units. 

<img src="./Files/bitstream.png" alt="Drawing" style="width: 400px;"/>
<img src="./Files/lut.png" alt="Drawing" style="width: 250px;"/>


### Find this Presentation with Code on https://github.com/DarrenR96/