### References and Open-Source Code:

### **Foveation**:

The implementation of the foveation method is taken from this GitHub [repository](https://github.com/ouyangzhibo/Image_Foveation_Python).

The algorithm is replicated from the following papers:

[Paper 1](http://svi.cps.utexas.edu/EI466209.pdf): Gaze-contingent real-time simulation of arbitrary visual fields

[Paper 2](https://openaccess.thecvf.com/content_cvpr_2015/papers/Jiang_SALICON_Saliency_in_2015_CVPR_paper.pdf):
SALICON: Saliency in Context

### **Object detection**:
[Face Detection using Haar Cascades](https://docs.opencv.org/3.4.3/d7/d8b/tutorial_py_face_detection.html) from OpenCV

[Original Paper](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=990517&tag=1) for Haar Cascades


### **Optical flow(flownet)**:
[Flownet](https://towardsdatascience.com/a-brief-review-of-flownet-dca6bd574de0) from ml4a models

[Original Paper](https://arxiv.org/pdf/1504.06852.pdf) for Flownet

### Imports and Installations

In [None]:
!pip3 install --quiet ml4a

[?25l[K     |▎                               | 10 kB 24.1 MB/s eta 0:00:01[K     |▋                               | 20 kB 26.4 MB/s eta 0:00:01[K     |█                               | 30 kB 11.4 MB/s eta 0:00:01[K     |█▏                              | 40 kB 8.8 MB/s eta 0:00:01[K     |█▌                              | 51 kB 5.3 MB/s eta 0:00:01[K     |█▉                              | 61 kB 5.8 MB/s eta 0:00:01[K     |██                              | 71 kB 5.6 MB/s eta 0:00:01[K     |██▍                             | 81 kB 6.3 MB/s eta 0:00:01[K     |██▊                             | 92 kB 4.9 MB/s eta 0:00:01[K     |███                             | 102 kB 5.2 MB/s eta 0:00:01[K     |███▎                            | 112 kB 5.2 MB/s eta 0:00:01[K     |███▋                            | 122 kB 5.2 MB/s eta 0:00:01[K     |███▉                            | 133 kB 5.2 MB/s eta 0:00:01[K     |████▏                           | 143 kB 5.2 MB/s eta 0:00:01[K  

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import math
import cv2 as cv
from ipywidgets import Video, Image
import subprocess
import os, glob, shutil
from google.colab.patches import cv2_imshow
from ml4a.canvas import canvas
from ml4a.models import flownet
from ml4a import image

%matplotlib inline

# Working directories (relative to the current directory) for per-frame PNGs.
ORIG_FRAMEDIR = 'orig_frames'  # frames extracted from the source video
FOV_FRAMEDIR = 'fov_frames'  # processed (foveated / flow-warped) frames

In [None]:
!wget -q --show-progress --no-check-certificate 'https://docs.google.com/uc?export=download&id=19gCZJZ-Y_fYrGpL752tXmiT2jLvk7Kc1' -O andy.mp4

In [None]:
# Download the XML file that defines the Haar-cascade frontal-face detection model.
!wget https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml

In [None]:
# Test videos are cloned from our GitHub repo.
!git clone https://github.com/Prime-Nemesis/IVP-PROJECT-TEAM15.git

### Utility Functions

In [None]:
# function to plot images side by side
def show_pair_sidebyside(im1, im2, title1, title2):
    """Display two images next to each other, each with its own title.

    Both images are rendered with the 'gray' colormap on a 1x2 subplot grid
    (``im1`` on the left, ``im2`` on the right) and the figure is shown
    immediately.
    """
    _, (left_ax, right_ax) = plt.subplots(1, 2)
    panels = ((left_ax, im1, title1), (right_ax, im2, title2))
    for axis, picture, caption in panels:
        axis.title.set_text(caption)
        axis.imshow(picture, cmap='gray')
    plt.show()

In [None]:
def draw_boxes(boxes, image, color):
    """Return a colour-converted copy of ``image`` with every box outlined.

    boxes: iterable of boxes indexed as (x1, y1, x2, y2); coordinates are
           truncated to int before drawing.
    image: array-like image; it is converted with cv.COLOR_BGR2RGB first,
           so the rectangles are drawn on the converted array.
    color: colour passed straight to cv.rectangle.

    Rectangles are drawn with a line thickness of 2.
    """
    annotated = cv.cvtColor(np.asarray(image), cv.COLOR_BGR2RGB)
    for box in boxes:
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))
        cv.rectangle(annotated, top_left, bottom_right, color, 2)
    return annotated

In [None]:
def pre_cleandirs():
    """Reset the frame working directories to an empty state.

    Any existing ORIG_FRAMEDIR / FOV_FRAMEDIR trees are deleted first, then
    both directories are created afresh.
    """
    frame_dirs = (ORIG_FRAMEDIR, FOV_FRAMEDIR)

    # Remove leftovers from a previous run before recreating anything.
    for frame_dir in frame_dirs:
        if os.path.exists(frame_dir):
            shutil.rmtree(frame_dir)

    for frame_dir in frame_dirs:
        os.mkdir(frame_dir)

In [None]:
def generate_video(frames_dir, video_path, fps):
    """Encode the numbered PNG frames in ``frames_dir`` into a video file.

    frames_dir: directory holding frame0.png, frame1.png, ... (frame%d.png).
    video_path: output file; an existing file at this path is removed first.
    fps: input frame rate handed to ffmpeg via -framerate.

    ffmpeg is invoked with an output rate of 30 and the yuv420p pixel format.
    Its exit status is not inspected.
    """
    target = f'{video_path}'
    if os.path.exists(target):
        os.remove(target)

    command = [
        'ffmpeg',
        '-framerate', f'{fps}',
        '-i', f'./{frames_dir}/frame%d.png',
        '-r', '30',
        '-pix_fmt', 'yuv420p',
        target,
    ]
    subprocess.call(command)


In [None]:
def get_box_centre(box):
    """Return [cx, cy], the midpoint of a box indexed as (x1, y1, x2, y2)."""
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    centre_x = (left + right) / 2
    centre_y = (top + bottom) / 2
    return [centre_x, centre_y]
  
def get_center(im):
    """Return the (x, y) centre of an image as a tuple of ints.

    The width is read from ``im.shape[1]`` and the height from
    ``im.shape[0]``; each is halved and truncated to int.
    """
    rows, cols = im.shape[0], im.shape[1]
    return (int(cols / 2), int(rows / 2))

**The average video velocity is defined and computed as the mean absolute optical flow, averaged over all frames of the video.**

In [None]:
def avg_video_velocity(orig_video_path: str) -> float:
    '''
    Compute the average velocity of a video from its optical flow.

    For every pair of consecutive frames the flow is estimated with
    flownet, blurred, and reduced to the mean of its absolute values; the
    result is the average of those per-pair means (per the original notes,
    in pixels/frame).

    orig_video_path: path of the video to analyse.

    Returns 0.0 when fewer than two frames can be read (for example when
    the file cannot be opened), since no flow can be computed.
    '''
    cap = cv.VideoCapture(orig_video_path)

    total_flow = 0.0  # sum of the per-pair mean absolute flow
    n_pairs = 0       # number of consecutive-frame pairs actually processed

    try:
        # The first frame only seeds the "previous frame" slot.
        ret, prev_orig_frame = cap.read()
        while ret:
            ret, frame = cap.read()
            if not ret:
                break
            flow = flownet.run(frame, prev_orig_frame)
            # blurring the flow reduces any high-frequency noise in the raw flowmap
            flow = flownet.blur(flow, blur_times=10)
            total_flow += np.mean(np.abs(flow))
            n_pairs += 1
            prev_orig_frame = frame
    finally:
        # Always free the decoder handle, even if flownet raises.
        cap.release()

    if n_pairs == 0:
        return 0.0
    # Divide by the number of flow estimates, not by the number of reads.
    return total_flow / n_pairs

### Foveation method

In [None]:
def genGaussiankernel(width, sigma):
    """Build a normalised 2-D Gaussian kernel.

    width: nominal kernel width; the kernel spans integer offsets from
           -int(width/2) to +int(width/2) inclusive along each axis.
    sigma: standard deviation of the Gaussian.

    Returns a square 2-D array whose entries sum to 1. It is used to smooth
    an image before downsampling.
    """
    half = int(width / 2)
    offsets = np.arange(-half, half + 1, 1, dtype=np.float32)
    grid_x, grid_y = np.meshgrid(offsets, offsets)
    squared_radius = grid_x ** 2 + grid_y ** 2
    weights = np.exp(-squared_radius / (2 * sigma ** 2))
    return weights / np.sum(weights)

In [None]:
def pyramid(im, sigma=1, prNum=6):
    """Build a Gaussian pyramid whose levels are all at the input resolution.

    im: input image array (height x width, optionally with a channel axis).
    sigma: standard deviation of the 5x5 Gaussian smoothing kernel.
    prNum: number of pyramid levels to produce.

    Returns a list of ``prNum`` images. Level 0 is a copy of ``im``; level i
    has been smoothed and halved i times, then upsampled back (doubling and
    smoothing at each step) so that it has the original width and height.
    """
    height_ori, width_ori = im.shape[:2]
    G = im.copy()
    pyramids = [G]

    # gaussian blur
    # using 5x5 kernel
    Gaus_kernel2D = genGaussiankernel(5, sigma)

    # downsample
    for _ in range(1, prNum):
        # smoothen the image to prevent sharp artifacts
        G = cv.filter2D(G, -1, Gaus_kernel2D)
        height, width = G.shape[:2]
        # down sample the image by half in both the dimensions
        G = cv.resize(G, (int(width/2), int(height/2)))
        pyramids.append(G)

    # upsample ahead and store for the next stages
    # (iterate over every level actually built, not a fixed count of 6)
    for i in range(1, prNum):
        curr_im = pyramids[i]
        for j in range(i):
            if j < i-1:
                im_size = (curr_im.shape[1]*2, curr_im.shape[0]*2)
            else:
                # last step: snap to the exact original size, which undoes
                # the truncation introduced by the integer halving
                im_size = (width_ori, height_ori)
            curr_im = cv.resize(curr_im, im_size)
            curr_im = cv.filter2D(curr_im, -1, Gaus_kernel2D)
        pyramids[i] = curr_im

    return pyramids

In [None]:
def foveat_img(im, fixs, alpha, p, k):
    """
    Produce a foveated version of an image around one or more fixations.

    im: input image; its shape is unpacked as (height, width, channels) and
        exactly three channels are blended below
    fixs: non-empty sequence of fixations of form [(x1, y1), (x2, y2), ...]
        These fixation coordinates act as centers of foveations
    alpha: constant of the resolution map R = alpha / (theta + alpha)
    p: divisor applied to the pixel distance from a fixation to obtain theta
    k: constant of the transfer function and of the omega cut-offs

    This function outputs the foveated image (cast to uint8) with given
    input image and fixations.
    """
    # Fixed parameters: Gaussian sigma and number of pyramid levels.
    sigma=0.248
    prNum = 6
    As = pyramid(im, sigma, prNum)
    height, width, _ = im.shape
    
    # coeffs for foveations formulas

    # theta: per-pixel distance to the nearest fixation, divided by p.
    x = np.arange(0, width, 1, dtype=np.float32)
    y = np.arange(0, height, 1, dtype=np.float32)
    x2d, y2d = np.meshgrid(x, y)
    theta = np.sqrt((x2d - fixs[0][0]) ** 2 + (y2d - fixs[0][1]) ** 2) / p
    for fix in fixs[1:]:
        theta = np.minimum(theta, np.sqrt((x2d - fix[0]) ** 2 + (y2d - fix[1]) ** 2) / p)

    # resolution map: 1 at a fixation, 0.5 where theta == alpha
    R = alpha / (theta + alpha)
    
    # transfer function: one map per level, plus an all-zero map as the last entry
    Ts = []
    for i in range(1, prNum):
        Ts.append(np.exp(-((2 ** (i-3)) * R / sigma) ** 2 * k))
    Ts.append(np.zeros_like(theta))

    # omega[i-1] is the value of R at which Ts[i-1] equals 0.5;
    # the last entry is never written and stays 0
    omega = np.zeros(prNum)
    for i in range(1, prNum):
        omega[i-1] = np.sqrt(np.log(2)/k) / (2**(i-3)) * sigma

    # R never exceeds 1, so cap the cut-offs at 1
    omega[omega>1] = 1

    # layer index: pixel gets i when omega[i] <= R <= omega[i-1]; on a shared
    # boundary the later (larger) i wins; unmatched pixels keep 0
    layer_ind = np.zeros_like(R)
    for i in range(1, prNum):
        ind = np.logical_and(R >= omega[i], R <= omega[i - 1])
        layer_ind[ind] = i

    # Blending function between consecutive levels; 1e-5 guards against a
    # zero denominator
    Bs = []
    for i in range(1, prNum):
        Bs.append((0.5 - Ts[i]) / (Ts[i-1] - Ts[i] + 1e-5))

    # Ms: final output layers (per-level weight maps)
    Ms = np.zeros((prNum, R.shape[0], R.shape[1]))

    for i in range(prNum):
        # pixels whose layer index is i: weight of level i itself
        ind = layer_ind == i
        if np.sum(ind) > 0:
            if i == 0:
                Ms[i][ind] = 1
            else:
                Ms[i][ind] = 1 - Bs[i-1][ind]

        # pixels whose layer index is i + 1: complementary weight of level i
        # (for i == prNum - 1 no pixel matches, so Bs is never over-indexed)
        ind = layer_ind - 1 == i
        if np.sum(ind) > 0:
            Ms[i][ind] = Bs[i][ind]

    #print('num of full-res pixel', np.sum(Ms[0] == 1))
    # generate periphery image: weighted sum of the pyramid levels, per channel
    im_fov = np.zeros_like(As[0], dtype=np.float32)
    for M, A in zip(Ms, As):
        for i in range(3):
            im_fov[:, :, i] += np.multiply(M, A[:, :, i])

    # cast back to 8-bit; no explicit rounding or clipping is done here
    im_fov = im_fov.astype(np.uint8)
    return im_fov

### Object Detection

In [None]:
# Module-level Haar-cascade face detector built from the frontal-face XML file
# (expected in the current directory).
# NOTE(review): presumably a missing XML file does not raise here — confirm
# with obj_detector.empty() before relying on detections.
obj_detector = cv.CascadeClassifier('haarcascade_frontalface_default.xml')

In [None]:
def get_fixation_point(frame):
    """Choose the fixation point for a frame.

    Runs the module-level face detector on ``frame``. When at least one
    face is found, the centre of the first detected box is returned as
    [cx, cy]; otherwise the centre of the frame is returned as (xc, yc).

    frame: image array handed to the detector.
    """
    # perform face detection
    bboxes = obj_detector.detectMultiScale(frame)

    # len() rather than truthiness: the detections may be a NumPy array
    if len(bboxes) == 0:
        return get_center(frame)

    # detections are (x, y, w, h); convert the first one to corner form
    x, y, w, h = bboxes[0]
    return get_box_centre((x, y, x + w, y + h))


### Implementation of Proposed approach

In [None]:
def foveated_video_processing(orig_video_path: str, key=5, visual_angle=1.5, pixel_density=7.5, k=3) -> str:
    """
    Foveate a video and return the path of the resulting video.

    Every ``key``-th frame (frames 0, key, 2*key, ...) is foveated directly
    around its fixation point. Each frame in between is approximated by
    warping the previous output frame with the optical flow estimated
    between the current and the previous original frame.

    orig_video_path: path of the input video.
    pixel_density: number of pixels a person can see in a degree of visual angle
                          which can be changed to simulate different viewing distances
    visual_angle: half-angle height means that when θ(x, y) = α the image will become only
                         half the resolution of the center of attention (θ(x, y) = 0),
                         where θ is the visual angle subtended by center of foveation
    k: helper param for the transfer function in foveation
    key: period by which the foveation is scheduled.

    Returns the output path: the input path with ``_fov`` inserted before
    the file extension.

    Raises ValueError when no frame can be read from ``orig_video_path``.
    """
    pre_cleandirs()

    # Dump every frame of the source video to ORIG_FRAMEDIR.
    cap = cv.VideoCapture(orig_video_path)
    try:
        fps = cap.get(cv.CAP_PROP_FPS)
        print(f'fps={fps}')

        N_orig = 0
        ret, frame = cap.read()
        while ret:
            cv.imwrite(f"./{ORIG_FRAMEDIR}/frame{N_orig}.png", frame)
            N_orig += 1
            ret, frame = cap.read()
    finally:
        # Free the decoder handle even if writing a frame fails.
        cap.release()

    if N_orig == 0:
        raise ValueError(f'no frames could be read from {orig_video_path!r}')

    prev_orig_frame = None
    prev_fov_frame = None

    for i in range(N_orig):

        frame = cv.imread(f"./{ORIG_FRAMEDIR}/frame{i}.png")

        if i % key:
            # Intermediate frame: warp the previous output along the flow.
            flow = flownet.run(frame, prev_orig_frame)
            # blurring the flow reduces any high-frequency noise in the raw flow map
            flow = flownet.blur(flow, blur_times=10)

            mapping = flownet.flow_to_mapping(flow)
            fov_frame = canvas.map_image(prev_fov_frame, mapping)

        else:
            # Key frame: foveate from scratch around the fixation point.
            x, y = get_fixation_point(frame)
            fov_frame = foveat_img(frame, [(x, y)], visual_angle, pixel_density, k)

        cv.imwrite(f"./{FOV_FRAMEDIR}/frame{i}.png", fov_frame)
        prev_orig_frame = frame
        prev_fov_frame = fov_frame

    # splitext only splits off the final extension, so dots elsewhere in the
    # path (e.g. a leading "./" or a dotted directory name) are left intact.
    root, ext = os.path.splitext(orig_video_path)
    fov_video_path = f'{root}_fov{ext}'
    generate_video(FOV_FRAMEDIR, fov_video_path, fps)

    return fov_video_path

## Testing and Experimentation

In [None]:
# Original video path
orig_video_path = 'andy.mp4'

In [None]:
orig_video = Video.from_file(orig_video_path)
orig_video

In [None]:
# Note: the drastic zoom changes in this clip cause the optical-flow warping to produce some inaccurate approximations.
fov_video_path = foveated_video_processing(orig_video_path, key=3, visual_angle=2)
fov_video = Video.from_file(fov_video_path)
fov_video

In [None]:
avg_video_velocity('andy.mp4')

In [None]:
# test_video_path = 'IVP-PROJECT-TEAM15/test_videos/astronaut_crew.mp4'
# test_video = Video.from_file(test_video_path)
# test_video

In [None]:
# fov_video_path = foveated_video_processing(test_video_path, key=5, visual_angle=2)
# fov_video_test = Video.from_file(fov_video_path)
# fov_video_test

In [None]:
# avg_video_velocity('IVP-PROJECT-TEAM15/test_videos/astronaut_crew.mp4')