#### Installing Required Libraries.

#### Colab Specific Code (Not for Review)

In [None]:
# Colab-only setup: mount Google Drive at /content/gdrive so that the
# "/content/gdrive/My Drive/..." paths used later in this notebook resolve.
# force_remount=True is passed so the mount is redone even when one already exists.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


#### Importing Required Libraries

In [None]:
import sys
import os
import cv2
import csv
import pandas as pd
import numpy  as np
import re

#### Generic Function to Make Required Directories

In [None]:
def make_dirs(*dirs):
  """Create each directory given in ``dirs``, including missing parents.

  Directories that already exist are left as they are (``exist_ok=True``),
  so the call is safe to repeat.
  """
  for target in dirs:
    os.makedirs(target, exist_ok=True)

#### Function to Convert Frames to Greyscale and Resize Them.

In [None]:
def convert_to_greyscale(root_folder,Content_folder, video_path, video_file, frames_dir,
                         frame_width=2000, frame_height=2000):
  """Convert one video's extracted frames to greyscale and save resized copies.

  Every ``.jpg`` file found in ``<Content_folder>/<video_file>/<frames_dir>`` is
  read with OpenCV, converted to greyscale and written twice:

  * ``<Content_folder>/<video_file>/Gray_frames/Gray_Frame_<video_file>_<NN>.jpg``
    at its original resolution;
  * ``<Content_folder>/<video_file>/Resized_Gray_frames/Resized_Gray_frames_<video_file>_<NN>.jpg``
    resized to ``frame_width`` x ``frame_height``.

  ``<NN>`` is a zero-padded running index (at least two digits) over the frames
  that were successfully read. Frames are processed in sorted file-name order so
  the numbering is reproducible between runs.

  Author's notes on the chosen size and colour handling:
    * EasyOCR is said to cap images at 2500 px by default; 2000x2000 worked well
      for this use case, so that is the default here. Adjust as required.
    * OpenCV loads images in BGR order while pytesseract assumes RGB; colour
      images passed to pytesseract would need
      ``cv2.cvtColor(img, cv2.COLOR_BGR2RGB)`` first.
    * Greyscale input is preferred for OCR, see
      https://www.researchgate.net/figure/A-comparative-study-of-the-OCR-systems-using-color-and-gray-scale-images-with-or-without_fig3_310645810
    * References: https://pypi.org/project/pytesseract/ and
      https://keras-ocr.readthedocs.io/en/latest/api.html

  Args:
    root_folder: Unused by this function; kept for call compatibility.
    Content_folder: Folder that holds one sub-folder per video.
    video_path: Unused by this function; kept for call compatibility.
    video_file: Video name (without extension); used as the sub-folder name
      and embedded in the output file names.
    frames_dir: Name of the sub-folder that contains the raw ``.jpg`` frames.
    frame_width: Width in pixels of the resized greyscale frames.
    frame_height: Height in pixels of the resized greyscale frames.

  Raises:
    FileNotFoundError: If the frames folder does not exist (from ``os.listdir``).
  """
  video_folder = os.path.join(Content_folder, video_file)
  frames_path = os.path.join(video_folder, frames_dir)

  # os.listdir() returns entries in arbitrary order; sort so that the running
  # index assigned to each frame is deterministic.
  frame_names = sorted(name for name in os.listdir(frames_path) if name.endswith(".jpg"))
  if not frame_names:
    # Nothing to convert: do not create empty output folders.
    return

  # The output folders do not depend on the frame, so create them once.
  gray_frame_vid_folder = os.path.join(video_folder, "Gray_frames")
  resize_gray_frame_dir = os.path.join(video_folder, "Resized_Gray_frames")
  make_dirs(gray_frame_vid_folder, resize_gray_frame_dir)

  nc = 0
  for filename in frame_names:
    source_path = os.path.join(frames_path, filename)
    image = cv2.imread(source_path)
    if image is None:
      # cv2.imread signals an unreadable/corrupt file by returning None rather
      # than raising; skip it instead of crashing inside cvtColor.
      print("Skipping unreadable frame: " + source_path)
      continue

    # Full-resolution greyscale copy.
    Gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    Gray_File_name = "Gray_Frame_" + video_file + "_" + "{0:0=2d}".format(nc) + ".jpg"
    cv2.imwrite(os.path.join(gray_frame_vid_folder, Gray_File_name), Gray_image)

    # Resized greyscale copy.
    Resized_image = cv2.resize(Gray_image, (frame_width, frame_height))
    Resized_Gray_File_name = "Resized_Gray_frames_" + video_file + "_" + "{0:0=2d}".format(nc) + ".jpg"
    cv2.imwrite(os.path.join(resize_gray_frame_dir, Resized_Gray_File_name), Resized_image)

    nc += 1

#### Function to extract File Names of all the Videos present inside the folder, based on the accepted extensions.

In [None]:
def extract_videos(path, extensions):
  """List the video files directly inside ``path`` that have an accepted extension.

  The extension is taken as the text after the LAST dot of the file name
  (via ``os.path.splitext``), so ``my.video.mp4`` is reported with name
  ``my.video`` and extension ``mp4``. Entries without an extension are
  skipped instead of raising ``IndexError``. Matching against ``extensions``
  is exact (case-sensitive). Entries are visited in sorted order so the
  result is deterministic.

  Args:
    path: Directory to scan (not recursive).
    extensions: Accepted extensions without the leading dot, e.g.
      ``['mp4', 'mkv', 'avi']``.

  Returns:
    A tuple ``(vdofiles, vdopaths)`` of two parallel lists: the file names
    without extension, and the corresponding full paths.

  Raises:
    FileNotFoundError: If ``path`` does not exist (from ``os.listdir``).
  """
  vdofiles = []
  vdopaths = []
  for entry in sorted(os.listdir(path)):
    filename, dotted_extension = os.path.splitext(entry)
    extension = dotted_extension[1:]  # drop the leading "."
    if not extension:
      # No extension at all (e.g. "README", sub-folders, dot-files).
      continue
    if extension in extensions:
      vdofiles.append(filename)
      vdopaths.append(os.path.join(path, entry))
  return vdofiles, vdopaths

#### Main Function.

In [None]:
def main():
  """Convert the raw frames of every video under the VDO-AD folder to greyscale.

  Ensures ``<root_folder>/VDO-AD`` exists, collects the mp4/mkv/avi files in it,
  and runs ``convert_to_greyscale`` on each video's ``Raw_Frames`` folder
  inside ``Content_folder``.
  """
  root_folder = "/content/gdrive/My Drive/vid"
  Content_folder = "/content/gdrive/My Drive/Extracted_content"

  source_dir = os.path.join(root_folder, "VDO-AD")
  make_dirs(source_dir)

  names, paths = extract_videos(source_dir, ['mp4', 'mkv','avi'])
  # names and paths are parallel lists, so walk them in lockstep.
  for name, path in zip(names, paths):
    convert_to_greyscale(root_folder, Content_folder, path, name, "Raw_Frames")

In [None]:
# Entry point of the notebook: run the greyscale conversion for every video found by main().
main()