#### Installing Required Packages.

In [None]:
 pip install pydub  

Collecting pydub
  Downloading https://files.pythonhosted.org/packages/7b/d1/fbfa79371a8cd9bb15c2e3c480d7e6e340ed5cc55005174e16f48418333a/pydub-0.24.1-py2.py3-none-any.whl
Installing collected packages: pydub
Successfully installed pydub-0.24.1


#### Colab Specific Code (Not for Review)

In [None]:
# Colab only: mount Google Drive at /content/gdrive so the input and output
# folders used later in this notebook are reachable from the runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


#### Importing Required Libraries

In [None]:
import sys
import os
import cv2
import csv
import pandas as pd
import numpy  as np
import re
import moviepy.editor as mp
from pydub import AudioSegment
from moviepy.editor import VideoFileClip

Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)1441792/45929032 bytes (3.1%)4284416/45929032 bytes (9.3%)7725056/45929032 bytes (16.8%)11067392/45929032 bytes (24.1%)14467072/45929032 bytes (31.5%)17825792/45929032 bytes (38.8%)20897792/45929032 bytes (45.5%)23928832/45929032 bytes (52.1%)27303936/45929032 bytes (59.4%)30687232/45929032 bytes (66.8%)34111488/45929032 bytes (74.3%)37462016/45929032 bytes (81.6%)4

#### Generic Function to make Required Directories

In [None]:
def make_dirs(*dirs):
  """Ensure that every directory path passed in exists.

  Missing parent directories are created as needed, and a path that already
  exists is left untouched instead of raising an error.
  """
  for target_dir in dirs:
    os.makedirs(target_dir, exist_ok=True)

#### Match Target Amplitude
- Audio loudness normalization
- Peak normalization is not always helpful; ffmpeg also appears to support loudness normalization, where gain is applied to reach a target average amplitude level.
- RMS is a measure of average amplitude. pydub exposes it as `audio_segment.rms`, and also provides the same value converted to dBFS as `audio_segment.dBFS`.


In [None]:
def match_target_amplitude(sound, target_dBFS):
    """Return ``sound`` with gain applied so its average level is ``target_dBFS``.

    The gain is the difference between the requested level and the segment's
    current ``dBFS`` value; it is handed to ``sound.apply_gain`` and that
    call's result is returned.
    """
    return sound.apply_gain(target_dBFS - sound.dBFS)

#### Function to Extract Frames from Given Videos.

In [None]:
def extract_frames(root_folder, Content_folder,video_path, video_file, success):
  """Save one frame per second of a video as numbered JPEG files.

  Frames are written to ``<Content_folder>/<video_file>/Raw_Frames`` as
  ``Raw_Frame_<video_file>_<NN>.jpg``, where ``NN`` is the zero-padded index
  of the sampled second (starting at 00).

  Parameters:
    root_folder: not used by this function; kept so existing callers work.
    Content_folder: base directory under which the output is written.
    video_path: path of the video file to read.
    video_file: video name without extension, used for the output folder
      and file names.
    success: initial loop flag; when falsy nothing is read or written.
  """
  if not success:
    return

  frame_dir = os.path.join(Content_folder, video_file, "Raw_Frames")

  # Open the video once and reuse the capture for every seek, instead of
  # creating a new (never released) capture object on each iteration.
  vidcap = cv2.VideoCapture(video_path)
  try:
    count = 0
    while success:
      # Jump to the start of second number `count` and grab the frame there.
      vidcap.set(cv2.CAP_PROP_POS_MSEC, count * 1000)
      success, image = vidcap.read()

      # read() reports failure once the position is past the end of the video
      # (or the file could not be opened); there is no frame to save then.
      if not success:
        break

      # Create the output folder lazily so a video without any readable
      # frame does not leave an empty directory behind.
      if count == 0:
        make_dirs(frame_dir)

      File_name = "Raw_Frame_{}_{:02d}.jpg".format(video_file, count)
      cv2.imwrite(os.path.join(frame_dir, File_name), image)
      count += 1
  finally:
    # Always give the decoder and file handle back, even if writing fails.
    vidcap.release()

#### Function to Extract Audio From Videos.

In [None]:
# Use AudioSegment to extract audio

def extract_audio(root_folder,Content_folder, video_path, video_file):
  """Extract a video's audio track to WAV and save a 16 kHz copy of it.

  Two files are written:
    <Content_folder>/<video_file>/Full_audio/<video_file>.wav
    <Content_folder>/<video_file>/Normalised_Audio/<video_file>_Normalized_Audio.wav

  The second file is the first one resampled to a 16000 Hz frame rate.
  Loudness normalization is intentionally not applied here (see the note in
  the body). If the video has no audio track, a message is printed and
  nothing is written.

  Parameters:
    root_folder: not used by this function; kept so existing callers work.
    Content_folder: base directory under which the output is written.
    video_path: path of the video file to read.
    video_file: video name without extension, used for folder and file names.
  """
  # Output locations for the raw and the resampled audio.
  Audio_folder_Path = os.path.join(Content_folder, video_file, "Full_audio")
  Audio_Normalized_path = os.path.join(Content_folder, video_file, "Normalised_Audio")
  audio_path = os.path.join(Audio_folder_Path, video_file + ".wav")
  normalized_audio_path = os.path.join(
      Audio_Normalized_path, video_file + "_Normalized_Audio.wav")

  # Getting the audio file from the current video.
  clip = mp.VideoFileClip(video_path)
  try:
    print("Duration of video : ", clip.duration)
    print("FPS : ", clip.fps)  # Frames per second

    # A video without an audio track exposes `audio` as None; skip it
    # instead of failing with an AttributeError.
    if clip.audio is None:
      print("No audio track found in video : ", video_path)
      return

    # Writing the audio file in the corresponding video folder.
    make_dirs(Audio_folder_Path)
    clip.audio.write_audiofile(audio_path)
  finally:
    # Release the reader resources held by the clip.
    clip.close()

  make_dirs(Audio_Normalized_path)

  # Reading the raw audio file that was just written.
  sound = AudioSegment.from_file(audio_path, "wav")

  # Note: IBM Watson provides built-in frame rate adjustment and normalization.
  # If you are not using IBM Watson, apply loudness normalization first:
  #normalized_sound = match_target_amplitude(sound, -20.0)

  # Changing the frame rate of the extracted audio to 16 kHz.
  normalized_sound = sound.set_frame_rate(16000)

  # Saving the resampled audio. export() hands back the output file handle,
  # so close it to make sure the data is flushed to disk.
  exported_file = normalized_sound.export(normalized_audio_path, format="wav")
  exported_file.close()

#### Function to extract File Names of all the Videos present inside the folder, based on the accepted extensions.


In [None]:
def extract_videos(path, extensions):
  """List the video files in ``path`` whose extension is accepted.

  Parameters:
    path: directory to scan (not recursive).
    extensions: accepted extensions without the leading dot, e.g.
      ``['mp4', 'mkv', 'avi']``. A file matches when its extension, as
      written or lower-cased, is in this collection.

  Returns:
    A tuple ``(vdofiles, vdopaths)`` of two parallel lists: the file names
    without their extension, and the corresponding full paths.
  """
  vdofiles = []
  vdopaths = []
  print("List of videos present in Root Folder with extensions")
  for file in os.listdir(path):
    # splitext splits on the LAST dot only, so names such as "my.clip.mp4"
    # keep their full stem, and names without any dot yield an empty
    # extension instead of raising an IndexError.
    filename, dotted_extension = os.path.splitext(file)
    extension = dotted_extension[1:]
    print([filename, extension])
    if not extension:
      continue
    # Also accept upper/mixed-case extensions such as "MP4".
    if extension in extensions or extension.lower() in extensions:
      vdofiles.append(filename)
      vdopaths.append(os.path.join(path, file))
  return vdofiles, vdopaths

#### The Main Function

In [None]:
def main():
  """Run frame and audio extraction for every video in the VDO-AD folder.

  The input folder is ``<root_folder>/VDO-AD`` (created if it is missing);
  results are written below ``Content_folder``. Only files with one of the
  accepted extensions are processed.
  """
  root_folder = "/content/gdrive/My Drive/vid"
  Content_folder = "/content/gdrive/My Drive/Extracted_content"
  accepted_extensions = ['mp4', 'mkv','avi']

  source_dir = os.path.join(root_folder, "VDO-AD")
  make_dirs(source_dir)

  names, paths = extract_videos(source_dir, accepted_extensions)
  # The two lists are parallel, so walk them together.
  for current_path, current_name in zip(paths, names):
    extract_frames(root_folder, Content_folder, current_path, current_name, True)
    extract_audio(root_folder, Content_folder, current_path, current_name)

In [None]:
# Run the pipeline: extract frames and audio for every accepted video.
main()

List of videos present in Root Folder with extensions
['VDO-AD-200', 'mp4']
['VDO-AD-105', 'mp4']
Duration of video :  29.97
FPS :  29.97002997002997
[MoviePy] Writing audio in /content/gdrive/My Drive/Extracted_content/VDO-AD-200/Full_audio/VDO-AD-200.wav


100%|██████████| 661/661 [00:00<00:00, 1682.84it/s]

[MoviePy] Done.





Duration of video :  29.85
FPS :  29.97002997002997
[MoviePy] Writing audio in /content/gdrive/My Drive/Extracted_content/VDO-AD-105/Full_audio/VDO-AD-105.wav


100%|██████████| 659/659 [00:00<00:00, 1486.94it/s]

[MoviePy] Done.



