<a href="https://colab.research.google.com/github/MS1997/Apparent-personality-analysis-using-videos/blob/master/Extracting_Visual_features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# importing the required packages 
import cv2 
from zipfile import ZipFile
import shutil
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Mount Google Drive inside the runtime at /content/gdrive using the
# google.colab helper, so that files stored on the drive can be reached
# through the filesystem.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# unzip folders containing the data from the drive 

In [None]:
# ************ Extracting training videos ************
# l is the list of folder numbers (as strings) of the training data archives;
# later cells iterate over it again to walk the extracted folders
f1 = 1 # lowest folder number
f2 = 13 # highest folder number + 1
l = list(map(str, range(f1, f2)))
for i in l:
  file_name = 'training80_' + i + '.zip'
  folder_to_extract = 'training80_' + i
  print(folder_to_extract)

  # The archive is deleted after extraction, so when this cell is re-run a
  # missing archive next to an existing folder means the work is already done
  if os.path.isdir(folder_to_extract) and not os.path.exists(file_name):
    print('Already extracted, skipping')
    continue

  # make directory of the same name
  # (exist_ok tolerates a folder left behind by an interrupted run)
  os.makedirs(folder_to_extract, exist_ok=True)

  # the with-block closes the archive even if extraction raises
  with ZipFile(file_name, 'r') as zf:
    zf.extractall(folder_to_extract)

  # delete the original archive now that its contents are extracted
  os.remove(file_name)



In [None]:
# ************ Extracting validation videos ************
# l_1 is the list of folder numbers (as strings) of the validation data archives
f1 = 1 # lowest folder number
f2 = 13 # highest folder number + 1
l_1 = list(map(str, range(f1, f2)))
for i in l_1:
  file_name = 'validation80_' + i + '.zip'
  folder_to_extract = 'validation80_' + i
  print(folder_to_extract)

  # The archive is deleted after extraction, so when this cell is re-run a
  # missing archive next to an existing folder means the work is already done
  if os.path.isdir(folder_to_extract) and not os.path.exists(file_name):
    print('Already extracted, skipping')
    continue

  # make directory of the same name
  # (exist_ok tolerates a folder left behind by an interrupted run)
  os.makedirs(folder_to_extract, exist_ok=True)

  # the with-block closes the archive even if extraction raises
  with ZipFile(file_name, 'r') as zf:
    zf.extractall(folder_to_extract)

  # delete the original archive now that its contents are extracted
  os.remove(file_name)


In [None]:
# A password is required to get the test files; it is given on the First Impressions challenge website.
# The same code as above can then be used to extract the test videos.

In [None]:
# Total number of entries found across all the training data folders
# (change the 'training80_' prefix here for the validation / test sets)
count = sum(len(os.listdir('training80_' + suffix)) for suffix in l)
print('total training video files are: ', count)

In [None]:
# Let's look at the average length of the videos

def count_frames(path):
  """Return the frame count and frame rate that OpenCV reports for a video.

  Args:
    path: location of the video file, passed straight to cv2.VideoCapture.

  Returns:
    A (total, rate) tuple: the CAP_PROP_FRAME_COUNT value truncated to int
    and the CAP_PROP_FPS value as returned by OpenCV. Callers should be
    prepared for a rate of 0 when the file cannot be read (per the OpenCV
    documentation, get() returns 0 for properties it cannot provide).
  """
  video = cv2.VideoCapture(path)
  try:
    total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    rate = video.get(cv2.CAP_PROP_FPS)
  finally:
    # free the capture handle; this function is called once per video file
    video.release()
  return total, rate

In [None]:
# Print the average video duration (frames / fps) for every training folder
for i in l:
  folder_name = 'training80_' + i # Change here for validation, test sets
  total_time = 0
  n_videos = 0 # videos whose duration could actually be computed
  for j in os.listdir(folder_name):
    path = folder_name + '/' + j
    total, rate = count_frames(path)
    if rate <= 0:
      # no usable frame rate was reported, so the duration is undefined;
      # skip the file instead of dividing by zero
      print(f'Skipping {path}: frame rate could not be read')
      continue
    total_time += total / rate
    n_videos += 1

  if n_videos:
    print(f'Avg length of videos in folder {folder_name} is {total_time/n_videos} secs')
  else:
    # empty folder (or nothing readable): avoid a division by zero
    print(f'No readable videos found in folder {folder_name}')


In [None]:
# Check if videos are in RGB format
# cv2.CAP_PROP_CONVERT_RGB is a boolean flag; the check below treats False / 0 as "no need to convert"
# NOTE(review): OpenCV documents this property as a flag "indicating whether images should be
# converted to RGB", i.e. a capture setting - confirm it really reflects each file's colour format.

# Loop through all video files and report every video whose flag is set
for i in l:
  folder_name = 'training80_' + i # Change here for validation, test sets
  print('Entering folder ', folder_name)
  for j in os.listdir(folder_name):
    path = folder_name + '/' + j
    video = cv2.VideoCapture(path)
    try:
      rgb_flg = video.get(cv2.CAP_PROP_CONVERT_RGB)
    finally:
      # release the capture handle, otherwise one stays open per video file
      video.release()

    # get() returns a float, so a set flag compares equal to 1
    if rgb_flg == 1:
      print(path, 'Needs to be converted')

# All flags were false hence no need to convert

In [None]:
# function to extract the first 15 frames from each video and store them in a folder

def extract_frames(path, flag = 'train', num_frames = 15, size = (150, 150)):
  """Save the first frames of a video as resized JPEG images.

  The frames are written to '<root>/<video name>/<video name>_<k>.jpg' with
  k starting at 1, where <root> is 'val_frames/' for flag 'validation',
  'test_frames/' for flag 'test' and 'frames/' for any other flag.

  Args:
    path: path of the video file; the output folder and the frame files are
      named after its base name without the extension.
    flag: which data split the video belongs to ('train', 'validation' or
      'test'); selects the output root folder as described above.
    num_frames: maximum number of frames to save. Fewer are saved when the
      video ends, or cannot be decoded, earlier.
    size: (width, height) every frame is resized to before saving.
  """
  # base name without the extension, e.g. 'folder/clip.mp4' -> 'clip'
  file_name = os.path.splitext(os.path.basename(path))[0]

  # root folder of the extracted frames for each data split
  frame_roots = {'validation': 'val_frames/', 'test': 'test_frames/'}
  path_ = frame_roots.get(flag, 'frames/')
  frames_dir = path_ + file_name

  # make folder to save extracted frames
  try:
    os.makedirs(frames_dir)
  except FileExistsError:
    print('Folder already exists')

  cap = cv2.VideoCapture(path)
  try:
    # Stop on the result of read() instead of the reported frame count, so a
    # short video keeps its last frame and a failed read never reaches resize().
    for count in range(1, num_frames + 1):
      success, frame = cap.read()
      if not success:
        break

      # resizing frame to save space and also for modelling;
      # INTER_AREA resamples using pixel area relation, which the OpenCV
      # documentation recommends for shrinking images
      frame = cv2.resize(frame, size, interpolation = cv2.INTER_AREA)

      # save the frame
      frame_path = frames_dir + '/' + file_name + '_' + str(count) + '.jpg'
      cv2.imwrite(frame_path, frame)
      # no cv2.waitKey() polling here: it only reacts to key presses while a
      # HighGUI window is active, and this function never opens one
  finally:
    # always free the capture handle, even when a frame fails to process
    cap.release()

In [None]:
# ************* Extracting frames from Train Videos ******************
# 'frames' is the parent folder: extract_frames() creates one sub-folder per
# video inside it, holding the frames extracted from that video
try:
  os.makedirs('frames')
except FileExistsError:
  print('Frames Folder already exists')

# walk every training folder and extract the frames of each video in it
for suffix in l:
  video_dir = f'training80_{suffix}'
  print('Entering folder ', video_dir)

  for video_file in os.listdir(video_dir):
    extract_frames(f'{video_dir}/{video_file}')


In [None]:
# ************* Extracting frames from Validation Videos ******************
try:
  os.makedirs('val_frames') # This folder will contain one folder per video, each holding the frames extracted from that video
except FileExistsError:
  print('Validation Frames Folder already exists')

# loop to extract the frames
# l_1 (not l) is the list of folder numbers that was built for the
# validation80_* folders, so the walk stays correct if the two sets differ
for i in l_1:
  folder_name = 'validation80_' + i
  print('Entering folder ', folder_name)

  for j in os.listdir(folder_name):
    path = folder_name + '/' + j
    extract_frames(path, flag = 'validation')

In [None]:
# ************* Extracting frames from Test Videos ******************
# 'test_frames' is the parent folder: extract_frames() creates one sub-folder
# per video inside it, holding the frames extracted from that video
try:
  os.makedirs('test_frames')
except FileExistsError:
  print('Test Frames Folder already exists')

# the test videos sit directly inside the 'test' folder
count = 0
for video_file in os.listdir('test'):
  extract_frames('/'.join(('test', video_file)), flag = 'test')

  # progress report after every 10 processed videos
  count += 1
  if not count % 10:
    print(count)

In [None]:
# Shell command (IPython "!" syntax): recursively zip the 'frames' folder into
# frames_1.zip under gdrive/'My Drive', a path relative to the current working directory.
!zip -r gdrive/'My Drive'/frames_1.zip frames # zip the folder containing the frames and store it on the drive