In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; later cells write their
# results below this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import gdown

# Google Drive id of the archive to fetch
file_id = "1TbnATpxdtC7SECCMVab4OaO3u-Y0cz9F"
# Download endpoint built from that id
url = "https://drive.google.com/uc?id=" + file_id

# Save the archive as real.zip in the working directory; the value returned by
# gdown is shown as the cell output.
gdown.download(url=url, output="real.zip", quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=1TbnATpxdtC7SECCMVab4OaO3u-Y0cz9F
From (redirected): https://drive.google.com/uc?id=1TbnATpxdtC7SECCMVab4OaO3u-Y0cz9F&confirm=t&uuid=6b661ddc-18ad-4f27-9c2c-142c6dae5596
To: /content/real.zip
100%|██████████| 1.61G/1.61G [00:18<00:00, 85.9MB/s]


'real.zip'

In [None]:
import os
import zipfile
extract_path = "/content/extracted_real"

# Create the target directory. exist_ok=True keeps the cell safe to re-run and
# removes the gap between a separate os.path.exists() check and the creation.
os.makedirs(extract_path, exist_ok=True)

# Extract the zip file
with zipfile.ZipFile("real.zip", "r") as zip_ref:
    zip_ref.extractall(extract_path)

print("Extraction complete! Files are in:", extract_path)

Extraction complete! Files are in: /content/extracted_real


In [None]:
import glob
import numpy as np
import cv2
import copy

# Videos with fewer frames than this are left out of preprocessing.
MIN_FRAME_COUNT = 150

candidate_files = glob.glob('/content/extracted_real/*.mp4')

# Build fresh lists instead of calling video_files.remove() while looping over
# video_files: removing during iteration makes the loop skip the element that
# follows every removed one, so some videos were never checked or counted.
video_files = []   # paths that passed the length filter
frame_count = []   # frame count of each kept video, same order as video_files
for video_file in candidate_files:

  cap = cv2.VideoCapture(video_file)   # Opens the video file
  try:
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))   # Frame count
  finally:
    cap.release()   # only the metadata was needed; free the handle

  # Skip videos that have fewer than MIN_FRAME_COUNT frames
  if n_frames < MIN_FRAME_COUNT:
    continue

  video_files.append(video_file)
  frame_count.append(n_frames)

print("Frames:" , frame_count)
print("Total number of videos: " , len(frame_count))
print('Average frame per video:',np.mean(frame_count))

Frames: [323, 319, 460, 450, 437, 331, 491, 326, 397, 514, 450, 332, 250, 472, 481, 297, 460, 479, 282, 368, 463, 317, 345, 332, 570, 339, 505, 416, 235, 464, 314, 317, 391, 289, 434, 473, 321, 332, 463, 310, 488, 389, 295, 468, 449, 269, 451, 469, 413, 416, 527, 459, 457, 462, 484, 447, 315, 459, 417, 303, 582, 422, 477, 410, 272, 429, 315, 471, 461, 467, 468, 495, 473, 315, 172, 483, 447, 335, 404, 398, 555, 209, 335, 458, 450, 373, 492, 499, 379, 428, 433, 429, 459, 421, 448, 399, 466, 318, 319, 323, 467, 490, 483, 342, 493, 463, 422, 468, 318, 321, 340, 585, 445, 320, 361, 478, 469, 331, 468, 452, 459, 325, 335, 464, 470, 454, 417, 502, 383, 380, 317, 319, 510, 445, 526, 199, 497, 489, 455, 464, 466, 530, 353, 469, 516, 455, 494, 318, 461, 455, 477, 478, 316, 573, 376, 547, 534, 332, 448, 310, 470, 494, 571, 553, 492, 330, 327, 442, 337, 312, 310, 477, 478, 456, 321, 460, 432, 541, 485, 349, 460, 322, 387, 338, 410, 453, 472, 467, 308, 478, 512, 490, 477, 461, 328, 520, 420, 448, 4

In [None]:
# to extract frame
def frame_extract(path):
  """Yield the frames of a video one at a time.

  Args:
    path: Path of the video file, passed to cv2.VideoCapture.

  Yields:
    Each frame returned by VideoCapture.read(), in order, until a read fails.
  """
  vidObj = cv2.VideoCapture(path)
  try:
    while True:
      success, image = vidObj.read()
      if not success:
        break
      yield image  # Returns frame
  finally:
    # Runs when the video is exhausted and also when the consumer stops early
    # (generator closed or garbage collected), so the capture is always freed.
    vidObj.release()

In [2]:
!pip install face_recognition
!mkdir '/content/drive/My Drive/preprocessedDataset'
!mkdir '/content/drive/My Drive/preprocessedDataset/real'

Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566162 sha256=67ed2c6f3055e732d476974dd24328f5ab785635693c01a8798f984398e16c30
  Stored in directory: /root/.cache/pip/wheels/04/52/ec/9355da79c29f160b038a20c784db2803c2f9fa2c8a462c176a
Successfully built face-recognition-models
Installing collected packages: face-recog

In [3]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import face_recognition
from tqdm import tqdm

# process the frames
def create_face_videos(path_list,out_dir):
  """Write a face-only 112x112 video for every input video.

  Frames with index 0..150 of each video are consumed in groups of four. Face
  detection (HOG model) runs on the last frame of every complete group, and
  each detected face is cropped from that same frame, resized to 112x112 and
  appended to an output video in ``out_dir`` that has the same file name as
  the input. Inputs whose output file already exists are skipped.

  Args:
    path_list: Iterable of input video paths.
    out_dir: Directory that receives the face-only videos.

  Returns:
    None. Results are written to ``out_dir`` and progress is printed.
  """
  max_frame_index = 150   # frames past this index are never used
  group_size = 4          # detection runs on every 4th frame
  face_size = (112, 112)  # output resolution passed to resize and the writer

  # No. of video already preprocessed
  already_present = glob.glob(os.path.join(out_dir, '*.mp4'))
  print("No of videos already present " , len(already_present))

  for path in tqdm(path_list):
    out_path = os.path.join(out_dir, os.path.basename(path))

    # Skip already preprocessed videos. os.path.exists is used instead of
    # glob.glob(out_path) because glob treats characters such as '[' or '*'
    # in a file name as a pattern rather than literally.
    if os.path.exists(out_path):
      print("File Already exists: " , out_path)
      continue

    # Video writer object to convert frames to video
    # NOTE(review): an MJPG fourcc is written to a file that keeps the input's
    # extension (.mp4 in this notebook) - confirm downstream readers accept it.
    out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc('M','J','P','G'), 30, face_size)
    try:
      for idx, frame in enumerate(frame_extract(path)):
        if idx > max_frame_index:
          break  # nothing past this index is used, so stop decoding
        if (idx + 1) % group_size != 0:
          continue  # only the last frame of each group of four is inspected

        faces = face_recognition.face_locations(frame, model='hog')

        # NOTE(review): every face found in the frame is appended to the same
        # output video - confirm whether only one face per frame is wanted.
        for top, right, bottom, left in faces:
          # Crop from the frame the detection ran on. The previous code
          # indexed the frame buffer by the face's position in `faces`, so a
          # box found in one frame was cut out of a different frame.
          crop = frame[top:bottom, left:right, :]
          if crop.size == 0:
            continue  # degenerate box, nothing to write
          try:
            out.write(cv2.resize(crop, face_size))
          except cv2.error:
            # Best effort: skip a crop OpenCV cannot resize or encode rather
            # than aborting the whole video.
            continue
    finally:
      # Always finalize the output file, even if detection raises part-way.
      out.release()

  # Printed here, after the work is done, rather than when the cell that
  # defines this function is executed.
  print("Data preprocessed!")

Data preprocessed!


In [None]:
# Write the face-only versions of the videos in video_files to Drive
real_faces_dir = '/content/drive/My Drive/preprocessedDataset/real'
create_face_videos(path_list=video_files, out_dir=real_faces_dir)