## CELEB-DF 資料集
- https://drive.google.com/file/d/1SnCZ_DQTPxLs0ttOO9sPh2cA1fcFLLFE/view?usp=sharing

篩選過後的資料
- https://drive.google.com/file/d/1-9ngKHZ5QN2bb6kTRgKGNqjAguwSbtix/view?usp=sharing

In [None]:
#@title Mount Google Drive
# Colab-only helper: makes the user's Google Drive available under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#@title 從Google Drive 匯入影片資料
train_data_path = '/content/drive/MyDrive/aidataset/df_train.zip' #@param {type:"string"}
cmd  = f'cp {train_data_path} ./df_train.zip'
! $cmd
! unzip df_train.zip


In [None]:
#@title 從訓練影片切割影格 (Frame)
import cv2
from os import makedirs
from os.path import join, exists
import glob

# Class folders (relative to the working directory) that hold the training
# videos; the second path component ("0" / "1") names the output class folder.
training_videos_folder = ["train/0","train/1"]
# Root directory that receives one sub-folder of JPEG frames per video.
frames_root = "/content/train_frames/"

for folder in training_videos_folder:
    # glob returns files in arbitrary order; sorting keeps the
    # video -> "video_<n>" numbering identical on every run.
    videos_path = sorted(glob.glob(join(folder, "*.mp4")))
    class_name = folder.split("/")[1]

    if not videos_path:
        print("No .mp4 files found in", folder)

    for counter, video_path in enumerate(videos_path):
        video_dir = join(frames_root, class_name, "video_" + str(counter))
        makedirs(video_dir, exist_ok=True)

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            # The (empty) output folder is still created so numbering stays stable.
            print("Could not open video, no frames extracted:", video_path)

        try:
            while cap.isOpened():
                # Position of the frame about to be read, queried before cap.read().
                frameId = cap.get(cv2.CAP_PROP_POS_FRAMES)
                ret, frame = cap.read()
                if not ret:
                    break

                # Saved frame files are numbered from 1: image_1.jpg, image_2.jpg, ...
                filename = join(video_dir, "image_" + str(int(frameId) + 1) + ".jpg")
                cv2.imwrite(filename, frame)
        finally:
            # Always free the decoder, even if writing a frame raised.
            cap.release()

        if counter % 10 == 0:
            print("Number of videos done:", counter)

In [None]:
#@title 刪除多餘影格 (選擇性)

# Highest frame number to keep per video; frame files numbered above this
# value are removed by delete_extra_frames below.
vid_thresh = 100 #@param {type:"integer"}
import os

def delete_extra_frames(path, thresh=None):
  """Delete frame images whose number exceeds a threshold.

  Walks every sub-directory of ``path`` and removes files named
  ``<prefix>_<N>.jpg`` where ``N`` is greater than the threshold.

  Args:
    path: Directory containing one sub-directory per video. A trailing
      slash is optional.
    thresh: Highest frame number to keep. When ``None`` (the default) the
      module-level ``vid_thresh`` is used.

  Entries that are not directories, and files that do not match the
  ``<prefix>_<N>.jpg`` pattern, are left untouched.
  """
  if thresh is None:
    thresh = vid_thresh
  for v in os.listdir(path):
    video_dir = os.path.join(path, v)
    if not os.path.isdir(video_dir):
      continue
    for f in os.listdir(video_dir):
      # splitext removes the real extension; str.strip('.jpg') would strip a
      # character set from both ends rather than a suffix.
      stem, ext = os.path.splitext(f)
      _, _, number = stem.partition('_')
      if ext.lower() != '.jpg' or not number.isdecimal():
        continue
      if int(number) > thresh:
        os.remove(os.path.join(video_dir, f))

# The frame-extraction step writes to the absolute /content/train_frames tree,
# so the same absolute locations are used here instead of paths that depend on
# the current working directory.
for path in ('/content/train_frames/0/', '/content/train_frames/1/'):
  delete_extra_frames(path)

In [None]:
#@title 安裝 facenet_pytorch
! pip install facenet_pytorch

In [None]:
#@title 取得臉部資訊

from facenet_pytorch import MTCNN
import cv2
from PIL import Image

from os import listdir, makedirs
import glob
from os.path import join, exists
from skimage.io import imsave
import imageio.core.util

import warnings
warnings.filterwarnings("ignore")

def ignore_warnings(*args, **kwargs):
    """Accept any arguments and do nothing; a silent stand-in for a warning hook."""
    return None


# Swap imageio's internal `_precision_warn` hook for the no-op defined above.
# NOTE(review): presumably this silences the conversion warnings emitted while
# saving face crops; `_precision_warn` is a private name, so confirm it exists
# in the installed imageio version.
imageio.core.util._precision_warn = ignore_warnings


import torch

# Use the first GPU when one is available; otherwise fall back to the CPU so
# the cell still runs (more slowly) on a runtime without CUDA.
mtcnn_device = "cuda:0" if torch.cuda.is_available() else "cpu"
print("MTCNN device:", mtcnn_device)

# Face detector shared by extractFaces below.
# NOTE(review): per the facenet-pytorch docs, `margin` pads the detected box,
# `select_largest=False` picks the most confident face, and
# `post_process=False` returns un-normalized pixel values — confirm against
# the installed version.
mtcnn = MTCNN(
    margin=40,
    select_largest=False,
    post_process=False,
    device=mtcnn_device,
)




def extractFaces(source_frames_folders,dest_faces_folder):
    """Run face detection on every extracted frame and save the face crops.

    For each folder in ``source_frames_folders``, every sub-folder (one per
    video) is scanned for ``*.jpg`` frames. Each frame is passed to the
    module-level ``mtcnn`` detector and the returned face is written to
    ``dest_faces_folder/<video folder>/<frame file name>``.

    Args:
        source_frames_folders: Iterable of directories that contain one
            sub-directory of frames per video.
        dest_faces_folder: Directory under which the per-video face folders
            are created (missing folders are created as needed).

    Frames that cannot be read, and frames where the detector returns no
    face, are skipped with an "Image skipping" message.
    """
    from os.path import basename

    for frames_root in source_frames_folders:
        counter = 0
        for video_name in listdir(frames_root):
            frame_paths = glob.glob(join(frames_root, video_name, "*.jpg"))
            if counter % 1000 == 0:
                print("Number of videos done:", counter)
            makedirs(join(dest_faces_folder, video_name), exist_ok=True)

            for frame_path in frame_paths:
                frame = cv2.imread(frame_path)
                if frame is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print("Image skipping")
                    continue

                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                face = mtcnn(Image.fromarray(frame))
                if face is None:
                    # No face returned for this frame.
                    print("Image skipping")
                    continue

                # Reorder axes for saving (assumes the detector returns a
                # channel-first tensor — TODO confirm) and convert to uint8
                # here so the image writer does not have to convert an int32
                # array itself.
                imsave(
                    join(dest_faces_folder, video_name, basename(frame_path)),
                    face.permute(1, 2, 0).clamp(0, 255).byte().numpy(),
                )
            counter += 1

# (frame folders, face output folder) for each class, processed in order.
face_jobs = (
    (["/content/train_frames/0/"], "/content/train_face/0/"),
    (["/content/train_frames/1/"], "/content/train_face/1/"),
)

for source_frames_folders, dest_faces_folder in face_jobs:
    extractFaces(source_frames_folders, dest_faces_folder)

In [None]:
#@title 將臉部圖片資料壓縮成npy 
import cv2
import argparse
import numpy as np
from keras.utils import np_utils
import glob
from os.path import join
from os import listdir
from random import shuffle


from os.path import basename

# Face folders per class: index 0 holds class "1", index 1 holds class "0".
train_path = ["/content/train_face/1", "/content/train_face/0"]

# One entry per video folder; sorted so the packed order is reproducible
# (listdir returns entries in arbitrary order).
list_1 = sorted(join(train_path[0], x) for x in listdir(train_path[0]))
list_0 = sorted(join(train_path[1], x) for x in listdir(train_path[1]))

img_size = 160          # side length (pixels) every face crop is resized to
frames_per_video = 25   # maximum number of frames taken from each video
num_classes = 2         # fixed width of the one-hot label matrix
print(len(list_1), len(list_0))


def _frame_number(frame_path):
    """Return the integer formed by the digits in the file name of ``frame_path``.

    Only the file name is inspected, so digits elsewhere in the directory
    path cannot affect the ordering. Names without digits sort first (-1).
    """
    digits = "".join(filter(str.isdigit, basename(frame_path)))
    return int(digits) if digits else -1


# Class-0 videos first, then class-1 videos. The label comes from the list a
# video belongs to rather than from a fixed position in its path.
vid_list = list_0 + list_1
vid_labels = [0] * len(list_0) + [1] * len(list_1)
print("Number of video folders:", len(vid_list))

images = []
labels = []
for counter, (video_dir, video_label) in enumerate(zip(vid_list, vid_labels)):
    frame_paths = sorted(glob.glob(join(video_dir, "*.jpg")), key=_frame_number)
    selected = frame_paths[:frames_per_video]
    images += selected
    labels += [video_label] * len(selected)

    if counter % 1000 == 0:
        print("Number of files done:", counter)

train_data = []
train_label = []
for count, (image_path, image_label) in enumerate(zip(images, labels)):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for unreadable files; skip instead of crashing.
        print("Image skipping (unreadable):", image_path)
    else:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(
            img, (img_size, img_size), interpolation=cv2.INTER_AREA
        )
        train_data.append(img)
        train_label.append(image_label)

    if count % 10000 == 0:
        print("Number of files done:", count)

train_data = np.array(train_data)
# num_classes is passed explicitly so the label matrix keeps two columns even
# when only one class is present in the data.
train_label = np_utils.to_categorical(np.array(train_label), num_classes=num_classes)
print(train_data.shape, train_label.shape)

np.save("train_data_" + str(frames_per_video) + "_c40.npy", train_data)
np.save("train_label_" + str(frames_per_video) + "_c40.npy", train_label)


4 4
['/content/train_face/0/video_4', '/content/train_face/0/video_3', '/content/train_face/0/video_2', '/content/train_face/0/video_1', '/content/train_face/1/video_4', '/content/train_face/1/video_3', '/content/train_face/1/video_2', '/content/train_face/1/video_1']
Number of files done: 0
Number of files done: 0
(200, 160, 160, 3) (200, 2)


In [None]:
#@title 將打包檔案搬回Google Drive
dst_path = '/content/drive/MyDrive/aidataset/' #@param {type:"string"}
cmd  = f'cp train_*.npy {dst_path}'
! $cmd