<a href="https://colab.research.google.com/github/Karthick47v2/mock-buddy/blob/main/Data_visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install kaggle
!pip3 install kaggle

# copy kaggle.json to required dir and give permission
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# download the dataset archive from Kaggle; the next cell expects it as
# youtube-faces-with-facial-keypoints.zip in the current working directory
!kaggle datasets download selfishgene/youtube-faces-with-facial-keypoints

In [None]:
# unzip
!unzip youtube-faces-with-facial-keypoints.zip

In [1]:
# import libs
# data visualization
import pandas as pd
# easy file access
import glob
# np array
import numpy as np
# plot
import matplotlib.pyplot as plt
# image processing
import cv2

In [None]:
# read the dataset's metadata csv; later cells use its 'videoID' and
# 'personName' columns
# NOTE(review): despite its name, csv_file holds the loaded DataFrame
csv_file = pd.read_csv('/content/youtube_faces_with_keypoints_full.csv')

In [None]:
# preview the first 10 rows of the metadata
csv_file.head(10)

In [None]:
# dataset summary: number of csv rows (treated as videos) and of distinct persons
print("Total videos : ", csv_file.shape[0])
# the value counts distinct 'personName' entries, so it is labelled as persons
# (it used to be printed under the misleading label "Unique videos")
print("Unique persons : ", len(csv_file['personName'].unique()))

In [None]:
# collect every .npz file below /content (recursive search)
# make sure there are no unrelated npz files in that tree, because every match
# is treated as a video file by the following cells
# glob.glob (not iglob) is used on purpose: the resulting list is iterated more
# than once further down
npz_path_list = glob.glob('/content/**/*.npz', recursive=True)

In [None]:
# get all video ids
# npz_path_list contains paths like xxxx/xxxx/xxx/yyy.npz and we need yyy:
#   1. keep the last path component (yyy.npz)
#   2. strip only the final extension; splitting on the first '.' would cut
#      off part of any id that itself contains a dot
vidIDs = [x.rsplit('/', 1)[-1].rsplit('.', 1)[0] for x in npz_path_list]

In [None]:
# lookup table video id -> full .npz path (the csv only stores the video id);
# vidIDs and npz_path_list are parallel lists, so pairing them up is enough
vid_paths = dict(zip(vidIDs, npz_path_list))

In [None]:
# keep only the csv rows whose video id has a matching .npz file, then
# renumber the rows from 0
has_npz = csv_file['videoID'].isin(list(vid_paths))
csv_file = csv_file[has_npz].reset_index(drop=True)

In [None]:
# explore the data: show 4 frames from each of 4 randomly chosen videos,
# together with their landmarks
np.random.seed(42)   # fixed seed, so the same videos are drawn on every run
num_vids = 4
num_frames = 4
# relative positions inside a video (fraction of its length) of the frames to show
frames_list = np.array([0.1, 0.3, 0.6, 0.9])

# draw num_vids distinct row labels first, then look up their video ids
sample_rows = np.random.choice(csv_file.index, size=num_vids, replace=False)
sample_vids = csv_file.loc[sample_rows, 'videoID']

In [None]:
# inspect the memory footprint of one resized frame and of its landmarks,
# using the first video listed in the csv
preview_size = 192

for vidID in csv_file['videoID'].head(1):
  np_file = np.load(vid_paths[vidID])
  col_img = np_file['colorImages']
  landmarks2D = np_file['landmarks2D']

  # frames are stacked along the last axis; take the first one
  first_frame = col_img[:, :, :, 0]
  frame_h, frame_w = first_frame.shape[:2]
  scale_h, scale_w = preview_size / frame_h, preview_size / frame_w

  # rescale the first frame's landmarks to the resized image:
  # column 0 with the width factor, column 1 with the height factor
  landmarks = landmarks2D[:, :, 0]
  landmarks[:, 0] = landmarks[:, 0] * scale_w
  landmarks[:, 1] = landmarks[:, 1] * scale_h
  landmarks = landmarks.astype(np.float32)
  print(landmarks.size, landmarks.itemsize, landmarks.nbytes, 'bytes')

  frames = cv2.resize(first_frame, (preview_size, preview_size)).astype(np.uint8)
  print(frames.size, frames.itemsize, frames.nbytes, 'bytes')

In [None]:
# visualize the sampled videos: one row per video, one column per selected frame
fig, arr = plt.subplots(nrows=num_vids, ncols=num_frames, figsize=(14,18))

for i, videoID in enumerate(sample_vids):
  # load the video's npz archive (color images, bounding box, 2D landmarks)
  np_file = np.load(vid_paths[videoID])
  # colorImages is 4-D: (height, width, RGB, frame index)
  col_img = np_file['colorImages']
  landmarks = np_file['landmarks2D']

  # turn the relative positions in frames_list into frame indices.
  # A plain int dtype is required: uint8 wraps around at 256 and would silently
  # select the wrong frame for any index above 255.
  frame_idxs = (frames_list * (col_img.shape[3] - 1)).astype(int)

  # draw every selected frame with its landmarks on top
  for j, frame in enumerate(frame_idxs):
    arr[i][j].imshow(col_img[:, :, :, frame])
    arr[i][j].scatter(x=landmarks[:,0,frame], y=landmarks[:, 1, frame], s=3, c='r')

In [None]:
# build the training arrays: resized frames plus landmarks rescaled to match.
# From every video each frame_cut-th frame is kept (0, frame_cut, 2 * frame_cut, ...).
frames = []
landmark_xy = []
img_size = 224     # all frames are resized to img_size x img_size
frame_cut = 30     # step between the frames that are kept

for vidID in csv_file['videoID']:
  np_file = np.load(vid_paths[vidID])
  col_img = np_file['colorImages']
  landmarks2D = np_file['landmarks2D']

  # frames are stacked along the last axis of colorImages
  for frame_idx in range(0, col_img.shape[3], frame_cut):
    frame = col_img[:, :, :, frame_idx]
    frame_h, frame_w = frame.shape[:2]
    scale_h, scale_w = img_size / frame_h, img_size / frame_w

    # scale landmark column 0 by the width factor and column 1 by the height
    # factor, so the points line up with the resized frame
    landmarks = landmarks2D[:, :, frame_idx]
    landmarks[:, 0] = landmarks[:, 0] * scale_w
    landmarks[:, 1] = landmarks[:, 1] * scale_h
    landmark_xy.append(landmarks.astype(np.float32))

    frames.append(cv2.resize(frame, (img_size, img_size)).astype(np.uint8))

frames = np.array(frames).astype(np.uint8)
landmark_xy = np.array(landmark_xy).astype(np.float32)

In [None]:
# arrange the labels in the layout used for model training:
# flatten each sample's landmarks into one row, then normalize by img_size and
# reshape to (samples, 1, 1, 136)
y_data = landmark_xy.reshape(len(landmark_xy), -1)
y_train = y_data.reshape(-1, 1, 1, 136) / img_size

In [None]:
# sanity check: draw the first 4 resized frames with their landmarks scaled
# back from normalized values to pixels
fig, arr = plt.subplots(nrows=1, ncols=4, figsize=(15,15))

for i in range(4):
  arr[i].imshow(frames[i])
  # values at even positions are used as x, values at odd positions as y
  points = y_train[i, 0, 0, :].reshape(68, 2) * img_size
  x = points[:, 0]
  y = points[:, 1]
  arr[i].scatter(x, y, s=3, c='r')

In [None]:
# save the resized frames and the normalized landmark targets as .npy files
# in the current working directory for future use (training the model)
np.save('frames30.npy', frames)
np.save('y_train30.npy', y_train)

In [None]:
# mount Google Drive at /content/gdrive so the saved files can be moved there
# (google.colab is imported here, next to its only use)
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
!mv y_train30.npy frames30.npy /content/gdrive/MyDrive/data-v/

In [None]:
# build a small test set: one resized frame from each of the first
# test_vid_count videos listed in the csv
# NOTE(review): these videos are also used by the training-set cell - confirm
# that this overlap is intended
test_frames = []
img_size = 224        # resize all frames to 224 x 224
test_vid_count = 100  # number of videos to take a frame from
test_frame_idx = 3    # index of the frame taken from every video

for vidID in csv_file['videoID'].head(test_vid_count):
  np_file = np.load(vid_paths[vidID])
  col_img = np_file['colorImages']

  # frames are stacked along the last axis; fall back to the last available
  # frame when a video has fewer than test_frame_idx + 1 frames instead of
  # failing with an IndexError
  frame_idx = min(test_frame_idx, col_img.shape[3] - 1)
  test_frames.append(cv2.resize(col_img[:, :, :, frame_idx], (img_size, img_size)).astype(np.uint8))

test_frames = np.array(test_frames).astype(np.uint8)

In [None]:
# preview the first 4 test frames side by side
fig, arr = plt.subplots(nrows=1, ncols=4, figsize=(15, 15))
for col in range(4):
    arr[col].imshow(test_frames[col])



In [None]:
# save the test frames as a .npy file in the current working directory
# for future use (testing the model)
np.save('test_frames.npy', test_frames)

In [None]:
!mv test_frames.npy /content/gdrive/MyDrive/data-v/

In [None]:
###################### IMAGE AUGMENTATION ############################

In [3]:
# reload the training arrays that were saved to Google Drive
# NOTE(review): requires Drive to be mounted at /content/gdrive in the current
# session - presumably this section is meant to run without repeating the
# preprocessing cells above; confirm
y_train = np.load('/content/gdrive/MyDrive/data-v/y_train30.npy')
frames = np.load('/content/gdrive/MyDrive/data-v/frames30.npy')

In [4]:
# plot for debugging
def plot_sample(frame, keypoint, axis, img_size=224):
    """Draw one frame on `axis` with its keypoints overlaid as red dots.

    Args:
        frame: image accepted by `axis.imshow`.
        keypoint: normalized keypoints stored as interleaved x, y values
            (x0, y0, x1, y1, ...). Any shape with an even number of values
            works, e.g. (1, 1, 136).
        axis: axes object providing `imshow` and `scatter`.
        img_size: factor that scales the normalized keypoints back to pixel
            coordinates. It is a parameter with a default rather than a
            notebook global, so the function also works in a session where
            the cell defining `img_size` has not been run.
    """
    axis.imshow(frame)
    # one row per point: column 0 holds x, column 1 holds y
    points = np.asarray(keypoint).reshape(-1, 2) * img_size
    axis.scatter(points[:, 0], points[:, 1], s=3, c='r')

In [5]:
# randomly pick 25 % of the training samples for each augmentation
# seed the generator first, so the augmented data set is the same on every run
# (this section can be run in a session where no earlier seeding cell was executed)
np.random.seed(42)
num_augmented = int(frames.shape[0] * 0.25)
random_flip_images = np.random.choice(frames.shape[0], size=num_augmented, replace=False)
random_shift_images = np.random.choice(frames.shape[0], size=num_augmented, replace=False)

In [None]:
# flipping horizontally (both frame and keypoint)
def horizontal_flip(frame, keypoints, landmark_order=None):
    """Mirror a batch of frames and their normalized keypoints left-to-right.

    Args:
        frame: batch of images; axis 2 is reversed (the column axis for a
            (samples, rows, cols, channels) batch).
        keypoints: array whose last axis holds interleaved normalized values
            x0, y0, x1, y1, ... (e.g. shape (samples, 1, 1, 136)). Values at
            even positions are replaced by 1 - value, odd positions are kept.
        landmark_order: optional permutation of the point indices applied
            after mirroring. The default (None) keeps every point at its
            original index.
            NOTE(review): mirroring moves e.g. a point on the left side of the
            face to the right side of the image while it keeps its index; if
            the landmark scheme has left/right counterparts, pass the matching
            permutation here - confirm against the data set's landmark layout.

    Returns:
        (flipped_frame, flipped_keypoints). flipped_keypoints always has the
        shape of `keypoints`, also for an empty batch, so it can be
        concatenated with the training targets.
    """
    frame = np.asarray(frame)
    keypoints = np.asarray(keypoints)

    flipped_frame = np.flip(frame, axis=2)   # flip col wise

    # work on a floating point copy so the caller's array stays untouched
    flipped_keypoints = np.array(keypoints, dtype=np.result_type(keypoints.dtype, np.float32))
    # even positions are the horizontal coordinates, so only those are mirrored
    flipped_keypoints[..., 0::2] = 1.0 - flipped_keypoints[..., 0::2]

    if landmark_order is not None:
        # view the last axis as (point, xy) pairs and reorder whole points
        pairs = flipped_keypoints.reshape(flipped_keypoints.shape[:-1] + (-1, 2))
        pairs = pairs[..., np.asarray(landmark_order), :]
        flipped_keypoints = pairs.reshape(flipped_keypoints.shape)

    return flipped_frame, flipped_keypoints

# mirror the samples that were selected for flipping
flipped_train_frames, flipped_train_keypoints = horizontal_flip(
    frames[random_flip_images], y_train[random_flip_images])

# append the mirrored samples to the training data
# (frames / y_train grow every time this cell runs, so run it once per session)
frames = np.concatenate([frames, flipped_train_frames])
y_train = np.concatenate([y_train, flipped_train_keypoints])

# show the first mirrored sample as a visual check
fig, axis = plt.subplots()
plot_sample(flipped_train_frames[0], flipped_train_keypoints[0], axis)

In [None]:
# shifting 36 px each side -- 36 is arbitary value
def shift_images(images, keypoints, pixel_shifts=(36,)):
    """Create diagonally shifted copies of images and their normalized keypoints.

    For every value in `pixel_shifts` each image is translated in the four
    diagonal directions (-s, -s), (-s, s), (s, -s), (s, s). A shifted sample
    is kept only when all of its keypoints stay strictly inside (0, 1);
    samples whose keypoints would leave the image are dropped.

    Args:
        images: batch of images, iterated along the first axis.
        keypoints: matching batch of normalized keypoints whose last axis
            holds interleaved values x0, y0, x1, y1, ...
            (e.g. shape (samples, 1, 1, 136)).
        pixel_shifts: shift distances in pixels; defaults to a single shift
            of 36 px.

    Returns:
        (shifted_images, shifted_keypoints) as arrays. When no sample is kept,
        both arrays are empty but still have the per-sample shape of the
        inputs, so they can be concatenated with the training data.
    """
    images = np.asarray(images)
    keypoints = np.asarray(keypoints)
    kp_dtype = np.result_type(keypoints.dtype, np.float32)

    shifted_images = []
    shifted_keypoints = []
    for shift in pixel_shifts:    # augmenting over several pixel shift values
        for (shift_x, shift_y) in [(-shift, -shift), (-shift, shift), (shift, -shift), (shift, shift)]:
            M = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
            for image, keypoint in zip(images, keypoints):
                # use the image's own size instead of a hard-coded 224
                height, width = image.shape[:2]

                # even positions are x values, odd positions are y values;
                # keypoints are normalized, so the pixel shift is normalized too
                shifted_keypoint = np.array(keypoint, dtype=kp_dtype)
                shifted_keypoint[..., 0::2] += shift_x / width
                shifted_keypoint[..., 1::2] += shift_y / height

                # keep the sample only if every keypoint is still inside the image
                if np.all(0.0 < shifted_keypoint) and np.all(shifted_keypoint < 1.0):
                    # affine transformation; dsize is given as (width, height)
                    shifted_image = cv2.warpAffine(image, M, (width, height), flags=cv2.INTER_CUBIC)
                    shifted_images.append(shifted_image)
                    shifted_keypoints.append(shifted_keypoint)

    if not shifted_images:
        # keep the per-sample dimensions so np.concatenate with the training data works
        return (np.empty((0,) + images.shape[1:], dtype=images.dtype),
                np.empty((0,) + keypoints.shape[1:], dtype=kp_dtype))

    # no clipping is needed: every kept keypoint already passed the range check
    return np.array(shifted_images), np.array(shifted_keypoints)

# create the shifted copies of the samples selected for shifting
shifted_train_images, shifted_train_keypoints = shift_images(
    frames[random_shift_images], y_train[random_shift_images])

# append the shifted samples to the training data
# (frames / y_train grow every time this cell runs, so run it once per session)
frames = np.concatenate([frames, shifted_train_images])
y_train = np.concatenate([y_train, shifted_train_keypoints])

# show the second shifted sample as a visual check
fig, axis = plt.subplots()
plot_sample(shifted_train_images[1], shifted_train_keypoints[1], axis)

In [9]:
# save the augmented frames and targets as .npy files in the current working
# directory for future use (training the model)
np.save('frames30_AUG.npy', frames)
np.save('y_train30_AUG.npy', y_train)


## author's note: "from 9k to 21k" - presumably the number of training samples
## before and after augmentation (TODO confirm)