<a href="https://colab.research.google.com/github/Karthick47v2/face-landmark-detector/blob/main/300w.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


## Import libs


In [1]:
import glob
import pandas as pd
import numpy as np
import scipy.io
import cv2
import matplotlib.pyplot as plt
from itertools import chain
import os
import imgaug.augmenters as iaa
from imgaug.augmentables import Keypoint, KeypointsOnImage


## Connect to GDrive


In [None]:
# Colab-only: mount the user's Google Drive at /content/gdrive so the dataset
# archives referenced by the extraction cell can be read.
from google.colab import drive
drive.mount('/content/gdrive')


## Extract dataset


In [None]:
# Unpack each archive from Google Drive into its own working folder.
# `mkdir -p` keeps the cell re-runnable when a folder already exists.

# afw
!mkdir -p afw
!unzip /content/gdrive/MyDrive/300w/afw.zip -d afw/

# ibug
!mkdir -p ibug
!unzip /content/gdrive/MyDrive/300w/ibug.zip -d ibug/

# helen
!mkdir -p helen
!unzip /content/gdrive/MyDrive/300w/helen.zip -d helen/

# lfpw
!mkdir -p lfpw
!unzip /content/gdrive/MyDrive/300w/lfpw.zip -d lfpw/

# bounding box region
!mkdir -p bb
!unzip /content/gdrive/MyDrive/300w/bounding_boxes.zip -d bb/


## Read data


In [105]:
def read_mat(filename):
    """Load the ``bounding_boxes`` records stored in ``bb/Bounding Boxes/<filename>.mat``.

    For ``'bounding_boxes_ibug'`` only the first 135 records are returned;
    every other file is returned in full.
    """
    records = scipy.io.loadmat(f"bb/Bounding Boxes/{filename}.mat")['bounding_boxes'][0]
    # NOTE(review): presumably the iBUG file carries entries beyond the 135
    # images used here - confirm against the archive.
    return records[0:135] if filename == 'bounding_boxes_ibug' else records


def read_pts(filepath):
    """Parse a ``.pts`` annotation file into a NumPy array of its numeric rows.

    Lines starting with ``version:`` / ``n_points:`` and the ``{`` / ``}``
    delimiters are treated as comments, so only the coordinate rows are loaded.
    """
    skipped_markers = ("version:", "n_points:", "{", "}")
    return np.loadtxt(filepath, comments=skipped_markers)


def get_lm(dataset, img_name):
    """Load the landmark annotation of every image in a dataset folder.

    Args:
        dataset: folder holding the images and their ``.pts`` files.
        img_name: iterable of image file names (e.g. ``"foo.jpg"``).

    Returns:
        list with one ``read_pts`` result per entry of ``img_name``, in order.
    """
    # splitext removes only the final extension, so a name with extra dots
    # ("a.b.jpg" -> "a.b") still maps to the right .pts file; a plain
    # split('.')[0] would truncate it at the first dot.
    return [read_pts(f"{dataset}/{os.path.splitext(x)[0]}.pts") for x in img_name]


def mat_to_pd(filename, dataset):
    """Build a DataFrame describing one sub-dataset.

    Args:
        filename: bounding-box file name (without extension) passed to ``read_mat``.
        dataset: folder of the sub-dataset, used to build image and ``.pts`` paths.

    Returns:
        DataFrame with columns 'path' (``/content/<dataset>/<name>``), 'name',
        'bb' (taken from each record's ``bb_ground_truth``) and 'lm'
        (landmarks loaded by ``get_lm``).
    """
    records = []
    for entry in read_mat(filename):
        # NOTE(review): the nested [0][0][0] indexing presumably unwraps the
        # MATLAB struct/cell layout produced by loadmat - confirm.
        img_name = entry['imgName'][0][0][0]
        box = entry['bb_ground_truth'][0][0][0]
        records.append((f"/content/{dataset}/{img_name}", img_name, box))

    path, name, bb = zip(*records)
    landmarks = get_lm(dataset, name)

    return pd.DataFrame({'path': path, 'name': name, 'bb': bb, 'lm': landmarks})


# Sub-dataset folders and their bounding-box files, paired by position.
train_dataset = ['afw', 'helen/trainset', 'lfpw/trainset']
test_dataset = ['ibug', 'helen/testset', 'lfpw/testset']
train_bb = ['bounding_boxes_afw', 'bounding_boxes_helen_trainset',
            'bounding_boxes_lfpw_trainset']
test_bb = ['bounding_boxes_ibug', 'bounding_boxes_helen_testset',
           'bounding_boxes_lfpw_testset']

# One DataFrame per sub-dataset, in the same order as the lists above.
train_pd = [mat_to_pd(bb_file, folder)
            for folder, bb_file in zip(train_dataset, train_bb)]
test_pd = [mat_to_pd(bb_file, folder)
           for folder, bb_file in zip(test_dataset, test_bb)]


In [None]:
# Preview the first rows of the AFW frame (first entry of train_pd).
train_pd[0].head()


In [None]:
# Preview the first rows of the iBUG frame (first entry of test_pd).
test_pd[0].head()


Length of AFW - 337

Length of iBug - 135

Length of helen (train+test) - 2000 + 330

Length of LFPW (train+test) - 881 + 224


### Visualize annotated data


In [28]:
sample_idx = 2  # row shown by the plotting helpers (read as a notebook-level global)


def plot_annotated_img(pd):
    """Show one image of a dataset frame with its landmarks overlaid in red.

    The row is selected by the notebook-level ``sample_idx`` at call time.

    Args:
        pd: DataFrame with 'path' and 'lm' columns. The parameter name shadows
            the pandas alias inside this function only.

    Raises:
        FileNotFoundError: if the image at the row's path cannot be read.
    """
    path = pd['path'][sample_idx]
    img = cv2.imread(path)
    # cv2.imread reports failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"could not read image: {path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    lm = pd['lm'][sample_idx]
    plt.imshow(img)
    plt.scatter(lm[:, 0], lm[:, 1], s=3, c='r')


In [None]:
# AFW (train_pd[0]): original image with landmarks for row `sample_idx`
plot_annotated_img(train_pd[0])


In [None]:
# iBUG (test_pd[0]): original image with landmarks for row `sample_idx`
plot_annotated_img(test_pd[0])


In [None]:
# HELEN train split (train_pd[1]): original image with landmarks for row `sample_idx`
plot_annotated_img(train_pd[1])


In [None]:
# LFPW train split (train_pd[2]): original image with landmarks for row `sample_idx`
plot_annotated_img(train_pd[2])


### Histogram of image dimensions (after cropping)


In [44]:
def plot_cropped_hist(pd):
    """Plot histograms of bounding-box widths and heights for one dataset frame.

    Args:
        pd: DataFrame whose 'bb' column holds (x1, y1, x2, y2) boxes. The
            parameter name shadows the pandas alias inside this function only.
    """
    fig, ax = plt.subplots(1, 2)

    x1, y1, x2, y2 = zip(*pd.loc[:, 'bb'])
    widths = [right - left for left, right in zip(x1, x2)]
    heights = [bottom - top for top, bottom in zip(y1, y2)]

    # Label both panels so the figure can be read without the source code.
    ax[0].hist(widths, bins=5)
    ax[0].set(title='Bounding-box width', xlabel='pixels', ylabel='images')
    ax[1].hist(heights, bins=5)
    ax[1].set(title='Bounding-box height', xlabel='pixels', ylabel='images')
    fig.tight_layout()
    plt.show()


In [None]:
# AFW (train_pd[0]): distribution of bounding-box widths and heights
plot_cropped_hist(train_pd[0])


In [None]:
# HELEN train split (train_pd[1]): distribution of bounding-box widths and heights
plot_cropped_hist(train_pd[1])


In [None]:
# LFPW train split (train_pd[2]): distribution of bounding-box widths and heights
plot_cropped_hist(train_pd[2])


## Data preprocessing


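Since most crops are under 200 px per side, resize them all to a fixed square of `img_size` × `img_size` pixels (the code below sets `img_size = 96`; 192x192 was the size originally noted here).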


Crop image and adjust landmarks then save to **respective** folder


In [106]:
# Side length of the square images produced by the preprocessing step.
img_size = 96

train_lst = ['afw', 'helen_train', 'lfpw_train']
test_lst = ['ibug', 'helen_test', 'lfpw_test']

# Output folders for the cropped images, one per sub-dataset.
train_cropped_lst = [f"{x}_cropped" for x in train_lst]
test_cropped_lst = [f"{x}_cropped" for x in test_lst]

# exist_ok keeps the cell re-runnable without a separate existence check;
# the loop variable is not named `dir` so the builtin stays usable afterwards.
for out_dir in (train_cropped_lst + test_cropped_lst):
    os.makedirs(out_dir, exist_ok=True)


def resize_w_pad(img, lm, req_size):
    """Resize an image to fit ``req_size`` keeping its aspect ratio, then zero-pad.

    Args:
        img: image array indexed as (rows, cols[, channels]).
        lm: array-like of (x, y) landmark coordinates in ``img``; it is copied,
            never modified in place.
        req_size: (width, height) of the output canvas.

    Returns:
        (padded_img, landmarks): the resized image centred on a black canvas
        of ``req_size`` and a float array of the landmarks mapped onto that
        canvas, clipped to ``[0, width]`` on x and ``[0, height]`` on y.

    Raises:
        ValueError: if ``img`` has a zero-sized dimension.
    """
    original_shape = (img.shape[1], img.shape[0])  # (width, height)
    if min(original_shape) <= 0:
        raise ValueError(f"cannot resize an empty image of size {original_shape}")

    # Largest uniform scale at which the image fits the canvas on both axes.
    # For a square canvas this equals max(req_size) / max(original_shape).
    ratio = min(float(req_size[0]) / original_shape[0],
                float(req_size[1]) / original_shape[1])
    # Keep at least one pixel per axis so very thin images can still be resized.
    new_size = [max(1, int(x * ratio)) for x in original_shape]

    img = cv2.resize(img, tuple(new_size))
    delta_w, delta_h = req_size[0] - new_size[0], req_size[1] - new_size[1]
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)

    # np.array(..., dtype=float) copies, so the caller's landmarks are left
    # untouched and integer or object inputs are handled as well.
    scale = [float(new_size[0] / original_shape[0]),
             float(new_size[1] / original_shape[1])]
    lm = np.array(lm, dtype=float) * scale + [left, top]
    # Clip each axis against its own canvas dimension.
    lm = np.clip(lm, 0, [req_size[0], req_size[1]])
    img = cv2.copyMakeBorder(img, top, bottom, left,
                             right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
    return img, lm


def preprocess_data(row, dataset):
    """Crop one face, resize it to ``img_size`` and save it; return its landmarks.

    Args:
        row: record with 'path', 'name', 'bb' (x1, y1, x2, y2) and 'lm'
            (array of (x, y) landmarks in the full image).
        dataset: output prefix; the image is written to
            ``<dataset>_cropped/<name>``.

    Returns:
        1-D array [x0, y0, x1, y1, ...] of the landmarks in the resized image.

    Raises:
        FileNotFoundError: if the source image cannot be read.
    """
    un_img = cv2.imread(row['path'])
    # cv2.imread reports failure by returning None instead of raising.
    if un_img is None:
        raise FileNotFoundError(f"could not read image: {row['path']}")
    x1, y1, x2, y2 = row['bb']
    # crop; the start index is clamped at 0 because a negative start would
    # wrap around and select the wrong region
    raw_left, raw_top = int(x1-1), int(y1-1)
    left, top = max(raw_left, 0), max(raw_top, 0)
    c_img = un_img[top:int(y2+1), left:int(x2+1)]
    # When the start was clamped the crop moved right/down by the clamped
    # amount, so the landmark offset is moved by the same amount; otherwise
    # this is exactly `lm - [x1, y1]`.
    # NOTE(review): whether the bb / .pts coordinates are 0- or 1-based is not
    # visible here; the x1/y1 offset itself is kept as in the original.
    c_lm = row['lm'] - [x1 + (left - raw_left), y1 + (top - raw_top)]
    # resize_with_padding
    resized_img, resized_lm = resize_w_pad(c_img, c_lm, (img_size, img_size))

    cv2.imwrite(f"{dataset}_cropped/{row['name']}", resized_img)
    return resized_lm.ravel()


def generate_df(df, name):
    """Preprocess every row of ``df`` and return a flat landmark table.

    Each row is passed to ``preprocess_data`` (which also writes the cropped
    image). The result has a 'path' column pointing into ``<name>_cropped/``
    followed by columns "0".."135" holding the flattened landmark values.
    """
    landmark_rows = [preprocess_data(record, name) for _, record in df.iterrows()]

    # dataframe for image generator
    columns = {'path': f"{name}_cropped/" + df['name'].values}
    for i in range(136):
        columns[f"{i}"] = [lm[i] for lm in landmark_rows]
    return pd.DataFrame(columns)


# Preprocess every sub-dataset; entries stay aligned with train_lst / test_lst.
train_df = [generate_df(frame, tag) for frame, tag in zip(train_pd, train_lst)]
test_df = [generate_df(frame, tag) for frame, tag in zip(test_pd, test_lst)]

# Merge the test splits into a single frame and save it without the row index.
test_df = pd.concat(test_df, axis=0)
test_df.to_csv('test.csv', index=False)


## Sample


In [65]:
# Rebinds the notebook-level sample_idx that plot_annotated_img and
# plot_cropped_img read when called in the cells below.
sample_idx = 124


def crop_(path, box, name, bb, lm):
    """Load an image, crop it to ``box`` and resize it to ``img_size`` with padding.

    Args:
        path: image file path.
        box: (x1, y1, x2, y2) crop rectangle.
        name: unused; kept so existing calls keep working.
        bb: unused; kept so existing calls keep working.
        lm: array of (x, y) landmarks in the full image.

    Returns:
        (image, landmarks) as returned by ``resize_w_pad``; the image is in
        RGB channel order.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    un_img = cv2.imread(path)
    # cv2.imread reports failure by returning None instead of raising.
    if un_img is None:
        raise FileNotFoundError(f"could not read image: {path}")
    x1, y1, x2, y2 = box

    un_img = cv2.cvtColor(un_img, cv2.COLOR_BGR2RGB)

    # Clamp the crop start at 0: a negative start index would wrap around.
    raw_left, raw_top = int(x1-1), int(y1-1)
    left, top = max(raw_left, 0), max(raw_top, 0)
    c_img = un_img[top:int(y2+1), left:int(x2+1)]
    # Equals `lm - [x1, y1]` unless the start was clamped, in which case the
    # offset moves by the clamped amount to stay aligned with the crop.
    c_lm = lm - [x1 + (left - raw_left), y1 + (top - raw_top)]
    return resize_w_pad(c_img, c_lm, (img_size, img_size))


def plot_cropped_img(pd):
    """Show the cropped and resized version of row ``sample_idx`` with its landmarks.

    ``pd`` is a dataset frame with 'path', 'bb', 'name' and 'lm' columns; the
    parameter name shadows the pandas alias inside this function only.
    """
    img_path = pd['path'][sample_idx]
    box = pd['bb'][sample_idx]
    # crop_ takes the box twice (as `box` and as the unused `bb` argument).
    cropped_img, lms = crop_(img_path, box, pd['name'][sample_idx],
                             box, pd['lm'][sample_idx])
    flat = lms.ravel()

    plt.imshow(cropped_img)
    # even positions are x coordinates, odd positions are y coordinates
    plt.scatter(flat[0:136:2], flat[1:136:2], s=3, c='r')


In [None]:
# AFW (train_pd[0]): original image and landmarks for the current sample_idx
plot_annotated_img(train_pd[0])


In [None]:
# AFW (train_pd[0]): the same row after cropping and resizing
plot_cropped_img(train_pd[0])


In [None]:
# iBUG (test_pd[0]): original image and landmarks for the current sample_idx
plot_annotated_img(test_pd[0])


In [None]:
# iBUG (test_pd[0]): the same row after cropping and resizing
plot_cropped_img(test_pd[0])


## Data augmentation


In [107]:
# Randomly pick 50 % of the rows of each training frame; these rows are the
# input of every augmentation below.

# Fixed seed so the same rows are selected on every Restart & Run All.
AUG_SEED = 42
aug_rng = np.random.default_rng(AUG_SEED)

# One array of row positions per training frame (sampled without replacement).
random_df = [
    aug_rng.choice(df.shape[0], size=int(df.shape[0] * 0.5), replace=False)
    for df in train_df
]

aug_lst = [f"{x}_aug" for x in train_cropped_lst]

# exist_ok keeps the cell re-runnable; the loop variable is not named `dir`
# so the builtin stays usable afterwards.
for aug_dir in aug_lst:
    os.makedirs(aug_dir, exist_ok=True)


### Rotate image


In [108]:
def aug_rotate(df, dataset, angle):
    """Write rotated copies of the images in ``df`` and return their landmarks.

    Each image is rotated by ``angle`` with imgaug. If any rotated landmark
    falls outside ``[0, img_size]``, the source image is shrunk (10 px more on
    each retry), re-centred on a black ``img_size`` canvas and rotated again,
    until every landmark fits.

    Args:
        df: frame with a 'path' column at position 0 and the flattened
            landmarks x0, y0, x1, y1, ... at positions 1..136.
        dataset: output prefix; files are written to ``<dataset>_cropped_aug/``.
        angle: value passed to ``iaa.Affine(rotate=...)``; also used in the
            output file name.

    Returns:
        (paths, landmarks): array of the written file paths and an array with
        one row of 136 rotated coordinates per written image.

    Raises:
        FileNotFoundError: if a source image cannot be read.
        ValueError: if the image cannot be shrunk any further.
    """
    lis = []
    lin = []

    seq = iaa.Sequential(
        [
            iaa.Affine(rotate=angle)
        ]
    )
    shrink_step = 10  # side length removed from the image on each retry

    for idx, row in df.iterrows():
        # Explicit positional access: the landmark columns carry string
        # labels, so integer keys on the Series would rely on a deprecated
        # positional fallback.
        base_kps = row.iloc[1:137].to_numpy(dtype=float).reshape(-1, 2)
        cur_kps = base_kps.copy()
        cur_img = cv2.imread(row['path'])
        # cv2.imread reports failure by returning None instead of raising.
        if cur_img is None:
            raise FileNotFoundError(f"could not read image: {row['path']}")
        count = 1

        while True:
            kps = KeypointsOnImage(
                [Keypoint(x, y) for x, y in cur_kps], shape=cur_img.shape)
            new_img, new_kps = seq(image=cur_img, keypoints=kps)

            # Flatten to [x0, y0, x1, y1, ...]
            l = []
            for kp in new_kps.keypoints:
                l.append(kp.x)
                l.append(kp.y)

            if all(0 <= v <= img_size for v in l):
                out_path = f"{dataset}_cropped_aug/A_R{angle}_{idx}.png"
                cv2.imwrite(out_path, new_img)
                lin.append(out_path)
                lis.append(l)
                break

            # Some landmark left the canvas: retry with a smaller, centred copy.
            inner = img_size - (count * shrink_step)
            if inner <= 0:
                raise ValueError(
                    f"landmarks of {row['path']} do not fit after rotating by {angle}")

            cur_img = cv2.imread(row['path'])
            # Pass a copy so the untouched landmarks stay available for the
            # next retry.
            cur_img, cur_kps = resize_w_pad(
                cur_img, base_kps.copy(), (inner, inner))

            # Pad the shrunken square back to img_size x img_size and shift
            # the landmarks by the added border.
            delta = img_size - inner
            top, bottom = delta//2, delta-(delta//2)
            left, right = top, bottom
            cur_kps += [left, top]
            cur_kps = np.clip(cur_kps, top, img_size - top)
            cur_img = cv2.copyMakeBorder(
                cur_img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))

            count += 1

    return np.asarray(lin), np.asarray(lis)


In [109]:
# One augmented frame per training sub-dataset, covering every rotation angle.
aug_df = []
rotation_angles = [-30, -20, -10, 10, 20, 30]

for base_df, picked_rows, tag in zip(train_df, random_df, train_lst):
    # Rotate the sampled rows once per angle; each call returns (paths, landmarks).
    results = [aug_rotate(base_df.iloc[picked_rows], tag, ang)
               for ang in rotation_angles]

    np_lin = np.concatenate([paths for paths, _ in results], axis=0)
    np_lis = np.concatenate([coords for _, coords in results], axis=0)

    # Same layout as the training frames: 'path' followed by "0".."135".
    columns = {'path': np_lin}
    columns.update({f"{i}": [lm[i] for lm in np_lis] for i in range(136)})
    aug_df.append(pd.DataFrame(columns))


In [113]:
# Stack the augmented frames and the original training frames, then write the
# combined table (without the row index) to train.csv.
aug_train_df = pd.concat([*aug_df, *train_df], axis=0)
aug_train_df.to_csv('train.csv', index=False)


In [None]:
# Spot-check: reload train.csv and overlay one row's landmarks on its image.
dd = pd.read_csv('train.csv')
sample_idx = 4050

img = cv2.cvtColor(cv2.imread(dd['path'][sample_idx]), cv2.COLOR_BGR2RGB)

# Columns 1, 3, 5, ... hold x values and 2, 4, 6, ... hold y values.
xs = dd.iloc[sample_idx, 1:137:2]
ys = dd.iloc[sample_idx, 2:137:2]

plt.imshow(img)
plt.scatter(xs, ys, s=3, c='r')


## Save to GDrive


In [None]:
# Bundle every cropped / augmented image folder and both CSV files.
!zip -r 300w_f_96.zip afw_cropped afw_cropped_aug ibug_cropped helen_train_cropped helen_train_cropped_aug helen_test_cropped lfpw_train_cropped lfpw_train_cropped_aug lfpw_test_cropped train.csv test.csv


In [None]:
# Move the archive to Google Drive.
# NOTE(review): the destination folder is '300cw' while the source archives
# were read from '300w' - confirm this is intended.
!mv 300w_f_96.zip /content/gdrive/MyDrive/300cw/
