In [None]:
# Dependency: imgaug. To install it into this kernel's environment, run: %pip install imgaug

In [10]:
from pathlib import Path
import numpy as np
import pandas as pd
import torch
import cv2
from torch.utils.data import Dataset, DataLoader


from imgaug import augmenters as iaa


In [11]:
class ImgAugTransform:
    """Callable wrapper around a fixed imgaug augmentation pipeline.

    The pipeline is assembled once in ``__init__`` and stored on ``self.aug``;
    calling the instance converts the input to a NumPy array and runs it
    through that pipeline.
    """

    def __init__(self):
        # Degradation stage: one of the two children is selected, and each
        # child is itself wrapped in ``Sometimes(0.25, ...)``.
        noise_or_blur = iaa.OneOf([
            iaa.Sometimes(0.25, iaa.AdditiveGaussianNoise(scale=0.1 * 255)),
            iaa.Sometimes(0.25, iaa.GaussianBlur(sigma=(0, 3.0))),
        ])

        # Geometric stage: small rotation, scale and shift, with "edge" mode
        # used to fill pixels exposed by the warp.
        geometric = iaa.Affine(
            rotate=(-20, 20),
            mode="edge",
            scale={"x": (0.95, 1.05), "y": (0.95, 1.05)},
            translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)},
        )

        # Photometric stages.
        # NOTE(review): imgaug's colour augmenters presumably expect RGB input;
        # confirm the channel order the caller passes in.
        hue_saturation = iaa.AddToHueAndSaturation(value=(-10, 10), per_channel=True)
        gamma = iaa.GammaContrast((0.3, 2))

        # Horizontal flip with probability 0.5.
        mirror = iaa.Fliplr(0.5)

        self.aug = iaa.Sequential([
            noise_or_blur,
            geometric,
            hue_saturation,
            gamma,
            mirror,
        ])

    def __call__(self, img):
        """Return an augmented copy of ``img``.

        Args:
            img: Anything ``np.array`` accepts (the result is handed to
                ``self.aug.augment_image``).

        Returns:
            Whatever ``self.aug.augment_image`` returns for the array.
        """
        as_array = np.array(img)
        return self.aug.augment_image(as_array)

In [12]:
class FaceDataset(Dataset):
    """Face-image / apparent-age dataset backed by a ground-truth CSV.

    For a split ``data_type`` the constructor reads
    ``<data_dir>/gt_avg_<data_type>.csv`` and expects every listed image at
    ``<data_dir>/<data_type>/<file_name>_face.jpg``. Rows whose ``file_name``
    appears in the ``img_name`` column of the ignore-list CSV are skipped.

    Args:
        data_dir: Root directory holding the CSV files and split folders.
        data_type: One of ``"train"``, ``"valid"`` or ``"test"``.
        img_size: Images are resized to ``img_size x img_size``.
        augment: If true, images go through ``ImgAugTransform`` and the age
            label is jittered with Gaussian noise on every access.
        age_stddev: Multiplier applied to the per-sample age standard
            deviation when jittering the label.
        ignore_path: Optional path to the ignore-list CSV. Defaults to
            ``ignore_list.csv`` in the current working directory.

    Attributes:
        x: List of image paths (as strings).
        y: List of ``apparent_age_avg`` values, aligned with ``x``.
        std: List of ``apparent_age_std`` values, aligned with ``x``.
    """

    def __init__(self, data_dir, data_type, img_size=224, augment=False, age_stddev=1.0,
                 ignore_path=None):
        assert data_type in ("train", "valid", "test"), \
            f"data_type must be 'train', 'valid' or 'test', got {data_type!r}"
        root = Path(data_dir)
        csv_path = root.joinpath(f"gt_avg_{data_type}.csv")
        img_dir = root.joinpath(data_type)
        self.img_size = img_size
        self.augment = augment
        self.age_stddev = age_stddev

        # The no-op transform is a named function rather than a lambda so the
        # dataset stays picklable (needed by DataLoader worker processes that
        # are started with the "spawn" method).
        self.transform = ImgAugTransform() if augment else self._identity

        self.x = []
        self.y = []
        self.std = []
        df = pd.read_csv(str(csv_path))

        if ignore_path is None:
            ignore_path = Path("./").resolve().joinpath("ignore_list.csv")
        # A set gives constant-time membership tests inside the row loop.
        ignore_img_names = set(pd.read_csv(str(ignore_path))["img_name"].values)

        for _, row in df.iterrows():
            img_name = row["file_name"]

            if img_name in ignore_img_names:
                continue

            img_path = img_dir.joinpath(img_name + "_face.jpg")
            assert img_path.is_file(), f"missing image file: {img_path}"
            self.x.append(str(img_path))
            self.y.append(row["apparent_age_avg"])
            self.std.append(row["apparent_age_std"])

    @staticmethod
    def _identity(img):
        """Return ``img`` unchanged (transform used when augmentation is off)."""
        return img

    def __len__(self):
        """Return the number of samples kept after applying the ignore list."""
        return len(self.y)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A ``(image, age)`` pair: ``image`` is a float32 tensor with the
            channel axis moved to the front, ``age`` is the label rounded to
            an integer and clipped to ``[0, 100]``.

        Raises:
            RuntimeError: If OpenCV cannot read the image file.
        """
        img_path = self.x[idx]
        age = self.y[idx]

        if self.augment:
            # Jitter the label in proportion to the annotators' disagreement.
            age += np.random.randn() * self.std[idx] * self.age_stddev

        # Flag 1 asks OpenCV for a 3-channel colour image.
        img = cv2.imread(str(img_path), 1)
        if img is None:
            # imread signals failure by returning None instead of raising;
            # fail here with the path rather than deep inside cv2.resize.
            raise RuntimeError(f"failed to read image: {img_path}")

        img = cv2.resize(img, (self.img_size, self.img_size))
        img = self.transform(img).astype(np.float32)
        # (H, W, C) -> (C, H, W)
        return torch.from_numpy(np.transpose(img, (2, 0, 1))), np.clip(round(age), 0, 100)


In [13]:
# Sanity check: build every split once (no augmentation) and report its size.
for split in ("train", "valid", "test"):
    dataset = FaceDataset("./appa-real-release/", split)
    print("{} dataset len: {}".format(split, len(dataset)))

train dataset len: 3995
valid dataset len: 1500
test dataset len: 1978


In [20]:
# Training split: augmentation on, shuffled, and the trailing partial batch is dropped.
train_dataset = FaceDataset(
    data_dir="./appa-real-release/",
    data_type="train",
    augment=True,
    age_stddev=1.0,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=True,
)

# Validation split: no augmentation, fixed order, every sample kept.
val_dataset = FaceDataset(
    data_dir="./appa-real-release/",
    data_type="valid",
    augment=False,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=128,
    shuffle=False,
    drop_last=False,
)

# Test split: no augmentation, one sample per batch, fixed order.
test_dataset = FaceDataset(
    data_dir="./appa-real-release/",
    data_type="test",
    augment=False,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    drop_last=False,
)

In [26]:
# Peek at the first test batch only, to confirm the image tensor dtype.
for batch in test_loader:
    x, y = batch
    print(x.dtype)
    break

torch.float32
