In [1]:
import pandas as pd
import dlib
from tqdm import tqdm
from distutils.dir_util import copy_tree
import os
import itertools
import shutil

In [2]:
# Names of the dataset folders that each experiment is built from
target_sets = ["set1", "set5", "set8", "set10"]

# Perturbation fractions; the folder names below encode them as percentages
target_percs = [0.1, 0.25, 0.5]

# Experiment folders: male-only perturbation first, then female-only
target_folders = [
    *(f"male_{round(p * 100)}_female_ori" for p in target_percs),
    *(f"male_ori_female_{round(p * 100)}" for p in target_percs),
]

In [3]:
# Base folder holding the source datasets and the generated experiments.
# The default is the original machine-specific location; set the
# FYP_PREPROCESSING_DIR environment variable to run the notebook elsewhere
# without editing this cell.
preprocessing = os.environ.get(
    "FYP_PREPROCESSING_DIR",
    "/home/monash/Desktop/fyp-work/fyp-ma-13/fyp-models/preprocessing",
)
# Folder containing the set folders (set1, set5, ...) that get copied
root = f"{preprocessing}/cv_datasets"

In [4]:
def copy_folder(fromDir, toDir):
    """
    Copies entire folder to another directory

    The contents of fromDir are merged into toDir: toDir (and any missing
    parents) is created when absent, files that already exist in toDir are
    overwritten, and files that only exist in toDir are left alone.

    Uses shutil.copytree instead of distutils' copy_tree, because distutils
    is deprecated and removed from the standard library in Python 3.12.

    fromDir: path of the folder to copy
    toDir:   path of the destination folder
    """
    # dirs_exist_ok=True keeps the "merge into an existing folder" behaviour
    shutil.copytree(fromDir, toDir, dirs_exist_ok=True)

In [12]:
def _perc_label(perc):
    """
    Folder-name label for a percentage argument: 0.25 -> 25, 'ori' -> 'ori'
    """
    if isinstance(perc, float):
        # Round away float noise before truncating,
        # e.g. 0.29*100 == 28.999999999999996 would otherwise become 28
        return int(round(perc * 100, 6))
    return perc


def _copy_perturbed_images(perturbed_dir, train_dir, perc, desc):
    """
    Copies perturbed images from perturbed_dir into train_dir

    The number of images copied is int(<entries in train_dir> * perc). Images
    are taken in sorted filename order so that repeated runs pick the same
    files. Raises IndexError before copying anything when perturbed_dir does
    not hold enough images.

    NOTE(review): shutil.copy only overwrites a training image when the
    perturbed file has the same filename; otherwise the perturbed image is
    added next to the originals - confirm the filenames match.
    """
    # Skip sub-directories (e.g. .ipynb_checkpoints); shutil.copy needs files
    candidates = sorted(
        name for name in os.listdir(perturbed_dir)
        if os.path.isfile(os.path.join(perturbed_dir, name))
    )
    num_replace = int(len(os.listdir(train_dir)) * perc)  # Number of images to replace
    if num_replace > len(candidates):
        raise IndexError(
            f"Need {num_replace} perturbed images for {train_dir} "
            f"but {perturbed_dir} only contains {len(candidates)}"
        )
    for name in tqdm(candidates[:num_replace], desc):
        shutil.copy(os.path.join(perturbed_dir, name), train_dir)


def gen_debiased_datasets(male_perc, female_perc):
    """
    Generates a debiased dataset with a certain portion perturbed/unperturbed

    1. Copies set1, set5, set8 and set10 from cv_datasets to target_fp
    2. Replaces training images with perturbed images
       (glasses for male, makeup for female)

    male_perc / female_perc: fraction (float) of the train/male or
        train/female folder to perturb, or 'ori' to leave that gender's
        training images untouched.

    The result is written to
    {preprocessing}/experiments_20112021/male_<x>_female_<y>, where <x>/<y>
    is the percentage (e.g. 25) or 'ori'.
    """
    assert isinstance(male_perc, float) or male_perc == 'ori'
    assert isinstance(female_perc, float) or female_perc == 'ori'
    target_folder = f"male_{_perc_label(male_perc)}_female_{_perc_label(female_perc)}" # male_x_female_x
    target_fp = f"{preprocessing}/experiments_20112021/{target_folder}"

    # Sources of perturbed images; both are relative to the working directory
    glasses_fp = 'glasses_full/'       # Only for males
    makeup_fp  = 'makeup_full/female'  # Only for females

    # 1. Copy set1, set5, set8 and set10 from cv_datasets to target_fp
    for s in target_sets:
        copy_folder(f"{root}/{s}", f'{target_fp}/{s}')

    # 2. Replace training images with perturbed
    for s in target_sets:
        if isinstance(male_perc, float):
            _copy_perturbed_images(
                glasses_fp,
                f'{target_fp}/{s}/train/male',
                male_perc,
                f'Replacing training male images for set {s}...',
            )
        if isinstance(female_perc, float):
            # Female images come from the makeup folder, not the glasses one
            _copy_perturbed_images(
                makeup_fp,
                f'{target_fp}/{s}/train/female',
                female_perc,
                f'Replacing training female images for set {s}...',
            )

In [13]:
percs = [0.1, 0.25, 0.5]

# (male_perc, female_perc) pairs, in the order the datasets are generated
runs = (
    [(p, 'ori') for p in percs]      # male perturbation only
    + [('ori', p) for p in percs]    # female perturbation only
    + [(p, p) for p in percs]        # both
)

for male_perc, female_perc in runs:
    gen_debiased_datasets(male_perc, female_perc)

Replacing training male images for set set1...: 100%|██████████| 1288/1288 [00:00<00:00, 13201.37it/s]
Replacing training male images for set set5...: 100%|██████████| 1161/1161 [00:00<00:00, 14372.32it/s]
Replacing training male images for set set8...: 100%|██████████| 1161/1161 [00:00<00:00, 14085.59it/s]
Replacing training male images for set set10...: 100%|██████████| 1161/1161 [00:00<00:00, 14213.29it/s]
Replacing training male images for set set1...: 100%|██████████| 3386/3386 [00:00<00:00, 17442.93it/s]
Replacing training male images for set set5...: 100%|██████████| 2902/2902 [00:00<00:00, 13601.41it/s]
Replacing training male images for set set8...: 100%|██████████| 2902/2902 [00:00<00:00, 13871.74it/s]
Replacing training male images for set set10...: 100%|██████████| 2902/2902 [00:00<00:00, 13835.00it/s]
Replacing training male images for set set1...: 100%|██████████| 5805/5805 [00:00<00:00, 17453.59it/s]
Replacing training male images for set set5...: 100%|██████████| 5805/5