In [1]:
import pandas as pd
import dlib
from tqdm import tqdm
from distutils.dir_util import copy_tree
import os
import itertools
import shutil

In [2]:
# Dataset splits that gen_debiased_datasets copies into each experiment folder.
target_sets = [f"set{idx}" for idx in (1, 5, 8, 10)]

# Presumably the fractions of training images to perturb per experiment -- confirm against usage.
target_percs = [0.1, 0.25, 0.5]

# Experiment folder names follow "male_<label>_female_<label>": one gender carries a
# whole-number percentage label while the other stays 'ori'.
target_folders = (
    [f"male_{int(perc * 100)}_female_ori" for perc in target_percs]
    + [f"male_ori_female_{int(perc * 100)}" for perc in target_percs]
)

In [3]:
# Absolute path of the preprocessing workspace; experiment folders are written beneath it.
preprocessing = "/home/monash/Desktop/fyp-work/fyp-ma-13/fyp-models/preprocessing"

# Source datasets, read as "<root>/<set name>" when building an experiment.
root = preprocessing + "/cv_datasets"

In [4]:
def copy_folder(fromDir, toDir):
    """
    Recursively copy the contents of ``fromDir`` into ``toDir``.

    ``toDir`` (and any missing parent directories) is created when absent.
    When ``toDir`` already exists the two trees are merged, and files with
    the same relative path are overwritten by the copy from ``fromDir``.

    Parameters
    ----------
    fromDir : str or os.PathLike
        Existing directory to copy from.
    toDir : str or os.PathLike
        Destination directory.

    Raises
    ------
    OSError
        If ``fromDir`` does not exist or is not a directory, or a file
        cannot be copied.
    """
    # shutil.copytree replaces distutils' copy_tree: distutils is deprecated
    # (PEP 632) and copy_tree remembers which directories it created for the
    # lifetime of the process, so re-running after deleting the destination
    # folder fails. dirs_exist_ok=True keeps the merge-into-existing behaviour.
    shutil.copytree(fromDir, toDir, dirs_exist_ok=True)

In [9]:
def _copy_perturbed_images(perturbed_dir, train_dir, perc, desc):
    """
    Copy perturbed images from ``perturbed_dir`` into ``train_dir``.

    The number of images copied is ``int(n * perc)`` where ``n`` is the number
    of entries currently in ``train_dir``. Each image keeps its file name, so
    an existing file with the same name in ``train_dir`` is overwritten and
    any other name is added alongside the existing images.
    NOTE(review): presumably the perturbed images share file names with the
    originals so that this is a true replacement -- confirm.

    Raises
    ------
    ValueError
        If ``perturbed_dir`` holds fewer images than need to be copied.
    """
    # os.listdir returns entries in arbitrary order; sort so that the same
    # images are selected on every run.
    perturbed_names = sorted(os.listdir(perturbed_dir))
    num_replace = int(len(os.listdir(train_dir)) * perc)
    if num_replace > len(perturbed_names):
        # Fail before copying anything rather than with an IndexError halfway through.
        raise ValueError(
            f"Need {num_replace} perturbed images for '{train_dir}' but "
            f"'{perturbed_dir}' only contains {len(perturbed_names)}"
        )
    for name in tqdm(perturbed_names[:num_replace], desc=desc):
        shutil.copy(os.path.join(perturbed_dir, name), train_dir)


def gen_debiased_datasets(male_perc, female_perc):
    """
    Generate a debiased dataset with a portion of the training images perturbed.

    1. Copies every set in ``target_sets`` from ``root`` into
       ``<preprocessing>/experiments_20112021/male_<label>_female_<label>``.
    2. For each gender given as a float, copies that fraction of perturbed
       images (glasses for male, makeup for female) into the copied
       ``train/<gender>`` folder.

    Parameters
    ----------
    male_perc : float or 'ori'
        Fraction of the male training images to perturb, or ``'ori'`` to
        leave the male training images untouched.
    female_perc : float or 'ori'
        Fraction of the female training images to perturb, or ``'ori'`` to
        leave the female training images untouched.

    Raises
    ------
    AssertionError
        If an argument is neither a float nor ``'ori'``.
    ValueError
        If a perturbed-image folder holds fewer images than requested.
    """
    assert isinstance(male_perc, float) or male_perc == 'ori', \
        "male_perc must be a float or 'ori'"
    assert isinstance(female_perc, float) or female_perc == 'ori', \
        "female_perc must be a float or 'ori'"

    # round() instead of int(): 0.29 * 100 is 28.999... and would be labelled 28.
    male_label = round(male_perc * 100) if isinstance(male_perc, float) else male_perc
    female_label = round(female_perc * 100) if isinstance(female_perc, float) else female_perc
    target_folder = f"male_{male_label}_female_{female_label}"  # male_x_female_x
    target_fp = f"{preprocessing}/experiments_20112021/{target_folder}"

    # 1. Copy set1, set5, set8 and set10 from cv_datasets to target_fp
    for s in target_sets:
        copy_folder(f"{root}/{s}", f"{target_fp}/{s}")

    # Perturbed image folders; both are relative to the current working directory.
    glasses_fp = 'glasses_full/'       # Only for males
    makeup_fp = 'makeup_full/female'   # Only for females

    # 2. Replace training images with perturbed ones
    for s in target_sets:
        if isinstance(male_perc, float):
            _copy_perturbed_images(glasses_fp, f'{target_fp}/{s}/train/male',
                                   male_perc, 'Replacing training male images...')
        if isinstance(female_perc, float):
            # The female branch previously read from the glasses folder and
            # left makeup_fp unused.
            _copy_perturbed_images(makeup_fp, f'{target_fp}/{s}/train/female',
                                   female_perc, 'Replacing training female images...')

In [10]:
# Build the experiment where 10% of the male training images are perturbed
# and the female training images are left as they are.
gen_debiased_datasets(male_perc=0.1, female_perc='ori')

Replacing training male images...: 100%|██████████| 1161/1161 [00:00<00:00, 15611.70it/s]
Replacing training male images...: 100%|██████████| 1161/1161 [00:00<00:00, 14020.14it/s]
Replacing training male images...:   0%|          | 0/1161 [00:00<?, ?it/s]

Num: 1161
Num: 1161
Num: 1161


Replacing training male images...: 100%|██████████| 1161/1161 [00:00<00:00, 14049.63it/s]
Replacing training male images...: 100%|██████████| 1161/1161 [00:00<00:00, 14136.75it/s]

Num: 1161



