In [1]:
from tqdm import tqdm
import numpy as np
import os
from os import path
import glob
import shutil
import pandas as pd

In [3]:
'''
    dataset_dir: '../../Data'
    300W, afw, helen, ibug, lfpw, 300VW
'''

"\n    dataset_dir: '../../Data'\n    300W, afw, helen, ibug, lfpw, 300VW\n"

In [4]:
class Merge_Datasets:
    """Merge the afw, ibug, helen and lfpw landmark datasets into one folder.

    For every ``*.pts`` annotation holding 68 (x, y) points, the landmarks are
    re-saved as a text file under ``<target_dir>/<dataset>/label`` and the
    image with the same stem is copied to ``<target_dir>/<dataset>/image``.
    Each successfully processed pair is recorded in ``self.df`` with paths
    relative to ``target_dir``.

    Attributes:
        base_dir: directory that contains the raw dataset folders.
        target_dir: ``<base_dir>/300W_train``; root of the merged output.
        train_csv: ``<target_dir>/train.csv``; suggested path for `save_csv`.
        df: DataFrame with the columns ``image`` and ``label``.
    """

    # Shape a landmark array must have to be accepted: 68 points, (x, y) each.
    PTS_SHAPE = (68, 2)
    # Sub-folders used by datasets that ship a separate train/test split.
    SPLITS = ('trainset', 'testset')

    def __init__(self, base_dir='../../Data'):
        """Set up the paths and an empty index.

        Args:
            base_dir: directory that contains the raw dataset folders.
        """
        self.base_dir = base_dir
        self.target_dir = path.join(self.base_dir, '300W_train')
        self.train_csv = path.join(self.target_dir, 'train.csv')

        self.df = pd.DataFrame(columns=['image', 'label'])

    def read_pts(self, filename):
        """Load a ``.pts`` file as a float array.

        Lines starting with ``version:``, ``n_points:``, ``{`` or ``}`` are
        treated as comments so only the coordinate rows are parsed.
        """
        return np.loadtxt(filename, comments=("version:", "n_points:", "{", "}"))

    def _append_records(self, records):
        """Append a batch of ``{'image': ..., 'label': ...}`` rows to ``self.df``."""
        if not records:
            return
        new_rows = pd.DataFrame(records, columns=['image', 'label'])
        if self.df.empty:
            self.df = new_rows
        else:
            self.df = pd.concat([self.df, new_rows], ignore_index=True)

    def _merge_dir(self, ds_name, img_type, mode=None):
        """Import every annotated image of one source folder.

        Args:
            ds_name: dataset folder name below ``base_dir``.
            img_type: image file extension without the dot (e.g. ``'jpg'``).
            mode: optional sub-folder of the dataset (e.g. ``'trainset'``).
                When given, it is also used as a ``<mode>_`` prefix on the
                output file names so both splits can share one folder.

        Raises:
            FileNotFoundError: if the image belonging to a valid ``.pts``
                file is missing (raised by ``shutil.copyfile``).
        """
        if mode is None:
            src_dir = path.join(self.base_dir, ds_name)
            prefix, desc = '', ds_name
        else:
            src_dir = path.join(self.base_dir, ds_name, mode)
            prefix, desc = mode + '_', ds_name + '_' + mode

        image_dir = path.join(self.target_dir, ds_name, 'image')
        label_dir = path.join(self.target_dir, ds_name, 'label')
        # Neither np.savetxt nor shutil.copyfile creates missing parent folders.
        os.makedirs(image_dir, exist_ok=True)
        os.makedirs(label_dir, exist_ok=True)

        # glob order depends on the filesystem; sort for a reproducible index.
        pts_list = sorted(glob.glob(path.join(src_dir, '*.pts')))

        records = []
        try:
            for pts_file in tqdm(pts_list, desc=desc):
                pts_arr = self.read_pts(pts_file)
                if pts_arr.shape != self.PTS_SHAPE:
                    print(f'shape error: {pts_file} has shape {pts_arr.shape}')
                    continue

                stem = path.splitext(path.basename(pts_file))[0]
                src_img = stem + '.' + img_type
                out_img = prefix + src_img
                out_csv = prefix + stem + '.csv'

                # Copy the image before writing the label so that a missing
                # image does not leave an orphan label file behind.
                shutil.copyfile(path.join(src_dir, src_img),
                                path.join(image_dir, out_img))
                # Default np.savetxt delimiter: the file is space separated.
                np.savetxt(path.join(label_dir, out_csv), pts_arr, fmt='%1.6f')

                records.append({
                    'image': path.join(ds_name, 'image', out_img),
                    'label': path.join(ds_name, 'label', out_csv),
                })
        finally:
            # Keep the rows that were completed even if a later file failed.
            self._append_records(records)

    def merge_total(self, ds_name, img_type='jpg'):
        """Import a dataset whose files sit directly in ``<base_dir>/<ds_name>``.

        Args:
            ds_name: dataset folder name below ``base_dir``.
            img_type: image file extension without the dot.
        """
        self._merge_dir(ds_name, img_type)

    def merge_traintest(self, ds_name, img_type):
        """Import a dataset split into ``trainset`` and ``testset`` folders.

        Both splits go to the same output folders; file names are prefixed
        with the split name.

        Args:
            ds_name: dataset folder name below ``base_dir``.
            img_type: image file extension without the dot.
        """
        for mode in self.SPLITS:
            self._merge_dir(ds_name, img_type, mode)

    def merge_afw(self):
        """Import the ``afw`` dataset (flat layout, jpg images)."""
        self.merge_total('afw')

    def merge_ibug(self):
        """Import the ``ibug`` dataset (flat layout, jpg images)."""
        self.merge_total('ibug')

    def merge_helen(self):
        """Import the ``helen`` dataset (train/test layout, jpg images)."""
        self.merge_traintest('helen', 'jpg')

    def merge_lfpw(self):
        """Import the ``lfpw`` dataset (train/test layout, png images)."""
        self.merge_traintest('lfpw', 'png')

    def merge(self):
        """Import afw, ibug, helen and lfpw, then print the index shape."""
        self.merge_afw()
        self.merge_ibug()
        self.merge_helen()
        self.merge_lfpw()
        print(f"DataFrame's shape: {self.df.shape}")

    def dataframe(self):
        """Return the index DataFrame."""
        return self.df

    def save_csv(self, path):
        """Write the index DataFrame to ``path`` with ``DataFrame.to_csv``.

        Args:
            path: destination handed to ``to_csv``. For filesystem paths the
                parent folder is created when it does not exist yet.
        """
        # The parameter shadows the module-level `path` alias, hence os.path.
        if isinstance(path, (str, os.PathLike)):
            parent = os.path.dirname(os.fspath(path))
            if parent:
                os.makedirs(parent, exist_ok=True)
        self.df.to_csv(path)

    def __len__(self):
        """Number of image/label pairs recorded so far."""
        return len(self.df)

In [13]:
# Merge afw, ibug, helen and lfpw, then store the index CSV at the path the
# merger itself exposes as `train_csv`.
MergeDatasets = Merge_Datasets(base_dir='../../Data')
MergeDatasets.merge()
MergeDatasets.save_csv(path=MergeDatasets.train_csv)

afw: 100%|██████████| 337/337 [00:00<00:00, 463.45it/s]
ibug: 100%|██████████| 135/135 [00:00<00:00, 495.54it/s]
helen_trainset: 100%|██████████| 2000/2000 [00:04<00:00, 497.41it/s]
helen_testset: 100%|██████████| 330/330 [00:00<00:00, 481.40it/s]
lfpw_trainset: 100%|██████████| 811/811 [00:01<00:00, 481.29it/s]
lfpw_testset: 100%|██████████| 224/224 [00:00<00:00, 486.66it/s]

DataFrame's shape: (3837, 2)





In [44]:
class Merge_300W:
    """Collect the 300W indoor and outdoor sets into one image/label folder pair.

    For every ``*.pts`` annotation holding 68 (x, y) points, the landmarks are
    re-saved as a text file and the ``png`` image with the same stem is
    copied. Each processed pair is recorded in ``self.df`` with paths relative
    to ``target_dir``.

    Attributes:
        base_dir: directory that contains the raw ``300W`` folder.
        target_dir: ``<base_dir>/300W``; output goes to
            ``<target_dir>/300W/image`` and ``<target_dir>/300W/label``.
        df: DataFrame with the columns ``image`` and ``label``.
    """

    # Shape a landmark array must have to be accepted: 68 points, (x, y) each.
    PTS_SHAPE = (68, 2)
    # Source sub-folders of the 300W dataset, processed in this order.
    MODES = ('01_Indoor', '02_Outdoor')

    def __init__(self, base_dir='../../Data'):
        """Set up the paths and an empty index.

        Args:
            base_dir: directory that contains the raw ``300W`` folder.
        """
        self.base_dir = base_dir
        self.target_dir = path.join(self.base_dir, '300W')

        self.df = pd.DataFrame(columns=['image', 'label'])

    def read_pts(self, filename):
        """Load a ``.pts`` file as a float array.

        Lines starting with ``version:``, ``n_points:``, ``{`` or ``}`` are
        treated as comments so only the coordinate rows are parsed.
        """
        return np.loadtxt(filename, comments=("version:", "n_points:", "{", "}"))

    def _append_records(self, records):
        """Append a batch of ``{'image': ..., 'label': ...}`` rows to ``self.df``."""
        if not records:
            return
        new_rows = pd.DataFrame(records, columns=['image', 'label'])
        if self.df.empty:
            self.df = new_rows
        else:
            self.df = pd.concat([self.df, new_rows], ignore_index=True)

    def merge(self):
        """Import the indoor and outdoor sets, then print the index shape.

        Raises:
            FileNotFoundError: if the image belonging to a valid ``.pts``
                file is missing (raised by ``shutil.copyfile``).
        """
        ds_name = '300W'
        img_type = 'png'

        # NOTE(review): with target_dir = <base_dir>/300W the files land in
        # <base_dir>/300W/300W/{image,label}; kept as-is so the recorded
        # relative paths do not change. Confirm the doubled folder is intended.
        image_dir = path.join(self.target_dir, ds_name, 'image')
        label_dir = path.join(self.target_dir, ds_name, 'label')
        # Neither np.savetxt nor shutil.copyfile creates missing parent folders.
        os.makedirs(image_dir, exist_ok=True)
        os.makedirs(label_dir, exist_ok=True)

        for mode in self.MODES:
            src_dir = path.join(self.base_dir, ds_name, mode)
            # glob order depends on the filesystem; sort for a reproducible index.
            pts_list = sorted(glob.glob(path.join(src_dir, '*.pts')))

            records = []
            try:
                for pts_file in tqdm(pts_list, desc=ds_name + '_' + mode):
                    pts_arr = self.read_pts(pts_file)
                    if pts_arr.shape != self.PTS_SHAPE:
                        print(f'shape error: {pts_file} has shape {pts_arr.shape}')
                        continue

                    stem = path.splitext(path.basename(pts_file))[0]
                    base_img = stem + '.' + img_type
                    base_csv = stem + '.csv'

                    # Copy the image before writing the label so that a missing
                    # image does not leave an orphan label file behind.
                    shutil.copyfile(path.join(src_dir, base_img),
                                    path.join(image_dir, base_img))
                    # Default np.savetxt delimiter: the file is space separated.
                    np.savetxt(path.join(label_dir, base_csv), pts_arr, fmt='%1.6f')

                    records.append({
                        'image': path.join(ds_name, 'image', base_img),
                        'label': path.join(ds_name, 'label', base_csv),
                    })
            finally:
                # Keep the rows that were completed even if a later file failed.
                self._append_records(records)

        print(f"DataFrame's shape: {self.df.shape}")

    def dataframe(self):
        """Return the index DataFrame."""
        return self.df

    def save_csv(self, path):
        """Write the index DataFrame to ``path`` with ``DataFrame.to_csv``.

        Args:
            path: destination handed to ``to_csv``. For filesystem paths the
                parent folder is created when it does not exist yet.
        """
        # The parameter shadows the module-level `path` alias, hence os.path.
        if isinstance(path, (str, os.PathLike)):
            parent = os.path.dirname(os.fspath(path))
            if parent:
                os.makedirs(parent, exist_ok=True)
        self.df.to_csv(path)

    def __len__(self):
        """Number of image/label pairs recorded so far."""
        return len(self.df)

In [45]:
# Merger for the 300W indoor/outdoor sets, rooted at the shared data folder.
Merge300W = Merge_300W(base_dir='../../Data')

In [46]:
# Copy the 300W images, write their landmark files and fill Merge300W.df.
Merge300W.merge()

300W_01_Indoor: 100%|██████████| 300/300 [00:02<00:00, 135.90it/s]
300W_02_Outdoor:   0%|          | 0/300 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: '../../Data/300W/image/outdoor_289.png'

In [40]:
# Write the 300W index to eval.csv, relative to the current working directory.
Merge300W.save_csv(path='eval.csv')