# Tools and Utils to create the dataset structure

In [40]:
import os
import sys
import zipfile
from tqdm import tqdm
import re

In [3]:
# Base data folder handed to remove_prefix / rename_folders in the cells below
# (machine-specific absolute Windows path; adjust before running elsewhere)
data_base_folder = 'D:\\Dev\\MinorProjDataset\\V3\\Data'

In [28]:
# Remove prefix from all files in a folder
def remove_prefix(folder: str, prefix: str):
    """Strip ``prefix`` from the name of every file under ``folder``.

    The folder is walked recursively. A file whose name starts with
    ``prefix`` is renamed in place with the prefix removed; when the prefix
    is directly followed by a ``.``, that separator dot is removed as well.
    Files that would be left with an empty name (a file called exactly
    ``prefix`` or ``prefix.``) are not renamed.

    Args:
        folder: Root folder to process.
        prefix: Leading text to remove from file names. An empty prefix
            means there is nothing to remove and the function returns
            immediately.

    Raises:
        OSError: If a rename fails (propagated from ``os.rename``).
    """
    # Nothing to strip. Without this guard the "prefix + '.'" check below
    # would match every dotfile and remove its leading dot.
    if not prefix:
        return

    # Get the total number of files to be processed
    total_files = sum(len(files) for _, _, files in os.walk(folder))

    dotted_prefix = prefix + '.'

    # Recursively remove prefix from all the files in this folder and its subfolders
    with tqdm(total=total_files, desc="Removing Prefix", unit="file") as pbar:
        for root, _, files in os.walk(folder):
            for file in files:
                if file.startswith(dotted_prefix):
                    new_name = file[len(dotted_prefix):]
                elif file.startswith(prefix):
                    new_name = file[len(prefix):]
                else:
                    new_name = ''

                # An empty name means "no match" or "nothing left after the
                # prefix"; renaming to '' would target the folder itself and
                # abort the whole run, so such files are skipped.
                if new_name:
                    os.rename(os.path.join(root, file), os.path.join(root, new_name))
                pbar.update(1)

In [38]:
# Rename all folders in a folder
def rename_folders(base_folder: str, old_name_re: str, new_name: str):
    """Rename every sub-folder of ``base_folder`` whose name matches a regex.

    The tree is walked recursively. A folder is renamed when ``old_name_re``
    matches at the start of its name (``re.match`` semantics); the new name
    is the result of ``re.sub(old_name_re, new_name, <folder name>)``.

    Args:
        base_folder: Root folder whose sub-folders are inspected. The root
            itself is never renamed.
        old_name_re: Regular expression selecting the folders to rename.
        new_name: Replacement text passed to ``re.sub``.

    Raises:
        OSError: If a rename fails (propagated from ``os.rename``).
    """
    pattern = re.compile(old_name_re)

    # Walk bottom-up: a folder is only renamed after everything below it has
    # been visited. A top-down walk would try to descend into the old path
    # after the rename, fail silently, and skip the whole subtree.
    for root, dirs, _ in os.walk(base_folder, topdown=False):
        for dir_name in dirs:
            # If the folder name matches the regex, rename it
            if not pattern.match(dir_name):
                continue
            new_dir = pattern.sub(new_name, dir_name)
            if new_dir != dir_name:
                os.rename(os.path.join(root, dir_name), os.path.join(root, new_dir))

In [25]:
# Zip the folder with max compression
# Is single threaded and very slow, do not use for large folders like our dataset
def zip_folder(folder: str, zip_file_name: str = '') -> str:
    """Zip the files under ``folder`` into an archive next to it.

    The archive is written to the parent directory of ``folder`` using
    DEFLATE at the highest compression level. Entries are stored with paths
    relative to ``folder``. Only files are added; empty directories are not
    recorded.

    Args:
        folder: Folder whose files are archived.
        zip_file_name: File name of the archive. Defaults to
            ``<folder name>.zip``.

    Returns:
        Path of the created zip file.
    """
    # Normalise first: with a trailing separator, basename() would be ''
    # and dirname() would be the folder itself, putting a '.zip' file inside
    # the tree being archived.
    folder = os.path.normpath(folder)

    # Get the folder name
    folder_name = os.path.basename(folder)

    # If the zip file name is not provided, use the folder name
    if not zip_file_name:
        zip_file_name = folder_name + '.zip'

    # Create a zip file in the parent folder
    zip_file = os.path.join(os.path.dirname(folder), zip_file_name)

    # Get the total number of files to be zipped
    total_files = sum(len(files) for _, _, files in os.walk(folder))

    # compresslevel=9 is the maximum for DEFLATE; leaving it out uses the
    # default level, which is not "max compression".
    with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED, compresslevel=9) as zipf:
        with tqdm(total=total_files, desc="Zipping", unit="file") as pbar:
            for root, _, files in os.walk(folder):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, folder))
                    pbar.update(1)

    return zip_file

In [47]:
# Strip both demo prefixes from every file name under the data folder
for demo_prefix in ('Asian_Village_Demo', 'Stylized_Egypt_Demo'):
    remove_prefix(data_base_folder, demo_prefix)

Removing Prefix: 100%|██████████| 16946/16946 [00:00<00:00, 207054.02file/s]
Removing Prefix: 100%|██████████| 16946/16946 [00:00<00:00, 279219.33file/s]


In [48]:
# Folder renames to apply, in order: (regex matching the old name, replacement)
folder_renames = (
    ('^High_Gbuffer(s?)$', 'HighResGBuffer'),
    ('^Low_Gbuffer(s?)$', 'LowResGBuffer'),
    ('^High_Res$', 'HighRes'),
    ('^Low_Res$', 'LowRes'),
)
for old_folder_re, replacement in folder_renames:
    rename_folders(data_base_folder, old_folder_re, replacement)