In [1]:
import os
from typing import Union

In [2]:
# Absolute Windows paths to the test/train/val split folders under data\images.
TEST_FOLDER_PATH = 'D:\\KPI\\Bachelor_thesis\\code\\data\\images\\test'
TRAIN_FOLDER_PATH = 'D:\\KPI\\Bachelor_thesis\\code\\data\\images\\train'
VAL_FOLDER_PATH = 'D:\\KPI\\Bachelor_thesis\\code\\data\\images\\val'

# Absolute Windows paths to the same three splits under data\raw_images.
RAW_TEST_FOLDER_PATH = 'D:\\KPI\\Bachelor_thesis\\code\\data\\raw_images\\test'
RAW_TRAIN_FOLDER_PATH = 'D:\\KPI\\Bachelor_thesis\\code\\data\\raw_images\\train'
RAW_VAL_FOLDER_PATH = 'D:\\KPI\\Bachelor_thesis\\code\\data\\raw_images\\val'

In [3]:
def count_files(path: str) -> int:
    """
    Return how many files live under a directory, including all nested
    subdirectories.

    Directories themselves are not counted. A path that cannot be walked
    (for example, one that does not exist) yields a count of 0, because
    ``os.walk`` silently produces no entries for it.

    Args:
        path (str): The directory to scan.

    Returns:
        int: The number of files found anywhere below ``path``.
    """
    file_total = 0
    for _dirpath, _subdirs, filenames in os.walk(path):
        file_total += len(filenames)
    return file_total


# Print the recursive file count of each split folder: first the three folders
# under data\images, then the three under data\raw_images. The trailing "\n"
# on the third line leaves a blank line between the two groups.
print(f"Total files in test folder  = {count_files(TEST_FOLDER_PATH)}")
print(f"Total files in train folder = {count_files(TRAIN_FOLDER_PATH)}")
print(f"Total files in val folder   = {count_files(VAL_FOLDER_PATH)}\n")

print(f"Total files in raw_test folder  = {count_files(RAW_TEST_FOLDER_PATH)}")
print(f"Total files in raw_train folder = {count_files(RAW_TRAIN_FOLDER_PATH)}")
print(f"Total files in raw_val folder   = {count_files(RAW_VAL_FOLDER_PATH)}")

Total files in test folder  = 66232
Total files in train folder = 445929
Total files in val folder   = 32968

Total files in raw_test folder  = 100000
Total files in raw_train folder = 681482
Total files in raw_val folder   = 50000


In [4]:
def get_folder_size(path: str) -> str:
    """
    Calculate the total size of files in a directory and its subdirectories.

    Every file found by ``os.walk`` contributes its size in bytes; the sum is
    then rendered by ``format_size``. Entries whose size cannot be read
    (broken symbolic links, files deleted while the walk is in progress,
    permission errors) are skipped so that one bad entry does not abort the
    scan of a large tree.

    Args:
        path (str): The path to the directory.

    Returns:
        str: The total size as formatted by ``format_size``
            (e.g. ``"8.36 GB"``).
    """
    total_size = 0
    for root, _dirs, files in os.walk(path):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                total_size += os.path.getsize(file_path)
            except OSError:
                # Unreadable or vanished entry: leave it out of the total
                # instead of failing the whole computation.
                continue
    return format_size(total_size)


def format_size(size: int) -> str:
    """
    Format the size in bytes into a human-readable format.

    The value is divided by 1024 until it drops below 1024 or the largest
    supported unit (TB) is reached. Sizes of 1024 TB or more are therefore
    still reported in TB (e.g. ``"2048.00 TB"``) rather than falling off the
    end of the unit list and returning ``None``.

    Args:
        size (int): The size in bytes.

    Returns:
        str: The size with two decimal places followed by its unit
            (bytes, KB, MB, GB or TB).
    """
    units = ('bytes', 'KB', 'MB', 'GB', 'TB')
    # Only the units below the largest one need the "< 1024" check; whatever
    # is left after the loop is expressed in the largest unit.
    for unit in units[:-1]:
        if size < 1024.0:
            return f"{size:.2f} {unit}"
        size /= 1024.0
    return f"{size:.2f} {units[-1]}"


# Print the formatted total size of each split folder: first the three folders
# under data\images, then the three under data\raw_images. The trailing "\n"
# on the third line leaves a blank line between the two groups.
print(f"Total size of test folder  = {get_folder_size(TEST_FOLDER_PATH)}")
print(f"Total size of train folder = {get_folder_size(TRAIN_FOLDER_PATH)}")
print(f"Total size of val folder   = {get_folder_size(VAL_FOLDER_PATH)}\n")

print(f"Total size of raw_test folder  = {get_folder_size(RAW_TEST_FOLDER_PATH)}")
print(f"Total size of raw_train folder = {get_folder_size(RAW_TRAIN_FOLDER_PATH)}")
print(f"Total size of raw_val folder   = {get_folder_size(RAW_VAL_FOLDER_PATH)}")

Total size of test folder  = 8.36 GB
Total size of train folder = 53.70 GB
Total size of val folder   = 4.15 GB

Total size of raw_test folder  = 12.67 GB
Total size of raw_train folder = 77.26 GB
Total size of raw_val folder   = 6.25 GB
