In [28]:
import os
import pandas as pd

In [29]:
def find_deepest_paths(root_dir):
    """Return the directories under ``root_dir`` that lie at the greatest depth.

    The tree is walked with ``os.walk`` (symlinked directories are not
    followed) and every directory whose nesting level equals the maximum
    level found is collected, in walk order.

    Depth is measured relative to ``root_dir`` rather than by counting
    separators in the raw path string, so a root given with a trailing
    separator (``"data/"``) is not mistaken for being as deep as its children.

    Args:
        root_dir: Path of the directory to scan.

    Returns:
        A list of directory paths (as yielded by ``os.walk``). If ``root_dir``
        has no sub-directories the list contains only ``root_dir``; if
        ``root_dir`` cannot be walked (e.g. it does not exist) the list is empty.
    """
    deepest_paths = []
    max_depth = -1  # below any real depth, so the root itself (depth 0) can qualify

    for dirpath, _dirnames, _filenames in os.walk(root_dir):
        rel = os.path.relpath(dirpath, root_dir)
        # relpath yields "." for the root; otherwise one component per level.
        depth = 0 if rel == os.curdir else rel.count(os.sep) + 1

        if depth > max_depth:
            max_depth = depth
            deepest_paths = [dirpath]
        elif depth == max_depth:
            deepest_paths.append(dirpath)

    return deepest_paths

In [30]:
def count_images_in_paths(paths):
    """Count the image files directly inside each directory in ``paths``.

    An entry counts as an image when it is a file (not a sub-directory) whose
    extension, compared case-insensitively, is one of
    ``.jpg .jpeg .png .gif .bmp .tiff``. Sub-directories are not descended into.

    Args:
        paths: Iterable of directory paths.

    Returns:
        A list of ``(path, count)`` tuples in the same order as ``paths``.

    Raises:
        OSError: If a path cannot be listed (missing, not a directory, ...).
    """
    # Lower-case only: the suffix is lower-cased before the lookup, so
    # upper-case variants would never be consulted.
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'}
    counts = []

    for path in paths:
        with os.scandir(path) as entries:
            count = sum(
                1
                for entry in entries
                # is_file() keeps a directory named e.g. "thumbs.jpg" out of the tally.
                if entry.is_file()
                and os.path.splitext(entry.name)[1].lower() in image_extensions
            )
        counts.append((path, count))

    return counts

In [31]:
def main(root_dir='/hexp/data/genImage_filtered_squeeze'):
    """Print per-directory image counts for the train and validation splits.

    Finds the deepest directories under ``root_dir``, counts the images in
    each, and prints two left-aligned tables ("Train Data" and
    "Validation Data") keyed by the path relative to ``root_dir``.

    A directory belongs to a split when one of the components of its
    *relative* path is exactly ``train`` or ``val``. Matching on components of
    the relative path keeps the location of ``root_dir`` itself (for example a
    parent folder whose name contains "val") from leaking into the split.

    Args:
        root_dir: Dataset root to scan. Defaults to the original hard-coded
            location.
    """
    deepest_paths = find_deepest_paths(root_dir)
    image_counts = count_images_in_paths(deepest_paths)

    columns = ['Path', 'Image Count']
    rows = [(os.path.relpath(path, root_dir), count) for path, count in image_counts]

    def split_frame(split):
        # Keep rows whose relative path has `split` as one of its components.
        selected = [(rel, count) for rel, count in rows if split in rel.split(os.sep)]
        return pd.DataFrame(selected, columns=columns)

    def render(df):
        # `justify` in to_string only aligns the column headers; pad each value
        # to its column width so the cells are left-aligned as well. This is
        # done per call instead of through pd.options so no global display
        # state is changed for later cells.
        formatters = {}
        for col in df.columns:
            width = max((len(str(value)) for value in df[col]), default=0)
            formatters[col] = lambda value, width=width: str(value).ljust(width)
        return df.to_string(index=False, justify='left', formatters=formatters)

    print("Train Data:")
    print(render(split_frame('train')))
    print("\nValidation Data:")
    print(render(split_frame('val')))

In [32]:
# Entry point: run the report only when this code is executed as the top-level
# module (true for a notebook cell or a script run directly), not on import.
if __name__ == "__main__":
    main()

Train Data:
Path                                  Image Count
imagenet_ai_0419_biggan/train/nature 16200       
    imagenet_ai_0419_biggan/train/ai 16199       
   imagenet_ai_0508_adm/train/nature 15745       
       imagenet_ai_0508_adm/train/ai 16199       
    imagenet_midjourney/train/nature 16170       
        imagenet_midjourney/train/ai 16199       
  imagenet_ai_0419_sdv4/train/nature 16200       
      imagenet_ai_0419_sdv4/train/ai   870       
  imagenet_ai_0419_vqdm/train/nature  1192       
      imagenet_ai_0419_vqdm/train/ai 16200       
  imagenet_ai_0424_sdv5/train/nature 15327       
      imagenet_ai_0424_sdv5/train/ai 16600       
imagenet_ai_0424_wukong/train/nature 16073       
    imagenet_ai_0424_wukong/train/ai 16200       
         imagenet_glide/train/nature 16200       
             imagenet_glide/train/ai 16199       

Validation Data:
Path                                Image Count
imagenet_ai_0419_biggan/val/nature 600         
    imagenet_ai_0419_big