## Impressionism Data

In [None]:
import pandas as pd

# Load the genre index CSV, keep only the rows whose image path starts with
# the "Impressionism/" prefix, and save that subset next to the notebook.
file_path = 'wikiart/wikiart_csv/genre_train.csv'
df = pd.read_csv(file_path)

# Boolean mask: True for rows whose 'path' begins with the style folder name.
is_impressionism = df['path'].str.startswith("Impressionism/")
impressionism = df[is_impressionism]

# index=False keeps the pandas row index out of the written file.
impressionism.to_csv("impressionism.csv", index=False)


In [None]:
# Reload the saved subset from disk and print how many rows carry each
# distinct class_index value (most frequent first, as value_counts orders them).
impressionism = pd.read_csv("impressionism.csv")

class_column = impressionism["class_index"]
counts = class_column.value_counts()
print(counts)

In [10]:
import shutil
from pathlib import Path
# Sort the Impressionism images into one folder per genre.
#
# genre_class maps the class_index values of interest to the folder name used
# for that genre; rows with any other class_index are left untouched.
genre_class = {4: 'landscape', 2: 'genre_painting', 6: 'portrait', 1: 'cityscape'}

source_root = Path("wikiart")
# Write to the folder that the file-count cell further down reads from
# ("data_impressionism"); the previous target "data" left that cell pointing
# at a directory this notebook never created.
target_root = Path("data_impressionism")

# Create every genre folder once, up front, instead of once per row.
for class_name in genre_class.values():
    (target_root / class_name).mkdir(parents=True, exist_ok=True)

# Restrict to the genres we keep, then walk the two needed columns directly
# (cheaper and simpler than DataFrame.iterrows()).
selected = impressionism[impressionism['class_index'].isin(list(genre_class))]

for class_idx, img_rel_path in zip(selected['class_index'], selected['path']):
    src_path = source_root / img_rel_path
    dst_path = target_root / genre_class[class_idx] / Path(img_rel_path).name

    if src_path.exists():
        # NOTE: move is destructive - the image leaves the wikiart tree.
        shutil.move(str(src_path), str(dst_path))
    elif dst_path.exists():
        # Already moved by an earlier run of this cell; re-running is a no-op
        # rather than a wall of spurious "error" lines.
        continue
    else:
        # Neither at the source nor at the destination: genuinely missing.
        print(f"error: {src_path}")



In [None]:
from pathlib import Path

# Print, for each sub-folder of data_impressionism, how many regular files it holds.
data_dir = Path("data_impressionism")
subdirs = [entry for entry in data_dir.iterdir() if entry.is_dir()]

for subdir in subdirs:
    # Count lazily instead of materialising the list of files.
    file_count = sum(1 for entry in subdir.iterdir() if entry.is_file())
    print(f"{subdir.name}: {file_count} files")


genre_painting: 2669 files
portrait: 1899 files
landscape: 3079 files
cityscape: 1545 files


## Fruits Data

In [None]:
from pathlib import Path

# Print the number of regular files in every class folder of the Training split.
data_dir = Path("data_fruits/fruits-360_100x100/fruits-360/Training")
subdirs = list(filter(Path.is_dir, data_dir.iterdir()))

for subdir in subdirs:
    files = [child for child in subdir.iterdir() if child.is_file()]
    file_count = len(files)
    print(f"{subdir.name}: {file_count} files")

Apple 12: 466 files
Apple 5: 440 files
Apple Granny Smith 1: 492 files
Apple 13: 699 files
Apple 14: 466 files
Apple Braeburn 1: 492 files
Apple Golden 2: 492 files
Apple Red 1: 492 files
Apple Golden 3: 481 files
Apple Rotten 1: 488 files
Apple Red Yellow 1: 492 files
Apple Pink Lady 1: 456 files
Apple Core 1: 235 files
Apple 11: 430 files
Apple 6: 473 files
Apple 8: 687 files
Apple 18: 484 files
Apple 19: 729 files
Apple hit 1: 702 files
Apple 9: 694 files
Apple 17: 610 files
Apple 7: 694 files
Apple 10: 699 files
Apple Crimson Snow 1: 444 files
Apple Golden 1: 480 files
Apple Red 2: 492 files
Apple Red Yellow 2: 672 files
Apple worm 1: 696 files
Apple Red 3: 429 files
Apple Red Delicious 1: 490 files


In [6]:
import os
import shutil


# Regroup selected fruits-360 apple folders into three coarse classes
# (Golden, Golden-Red, Red) under ./data_apples.

# Root of the extracted dataset; it holds one sub-folder per split.
dataset_root = "./data_fruits/fruits-360_100x100/fruits-360"

# Both splits are merged into the same target folders. The recorded counts for
# data_apples (e.g. 1938 "Golden" files versus 480 + 492 + 481 = 1453 in the
# three Golden folders of Training) cannot come from a single split, so this
# cell had evidently been run once per split with source_root edited by hand.
# Listing the splits here makes a fresh Restart & Run All reproduce that result.
splits = ("Training", "Test")

target_root = "./data_apples"

# Source folder name -> merged class name.
class_map = {
    "Apple Golden 1": "Golden",
    "Apple Golden 2": "Golden",
    "Apple Golden 3": "Golden",
    "Apple Red Yellow 1": "Golden-Red",
    "Apple Red Yellow 2": "Golden-Red",
    "Apple Red 1": "Red",
    "Apple Red 2": "Red",
    "Apple Red 3": "Red"
}

# Create the target tree once, before any copying.
os.makedirs(target_root, exist_ok=True)
for new_class in sorted(set(class_map.values())):
    os.makedirs(os.path.join(target_root, new_class), exist_ok=True)

for split in splits:
    source_root = os.path.join(dataset_root, split)

    for src_folder, new_class in class_map.items():
        src_path = os.path.join(source_root, src_folder)
        dst_path = os.path.join(target_root, new_class)

        if not os.path.isdir(src_path):
            print(f"[Warning] Source folder does not exist: {src_path}")
            continue

        # Prefix each copy with its original folder name so files from
        # different source folders cannot collide inside the merged class.
        # NOTE(review): a file name repeated in both splits of the same folder
        # would be overwritten by the later split - confirm names are disjoint.
        prefix = src_folder.replace(' ', '_')

        for file_name in os.listdir(src_path):
            src_file = os.path.join(src_path, file_name)
            if not os.path.isfile(src_file):
                # shutil.copyfile raises on directories; skip anything that
                # is not a regular file.
                continue
            dst_file = os.path.join(dst_path, f"{prefix}_{file_name}")
            shutil.copyfile(src_file, dst_file)

print("✅ Image regrouping complete.")


✅ Image regrouping complete.


In [7]:
from pathlib import Path

# Print how many regular files ended up in each merged class folder of data_apples.
data_dir = Path("data_apples")

# Collect the class folders first, ignoring any loose files at the top level.
subdirs = []
for child in data_dir.iterdir():
    if child.is_dir():
        subdirs.append(child)

for subdir in subdirs:
    file_count = 0
    for item in subdir.iterdir():
        if item.is_file():
            file_count += 1
    print(f"{subdir.name}: {file_count} files")

Golden-Red: 1547 files
Golden: 1938 files
Red: 1885 files
