In [None]:
# Mount Google Drive at /content/drive (Colab-only helper); the dataset paths
# used by this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import shutil
import random
import pandas as pd
from collections import Counter
import numpy as np
import tensorflow as tf

In [None]:
# SOURCE_ROOT: folder holding the full dataset, one sub-folder per class.
# TARGET_ROOT: folder that receives the selected subset of class folders.
SOURCE_ROOT = "/content/drive/MyDrive/Flower_Classification/Flowers299"
TARGET_ROOT = "/content/drive/MyDrive/Flower_Classification/Flowers50_Selected"

# Create the destination up front; exist_ok=True keeps this safe on re-runs.
os.makedirs(TARGET_ROOT, exist_ok=True)

In [None]:
# Names of the 50 class folders to select. Each entry is joined onto
# SOURCE_ROOT as a directory name, so it has to match the on-disk folder name
# character for character.
TARGET_CLASSES = [
    "Roses", "Tulip", "Sunflower", "Daisy", "Lily", "Orchid",
    "Carnation", "Dahlia", "Hydrangea", "Peony", "Daffodil",
    "Chrysanthemum", "Marigold", "Jasmine", "Lavender",
    "Hyacinth", "Iris", "Gardenia", "Snapdragon", "Azalea",
    "LilyoftheValley", "Aster", "Hibiscus", "Geranium",
    "Zinnia", "Freesia", "CallaLily", "Magnolia", "Gladiolas",
    "Delphinium", "Pansy", "Begonia", "Camellia", "Anemone",
    "Clematis", "Poppy", "ForgetMeNot", "SweetPea", "Petunia",
    # NOTE(review): "BachelorGÇÖsButton" looks like a mis-decoded apostrophe
    # (presumably "Bachelor's Button") — confirm it matches the real folder
    # name under SOURCE_ROOT; if it does not, the class will not be found.
    "Lilac", "Amaryllis", "BachelorGÇÖsButton", "Cosmos",
    "Foxglove", "BleedingHeart", "Poinsettia", "Buttercup",
    "PersianButtercup", "Viola", "MorningGlory"
]

In [None]:
# Copy every selected class folder from SOURCE_ROOT into TARGET_ROOT and
# collect the names that have no matching folder in the source dataset.
missing = []

for cls in TARGET_CLASSES:
    src = os.path.join(SOURCE_ROOT, cls)
    dst = os.path.join(TARGET_ROOT, cls)

    if not os.path.isdir(src):
        # No such class folder in the source dataset: report it, don't crash.
        missing.append(cls)
        continue

    # dirs_exist_ok=True (Python 3.8+) makes the cell re-runnable: without it
    # copytree raises FileExistsError as soon as `dst` exists from an earlier
    # (possibly interrupted) run.
    shutil.copytree(src, dst, dirs_exist_ok=True)

print("Missing classes:", missing)
print("Selected classes copied:", len(TARGET_CLASSES) - len(missing))

In [None]:
# Root of the train/val/test layout: SPLIT_ROOT/<split>/<class>/.
SPLIT_ROOT = "/content/drive/MyDrive/Flower_Classification/Flowers50_Split"
splits = ["train", "val", "test"]

# List the class folders once (the listing is the same for every split) and
# keep directories only, matching how the counting helpers treat stray files.
class_names = [
    name for name in sorted(os.listdir(TARGET_ROOT))
    if os.path.isdir(os.path.join(TARGET_ROOT, name))
]

for split in splits:
    for cls in class_names:
        os.makedirs(os.path.join(SPLIT_ROOT, split, cls), exist_ok=True)

In [None]:
# Split every class 80/10/10 into train/val/test and copy the files into
# SPLIT_ROOT/<split>/<class>/.
#
# The split must be reproducible: with an unseeded shuffle every re-run of this
# cell assigns files differently, and because earlier copies are never removed
# the same image can end up in both train and test (data leakage).
# NOTE: if SPLIT_ROOT was already populated by an earlier, unseeded run,
# delete it before running this cell so no stale copies remain.
SPLIT_SEED = 42

for cls in sorted(os.listdir(TARGET_ROOT)):
    cls_dir = os.path.join(TARGET_ROOT, cls)
    if not os.path.isdir(cls_dir):
        # Ignore stray files sitting next to the class folders.
        continue

    # os.listdir order is arbitrary, so sort before shuffling; seeding per
    # class keeps a class's split stable even if other classes are added.
    images = sorted(os.listdir(cls_dir))
    random.Random(f"{SPLIT_SEED}:{cls}").shuffle(images)

    n = len(images)
    train_end = int(0.8 * n)
    val_end = int(0.9 * n)

    split_map = {
        "train": images[:train_end],
        "val": images[train_end:val_end],
        "test": images[val_end:]
    }

    for split, files in split_map.items():
        dst_dir = os.path.join(SPLIT_ROOT, split, cls)
        # Create the destination here as well so this cell works on its own.
        os.makedirs(dst_dir, exist_ok=True)
        for f in files:
            shutil.copy(os.path.join(cls_dir, f), os.path.join(dst_dir, f))

In [None]:
def count_images(split_path):
    """Return the total number of entries inside the class folders of a split.

    Args:
        split_path: Directory whose immediate sub-directories are class folders.

    Returns:
        Sum of the entry counts of every sub-directory of ``split_path``;
        entries of ``split_path`` that are not directories are ignored.
    """
    candidates = (
        os.path.join(split_path, entry) for entry in os.listdir(split_path)
    )
    return sum(
        len(os.listdir(folder)) for folder in candidates if os.path.isdir(folder)
    )

# Total number of files per split, counted across all class folders.
train_count, val_count, test_count = (
    count_images(os.path.join(SPLIT_ROOT, split_name))
    for split_name in ("train", "val", "test")
)

print(f"Train: {train_count}")
print(f"Val:   {val_count}")
print(f"Test:  {test_count}")

In [None]:
def per_class_counts(split, root=None):
    """Build a table with the number of entries per class folder for a split.

    Args:
        split: Name of the split sub-folder (e.g. "train"); also used as the
            name of the count column.
        root: Directory that contains the split folders. Defaults to the
            notebook-level ``SPLIT_ROOT`` when omitted.

    Returns:
        A DataFrame with one row per class folder, sorted by class name, and
        the columns ``"class"`` and ``split``. Both columns are present even
        when the split has no class folders, so merging on ``"class"`` works.
    """
    split_path = os.path.join(SPLIT_ROOT if root is None else root, split)

    rows = []
    for cls in sorted(os.listdir(split_path)):
        cls_path = os.path.join(split_path, cls)
        # Only directories are class folders; skip stray files.
        if os.path.isdir(cls_path):
            rows.append({"class": cls, split: len(os.listdir(cls_path))})

    # Explicit columns: an empty row list would otherwise produce a frame
    # without a "class" column and break the later merge.
    return pd.DataFrame(rows, columns=["class", split])

# One per-class count table per split.
df_train, df_val, df_test = (
    per_class_counts(split_name) for split_name in ("train", "val", "test")
)

# Join the three tables on the class name (default inner join, so only
# classes present in every split are kept) and display the result.
df = df_train.merge(df_val, on="class")
df = df.merge(df_test, on="class")
df

In [None]:
# Sanity checks on the finished layout: the merged table has one row per
# class, every split is non-empty, and train is larger than val.
assert len(df) == 50, "Not exactly 50 classes!"
assert train_count > val_count and val_count > 0
assert test_count > 0

print("Dataset structure verified successfully.")