# Imports

In [1]:
import os
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import math
import shutil
import random
import csv


# Oxford Flower Image Dataset


In [8]:
# Paths
# Every location is derived from one project root so the notebook can run on another
# machine by setting the PLANT_ID_PROJECT_ROOT environment variable. When the variable
# is not set, the default below reproduces the original absolute paths.
project_root = os.environ.get(
    'PLANT_ID_PROJECT_ROOT',
    '/Users/maggie/Desktop/project_3/Plant_ID_and_Diagnosis',
)
resources_dir = os.path.join(project_root, 'Resources')
# NOTE(review): 'plant_identifcation_resources' is kept exactly as written because it is
# a path component; presumably it matches the folder name on disk - confirm before renaming.
testflowers_dir = os.path.join(resources_dir, 'plant_identifcation_resources', 'testflowers')

# Input dataset directory
oxford_flower_dataset_path = os.path.join(testflowers_dir, 'dataset')
# JSON file with the category-to-name mappings
json_file_path = os.path.join(testflowers_dir, 'cat_to_name.json')
# Output folder that receives the exported images and the labels CSV
oxford_flowers_folder = os.path.join(resources_dir, 'plant_dr_master_dataset', '102_oxford_flowers')
csv_file_path = os.path.join(oxford_flowers_folder, 'image_labels.csv')

# Ensure the output directory exists
os.makedirs(oxford_flowers_folder, exist_ok=True)


In [9]:
# Read the category-id -> class-name lookup table from the JSON file
with open(json_file_path, 'r') as mapping_file:
    raw_mapping = json.load(mapping_file)

# Rebuild the mapping with spaces in the class names replaced by underscores
cat_to_name = {}
for category_id, class_name in raw_mapping.items():
    cat_to_name[category_id] = class_name.replace(' ', '_')

# Show the first ten mappings as a quick sanity check
for key, value in list(cat_to_name.items())[:10]:
    print(f"Category ID: {key}, Class Name: {value}")

# Function to rename folders and files with underscores
def replace_spaces_with_underscores(root_dir):
    """Rename every file and folder below ``root_dir`` whose name contains a space.

    Spaces in the name are replaced with underscores. ``root_dir`` itself is not
    renamed. The tree is walked bottom-up so that the contents of a directory are
    handled before the directory itself is renamed.

    If the underscore version of a name already exists in the same directory, the
    entry is left untouched and a message is printed, instead of letting
    ``os.rename`` silently replace the existing file.

    Args:
        root_dir: Path of the directory tree to process.

    Returns:
        None. Entries are renamed in place on disk.
    """
    for dirpath, dirnames, filenames in os.walk(root_dir, topdown=False):
        # Files and sub-directories get exactly the same treatment.
        for name in filenames + dirnames:
            if ' ' not in name:
                continue

            source = os.path.join(dirpath, name)
            target = os.path.join(dirpath, name.replace(' ', '_'))

            # Never clobber an entry that already carries the underscore name.
            if os.path.lexists(target):
                print(f"Skipped renaming {source}: {target} already exists")
                continue

            os.rename(source, target)

# Rename, in place on disk, every file and folder under the dataset directory whose
# name contains a space (this modifies the dataset directory itself).
replace_spaces_with_underscores(oxford_flower_dataset_path)


Category ID: 21, Class Name: fire_lily
Category ID: 3, Class Name: canterbury_bells
Category ID: 45, Class Name: bolero_deep_blue
Category ID: 1, Class Name: pink_primrose
Category ID: 34, Class Name: mexican_aster
Category ID: 27, Class Name: prince_of_wales_feathers
Category ID: 7, Class Name: moon_orchid
Category ID: 16, Class Name: globe-flower
Category ID: 25, Class Name: grape_hyacinth
Category ID: 26, Class Name: corn_poppy


In [10]:
# ImageDataGenerator setup
img_size = (128, 128)  # target size every image is resized to
datagen = ImageDataGenerator(rescale=1./255)  # scale pixel values by 1/255

# Load the training data from the 'train' subfolder (one subfolder per class).
# flow_from_directory shuffles by default; a fixed seed keeps the batch order
# reproducible across fresh kernel runs, which matters for any later step that
# depends on the position of an image inside a batch.
train_generator = datagen.flow_from_directory(
    os.path.join(oxford_flower_dataset_path, 'train'),
    target_size=img_size,
    batch_size=32,
    class_mode='categorical',
    seed=42
)

# Print class indices to verify the mapping
print("Class indices:", train_generator.class_indices)


Found 6552 images belonging to 102 classes.
Class indices: {'1': 0, '10': 1, '100': 2, '101': 3, '102': 4, '11': 5, '12': 6, '13': 7, '14': 8, '15': 9, '16': 10, '17': 11, '18': 12, '19': 13, '2': 14, '20': 15, '21': 16, '22': 17, '23': 18, '24': 19, '25': 20, '26': 21, '27': 22, '28': 23, '29': 24, '3': 25, '30': 26, '31': 27, '32': 28, '33': 29, '34': 30, '35': 31, '36': 32, '37': 33, '38': 34, '39': 35, '4': 36, '40': 37, '41': 38, '42': 39, '43': 40, '44': 41, '45': 42, '46': 43, '47': 44, '48': 45, '49': 46, '5': 47, '50': 48, '51': 49, '52': 50, '53': 51, '54': 52, '55': 53, '56': 54, '57': 55, '58': 56, '59': 57, '6': 58, '60': 59, '61': 60, '62': 61, '63': 62, '64': 63, '65': 64, '66': 65, '67': 66, '68': 67, '69': 68, '7': 69, '70': 70, '71': 71, '72': 72, '73': 73, '74': 74, '75': 75, '76': 76, '77': 77, '78': 78, '79': 79, '8': 80, '80': 81, '81': 82, '82': 83, '83': 84, '84': 85, '85': 86, '86': 87, '87': 88, '88': 89, '89': 90, '9': 91, '90': 92, '91': 93, '92': 94, '93': 

In [11]:
# Gather the filenames already listed in the CSV (if it exists) to prevent duplicates
existing_filenames = set()
if os.path.exists(csv_file_path):
    with open(csv_file_path, mode='r') as label_file:
        existing_filenames.update(
            record['filename'] for record in csv.DictReader(label_file)
        )

existing_filenames


set()

In [13]:
# Export every image of the generator as a JPEG and record its label in the CSV.

# Invert class_indices (class folder name -> index) once, instead of rebuilding a
# list of the keys for every single image.
index_to_class = {index: name for name, index in train_generator.class_indices.items()}

# Start from the first batch so that exactly one full pass over the data is exported,
# even if the generator was already advanced by an earlier cell execution.
train_generator.reset()

# Open the CSV file in the appropriate mode: append when entries already exist,
# otherwise create/overwrite it.
with open(csv_file_path, mode='a' if existing_filenames else 'w', newline='') as csv_file:
    fieldnames = ['filename', 'label']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write the header if the file is being created for the first time
    if not existing_filenames:
        writer.writeheader()

    # Iterate over the entire dataset using the data generator
    for batch_index in range(len(train_generator)):
        # Get a batch of images and their one-hot labels
        images, labels = next(train_generator)

        for i, (image_array, one_hot_label) in enumerate(zip(images, labels)):
            # Map the class index to the class folder name, then to the
            # human-readable label from the JSON file
            class_name = index_to_class[int(one_hot_label.argmax())]
            human_readable_label = cat_to_name.get(class_name, "Unknown")

            # NOTE(review): this appends "_healthy" only to labels that already contain
            # "healthy"; the condition looks inverted - confirm the intended behaviour.
            # Kept unchanged here.
            if "healthy" in human_readable_label.lower():
                human_readable_label += "_healthy"

            # NOTE(review): the filename encodes the position of the image in the
            # iteration, so the duplicate check below is only meaningful if the
            # generator yields images in the same order on every run - confirm.
            filename = f"{human_readable_label}_{batch_index}_{i+1}.jpg"

            if filename in existing_filenames:
                print(f"Skipped existing image {filename}")
                continue

            # Convert from normalized values back to 8-bit pixel values. Round before
            # casting: a plain astype('uint8') truncates, so a value such as 126.99999
            # would become 126 instead of 127.
            pixels = np.clip(np.rint(image_array * 255), 0, 255).astype('uint8')
            save_path = os.path.join(oxford_flowers_folder, filename)
            Image.fromarray(pixels).save(save_path)

            # Write the filename and label to the CSV file
            writer.writerow({'filename': filename, 'label': human_readable_label})

            print(f"Saved image {filename} and wrote to CSV with label {human_readable_label}")


Saved image water_lily_0_1.jpg and wrote to CSV with label water_lily
Saved image great_masterwort_0_2.jpg and wrote to CSV with label great_masterwort
Saved image wallflower_0_3.jpg and wrote to CSV with label wallflower
Saved image rose_0_4.jpg and wrote to CSV with label rose
Saved image sword_lily_0_5.jpg and wrote to CSV with label sword_lily
Saved image water_lily_0_6.jpg and wrote to CSV with label water_lily
Saved image azalea_0_7.jpg and wrote to CSV with label azalea
Saved image osteospermum_0_8.jpg and wrote to CSV with label osteospermum
Saved image pink-yellow_dahlia_0_9.jpg and wrote to CSV with label pink-yellow_dahlia
Saved image morning_glory_0_10.jpg and wrote to CSV with label morning_glory
Saved image desert-rose_0_11.jpg and wrote to CSV with label desert-rose
Saved image windflower_0_12.jpg and wrote to CSV with label windflower
Saved image balloon_flower_0_13.jpg and wrote to CSV with label balloon_flower
Saved image tree_poppy_0_14.jpg and wrote to CSV with labe