In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab VM so the dataset paths used below resolve.
# force_remount=True asks Colab to mount again even when a mount already exists.
mount_point = "/content/drive"
drive.mount(mount_point, force_remount=True)

Mounted at /content/drive


In [None]:
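# NOTE: every line in this cell is commented out, so running the cell does nothing.
# The disabled code copies each image from `dataset` into `aug-data` (re-encoded as
# JPEG, quality 85) and writes two randomly augmented variants next to it.
# import os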
# import numpy as np
# from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, save_img
# from PIL import Image  # For compression

# # Define the root directory for your dataset and the output directory for augmented images
# root_folder = '/content/drive/MyDrive/mashroom_task/dataset'  # Path to your root folder
# output_folder = '/content/drive/MyDrive/mashroom_task/aug-data'  # Path to save augmented images

# # Create the output folder structure (same as the original)
# os.makedirs(output_folder, exist_ok=True)
# classes = os.listdir(root_folder)  # List of class folders

# # Loop through each class
# for cls in classes:
#     os.makedirs(os.path.join(output_folder, cls), exist_ok=True)  # Create class folder in output directory

# # Define the ImageDataGenerator for data augmentation
# datagen = ImageDataGenerator(
#     rotation_range=40,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True,
#     fill_mode='nearest'
# )

# # Iterate through each class and augment images
# for cls in classes:
#     cls_input_path = os.path.join(root_folder, cls)  # Path to current class folder
#     cls_output_path = os.path.join(output_folder, cls)  # Path to save augmented images for this class

#     images = os.listdir(cls_input_path)  # List of images in the current class folder

#     # Loop through each image in the class
#     for img_name in images:
#         img_path = os.path.join(cls_input_path, img_name)

#         try:
#             # Load and preprocess the image
#             img = load_img(img_path)  # Load image
#             img_array = img_to_array(img)  # Convert image to array
#             img_array = np.expand_dims(img_array, axis=0)  # Expand dimensions for batch processing

#             # Save the original image to the output folder
#             original_img_path = os.path.join(cls_output_path, img_name)
#             img.save(original_img_path, "JPEG", quality=85)  # Compress original image

#             # Generate and save augmented images
#             for i, batch in enumerate(datagen.flow(img_array, batch_size=1, save_to_dir=None)):
#                 augmented_img_name = f"{os.path.splitext(img_name)[0]}_aug_{i + 1}.jpg"
#                 augmented_img_path = os.path.join(cls_output_path, augmented_img_name)

#                 # Save the augmented image with compression
#                 augmented_img = Image.fromarray(batch[0].astype('uint8'))
#                 augmented_img.save(augmented_img_path, "JPEG", quality=85)  # Save as JPG with compression

#                 if i >= 1:  # Generate only 2 augmented images per input
#                     break
#         except Exception as e:
#             print(f"Error processing {img_name} in {cls}: {e}")

# print("Data augmentation completed!")


In [2]:
import os
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Define the dataset directory and output directories
data_dir = '/content/drive/MyDrive/mashroom_task/dataset'
output_dir = '/content/drive/MyDrive/mashroom_task/split_dataset'

# Each sub-folder of data_dir is one class. os.listdir() returns entries in an
# arbitrary, filesystem-dependent order and also reports stray files and hidden
# folders (e.g. .ipynb_checkpoints), so keep only visible directories and sort them.
classes = sorted(
    entry for entry in os.listdir(data_dir)
    if not entry.startswith('.') and os.path.isdir(os.path.join(data_dir, entry))
)

# Create output directories for train, val, and test splits
split_ratios = {'train': 0.6, 'val': 0.2, 'test': 0.2}
for split in split_ratios:
    for cls in classes:
        os.makedirs(os.path.join(output_dir, split, cls), exist_ok=True)

# Split data into train, val, and test
for cls in classes:
    cls_path = os.path.join(data_dir, cls)

    # random_state only reproduces a split when the input order is the same on
    # every run, so sort the listing. Sub-directories are skipped because
    # shutil.copy() can only copy regular files.
    images = sorted(
        name for name in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, name))
    )

    # First carve off the training share, then divide the remainder between
    # validation and test according to their relative weights.
    train_files, temp_files = train_test_split(images, test_size=1 - split_ratios['train'], random_state=42)
    val_files, test_files = train_test_split(temp_files, test_size=split_ratios['test'] / (split_ratios['val'] + split_ratios['test']), random_state=42)

    assigned_files = {'train': train_files, 'val': val_files, 'test': test_files}
    for split, files in assigned_files.items():
        dest_dir = os.path.join(output_dir, split, cls)

        # Remove files left behind by an earlier run that are no longer assigned
        # to this split. Without this, re-running after the dataset (or the
        # listing order) changed could leave the same image in two splits and
        # leak training data into validation/test.
        for stale in set(os.listdir(dest_dir)) - set(files):
            stale_path = os.path.join(dest_dir, stale)
            if os.path.isfile(stale_path):
                os.remove(stale_path)

        for file in files:
            shutil.copy(os.path.join(cls_path, file), dest_dir)

# Input geometry and batching shared by every directory iterator below
target_size = (224, 224)
batch_size = 32

# Training generator: scales pixel values by 1/255 and applies random
# rotation, shift, shear, zoom and horizontal-flip augmentation.
_augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1.0/255, **_augmentation_options)

# Validation/test generator: same 1/255 scaling, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0/255)

# Keyword arguments common to all three flow_from_directory() calls
_flow_options = dict(
    target_size=target_size,
    batch_size=batch_size,
    class_mode='categorical',
)

# Build the iterators in train -> val -> test order, one per split folder.
train = train_datagen.flow_from_directory(
    os.path.join(output_dir, 'train'), **_flow_options
)

validation = validation_datagen.flow_from_directory(
    os.path.join(output_dir, 'val'), **_flow_options
)

# shuffle=False keeps the test batches in a fixed order.
test = validation_datagen.flow_from_directory(
    os.path.join(output_dir, 'test'), shuffle=False, **_flow_options
)

# Show the class-name -> label-index mapping of the training iterator
print("Class indices:", train.class_indices)


Found 3064 images belonging to 6 classes.
Found 1021 images belonging to 6 classes.
Found 1021 images belonging to 6 classes.
Class indices: {'Agaricus': 0, 'Blue_Oyster_Mushroom': 1, 'Oyster_Mushroom': 2, 'Phoenix_Oyster_Mushrooms': 3, 'Pink_Oyster_Mushroom': 4, 'poisonous_mushroom_sporocarp': 5}
