In [8]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt



In [9]:

## preparing data
# Prefix
target_image_width = 160
target_image_height = 130
# parent dataset
dataset_parent_folder = 'dataset'

if not (os.path.exists(dataset_parent_folder) and os.path.isdir(dataset_parent_folder)):
    print(f"Error: Cant find the path {dataset_parent_folder}")
    raise FileNotFoundError(f"Dataset folder could not be found at {dataset_parent_folder}")

print(f"Successfully accessed the parent dataset folder: {dataset_parent_folder}")

# Subdirectories
all_entries = os.listdir(dataset_parent_folder)
# This line ensures all_dog_breed_in_parents contains only names of actual directories
all_dog_breed_in_parents = [entry for entry in all_entries if os.path.isdir(os.path.join(dataset_parent_folder, entry))]

if not all_dog_breed_in_parents:
    print(f"Warning: No breed subdirectories found in '{dataset_parent_folder}'.")
    # You might want to handle this case, e.g., exit or raise an error if no breeds are found
else:
    print(f"Identified breed folders: {all_dog_breed_in_parents}")

# dictionary of images with breed name as key
breed_images_dict = {}

# Loop through the identified breed folder names
for dog_breed in all_dog_breed_in_parents:
    current_breed_path = os.path.join(dataset_parent_folder, dog_breed)
    print(f"\nProcessing breed: {dog_breed}") # Added for better feedback

    files_in_current_breed = os.listdir(current_breed_path)
    images_for_current_breed = []
    loaded_count = 0 # To count images loaded for the current breed

    for file_name in files_in_current_breed:
        image_file_path = os.path.join(current_breed_path, file_name)

        # Check if the file is an image based on its extension
        if file_name.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')):
            try:
                # Open the image using Pillow
                img = Image.open(image_file_path) # CORRECTED: Added closing parenthesis

                # Convert to RGB format for consistency
                img = img.convert('RGB') # CORRECTED: Typo in method and mode

                # Resize the image if it's not the target size
                if img.size != (target_image_width, target_image_height):
                    img = img.resize((target_image_width, target_image_height))

                # Append the loaded (and possibly resized) PIL Image object
                images_for_current_breed.append(img)
                loaded_count +=1

            except Exception as e:
                print(f"    Error loading or processing image '{image_file_path}': {e}")
        # else:
            # Optional: print(f"    Skipping non-image file: {file_name}")

    # Add the list of images for the current breed to the dictionary
    breed_images_dict[dog_breed] = images_for_current_breed
    print(f"  Loaded {loaded_count} images for {dog_breed}.")

print("\n--- Image Loading Process Complete ---")
print("Summary of loaded images:")
for breed, image_list in breed_images_dict.items():
    if image_list:
        first_image = image_list[0]
        print(f"  Breed: {breed}, Images: {len(image_list)}, Type of first image: {type(first_image)}, Size of first image: {first_image.size}")
    else:
        print(f"  Breed: {breed}, Images: 0")

Successfully accessed the parent dataset folder: dataset
Identified breed folders: ['.ipynb_checkpoints', 'Beagle', 'Boxer', 'Dachshund', 'German_Shepherd', 'Golden_Retriever', 'Labrador_Retriever', 'Poodle', 'Rottweiler', 'Yorkshire_Terrier']

Processing breed: .ipynb_checkpoints
  Loaded 0 images for .ipynb_checkpoints.

Processing breed: Beagle
  Loaded 100 images for Beagle.

Processing breed: Boxer
  Loaded 100 images for Boxer.

Processing breed: Dachshund
  Loaded 96 images for Dachshund.

Processing breed: German_Shepherd
  Loaded 96 images for German_Shepherd.

Processing breed: Golden_Retriever
  Loaded 91 images for Golden_Retriever.

Processing breed: Labrador_Retriever
  Loaded 95 images for Labrador_Retriever.

Processing breed: Poodle
  Loaded 100 images for Poodle.

Processing breed: Rottweiler
  Loaded 89 images for Rottweiler.

Processing breed: Yorkshire_Terrier
  Loaded 100 images for Yorkshire_Terrier.

--- Image Loading Process Complete ---
Summary of loaded image

In [None]:
#loading data
# modyfying vectorizing y, returning x and y for each breed
def load_data():
    return 