# Importing all necessary libraries

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os 
from PIL import Image
import requests
from io import BytesIO
from sklearn.decomposition import PCA

# Function to download image from URL

In [5]:
# Set the path where you want to save the dataset
download_path = "C:/Users/tjerk/Downloads/Animals"

# Ensure the directory exists
os.makedirs(download_path, exist_ok=True)

# Use the Kaggle API to download the dataset
os.environ["KAGGLE_CONFIG_DIR"] = os.path.expanduser("~/.kaggle")
dataset_identifier = "borhanitrash/animal-image-classification-dataset"

!kaggle datasets download -d {dataset_identifier} -p {download_path} --unzip




Dataset URL: https://www.kaggle.com/datasets/borhanitrash/animal-image-classification-dataset


  0%|          | 0.00/38.4M [00:00<?, ?B/s]
  3%|▎         | 1.00M/38.4M [00:00<00:20, 1.89MB/s]
  8%|▊         | 3.00M/38.4M [00:00<00:07, 5.12MB/s]
 13%|█▎        | 5.00M/38.4M [00:00<00:04, 7.98MB/s]
 18%|█▊        | 7.00M/38.4M [00:00<00:03, 10.1MB/s]
 23%|██▎       | 9.00M/38.4M [00:01<00:02, 11.3MB/s]
 29%|██▊       | 11.0M/38.4M [00:01<00:02, 12.6MB/s]
 34%|███▍      | 13.0M/38.4M [00:01<00:01, 13.7MB/s]
 39%|███▉      | 15.0M/38.4M [00:01<00:01, 14.6MB/s]
 44%|████▍     | 17.0M/38.4M [00:01<00:01, 15.1MB/s]
 50%|████▉     | 19.0M/38.4M [00:01<00:01, 15.6MB/s]
 55%|█████▍    | 21.0M/38.4M [00:01<00:01, 16.2MB/s]
 60%|█████▉    | 23.0M/38.4M [00:02<00:00, 16.3MB/s]
 65%|██████▌   | 25.0M/38.4M [00:02<00:00, 16.4MB/s]
 70%|███████   | 27.0M/38.4M [00:02<00:00, 16.4MB/s]
 76%|███████▌  | 29.0M/38.4M [00:02<00:00, 16.5MB/s]
 81%|████████  | 31.0M/38.4M [00:02<00:00, 16.3MB/s]
 86%|████████▌ | 33.0M/38.4M [00:02<00:00, 16.3MB/s]
 91%|█████████ | 35.0M/38.4M [00:02<00:00, 16.2MB/s]
 


License(s): MIT
Downloading animal-image-classification-dataset.zip to C:/Users/tjerk/Downloads/Animals



# Example image URLs 

In [9]:
dataset_paths = {
    "dogs": r"C:\Users\tjerk\Downloads\Animals\dogs",
    "cats": r"C:\Users\tjerk\Downloads\Animals\cats",
    "snakes": r"C:\Users\tjerk\Downloads\Animals\snakes",
}


# Load images and store them in numpy arrays

In [11]:
images = {"dogs": [], "cats": [], "snakes": []}

for class_name, folder in dataset_paths.items():  # Use dataset_paths, not dataset_urls
    for img_name in os.listdir(folder):
        img_path = os.path.join(folder, img_name)
        if img_path.endswith(('.jpg', '.png', '.jpeg')):  # Ensure it's an image
            images[class_name].append(img_path)

print("Dog images:", len(images["dogs"]))
print("Cat images:", len(images["cats"]))
print("Snake images:", len(images["snakes"]))


Dog images: 1000
Cat images: 1000
Snake images: 1000


# Display basic information about the dataset

In [13]:
for class_name, img_list in images.items():
    if img_list:
        print(f"Class: {class_name}")
        print(f"Number of images: {len(img_list)}")
        
        # Open the first image to get its dimensions
        first_image_path = img_list[0]
        with Image.open(first_image_path) as img:
            print(f"Image dimensions: {img.size}")  # img.size gives (width, height)
        print()


Class: dogs
Number of images: 1000
Image dimensions: (256, 256)

Class: cats
Number of images: 1000
Image dimensions: (256, 256)

Class: snakes
Number of images: 1000
Image dimensions: (256, 256)



# Display a 3x3 grid of random images per class

In [16]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# Example function to display the images
def display_grid(img_list, class_name):
    fig, axs = plt.subplots(3, 3, figsize=(9, 9))
    for i, ax in enumerate(axs.flatten()):
        # Open and convert the image to an array
        img_path = img_list[i]
        with Image.open(img_path) as img:
            img_array = np.array(img)
        
        # Display the image in the subplot
        ax.imshow(img_array)
        ax.axis('off')
        
    plt.suptitle(f"Sample Images from {class_name}", fontsize=16)
    plt.show()

# Assuming images is a dictionary of image paths, e.g.:
images = {"dogs": ["path/to/dog1.jpg", "path/to/dog2.jpg", ...]}

# For each class, check if there are at least 9 images and display them
for class_name, img_list in images.items():
    if len(img_list) >= 9:
        display_grid(img_list, class_name)


# Create RGB histograms

In [None]:
# Define directory paths for each class
image_dirs = {
    "dogs": "C:/Users/tjerk/Downloads/Animals/dogs",
    "cats": "C:/Users/tjerk/Downloads/Animals/cats",
    "snakes": "C:/Users/tjerk/Downloads/Animals/snakes"
}

# Dictionary to store the image paths for each class
images = {"dogs": [], "cats": [], "snakes": []}

# Loop through each class (dogs, cats, snakes)
for class_name, image_dir in image_dirs.items():
    # Check if the directory exists
    if os.path.exists(image_dir):
        print(f"Found directory for {class_name}: {image_dir}")
        
        # Loop through all files in the directory
        for filename in os.listdir(image_dir):
            # Check if the file is a valid image (filter by .jpg, .png, etc.)
            if filename.endswith(".jpg"):  # You can add more file extensions if needed
                image_path = os.path.join(image_dir, filename)
                images[class_name].append(image_path)

        # Check if any images were found for this class
        print(f"Found {len(images[class_name])} images in {class_name}.")
    else:
        print(f"Directory for {class_name} not found at {image_dir}.")

# Example: Checking the first image path for each class
for class_name in images:
    if images[class_name]:
        print(f"First image path for {class_name}: {images[class_name][0]}")
    else:
        print(f"No images found for {class_name}.")



Found directory for dogs: C:/Users/tjerk/Downloads/Animals/dogs
Found 1000 images in dogs.
Found directory for cats: C:/Users/tjerk/Downloads/Animals/cats
Found 1000 images in cats.
Found directory for snakes: C:/Users/tjerk/Downloads/Animals/snakes
Found 1000 images in snakes.
First image path for dogs: C:/Users/tjerk/Downloads/Animals/dogs\1_0001.jpg
First image path for cats: C:/Users/tjerk/Downloads/Animals/cats\0_0001.jpg
First image path for snakes: C:/Users/tjerk/Downloads/Animals/snakes\2_0001.jpg


# Convert images to HSL and create histograms

In [29]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os

# Define directory paths for each class
image_dirs = {
    "dogs": "C:/Users/tjerk/Downloads/Animals/dogs",
    "cats": "C:/Users/tjerk/Downloads/Animals/cats",
    "snakes": "C:/Users/tjerk/Downloads/Animals/snakes"
}

# Dictionary to store the image paths for each class
images = {"dogs": [], "cats": [], "snakes": []}

# Loop through each class (dogs, cats, snakes)
for class_name, image_dir in image_dirs.items():
    # Check if the directory exists
    if os.path.exists(image_dir):
        print(f"Found directory for {class_name}: {image_dir}")
        
        # Loop through all files in the directory
        for filename in os.listdir(image_dir):
            # Check if the file is a valid image (filter by .jpg, .png, etc.)
            if filename.endswith((".jpg", ".png", ".jpeg")):  # You can add more file extensions if needed
                image_path = os.path.join(image_dir, filename)
                images[class_name].append(image_path)

        # Check if any images were found for this class
        print(f"Found {len(images[class_name])} images in {class_name}.")
    else:
        print(f"Directory for {class_name} not found at {image_dir}.")

# Function to convert RGB to HSL manually
def rgb_to_hsl(r, g, b):
    r, g, b = r / 255.0, g / 255.0, b / 255.0
    max_val = max(r, g, b)
    min_val = min(r, g, b)
    delta = max_val - min_val

    # Lightness
    l = (max_val + min_val) / 2.0

    # Saturation
    if delta == 0:
        s = 0
        h = 0  # Undefined
    else:
        if l < 0.5:
            s = delta / (max_val + min_val)
        else:
            s = delta / (2.0 - max_val - min_val)

        # Hue
        if max_val == r:
            h = (g - b) / delta
        elif max_val == g:
            h = (b - r) / delta + 2
        else:
            h = (r - g) / delta + 4

        h /= 6.0
        if h < 0:
            h += 1.0

    return h * 360, s * 100, l * 100  # Return Hue (degrees), Saturation (%), Lightness (%)

# Function to plot histograms for HSL values
def plot_hsl_histograms(img_list, class_name):
    # Initialize lists to hold the HSL values for all images
    h_vals, s_vals, l_vals = [], [], []
    
    for img_path in img_list:
        try:
            # Open image and convert it to RGB
            with Image.open(img_path) as img:
                # Convert to RGB if not already in RGB
                img = img.convert("RGB")
                
                # Convert image to a NumPy array
                img_array = np.array(img)

                # Extract HSL values
                for row in img_array:
                    for pixel in row:
                        r, g, b = pixel  # RGB values
                        h, s, l = rgb_to_hsl(r, g, b)  # Convert to HSL
                        h_vals.append(h)
                        s_vals.append(s)
                        l_vals.append(l)
        except Exception as e:
            print(f"Error processing image {img_path}: {e}")
            continue

    # Plot HSL histograms
    plt.figure(figsize=(12, 6))
    
    # Plot histogram for Hue
    plt.subplot(1, 3, 1)
    plt.hist(h_vals, bins=256, color='red', alpha=0.6)  # Hue color set to red
    plt.title(f"Hue Histogram for {class_name}")
    plt.xlabel("Hue Value")
    plt.ylabel("Frequency")
    
    # Plot histogram for Saturation
    plt.subplot(1, 3, 2)
    plt.hist(s_vals, bins=256, color='green', alpha=0.6)  # Saturation color set to green
    plt.title(f"Saturation Histogram for {class_name}")
    plt.xlabel("Saturation Value")
    plt.ylabel("Frequency")
    
    # Plot histogram for Lightness
    plt.subplot(1, 3, 3)
    plt.hist(l_vals, bins=256, color='blue', alpha=0.6)  # Lightness color set to blue
    plt.title(f"Lightness Histogram for {class_name}")
    plt.xlabel("Lightness Value")
    plt.ylabel("Frequency")
    
    plt.tight_layout()
    plt.show()

# For each class, plot the HSL histograms
for class_name, img_list in images.items():
    if img_list:
        print(f"Plotting HSL histograms for {class_name}...")
        plot_hsl_histograms(img_list, class_name)




Found directory for dogs: C:/Users/tjerk/Downloads/Animals/dogs
Found 1000 images in dogs.
Found directory for cats: C:/Users/tjerk/Downloads/Animals/cats
Found 1000 images in cats.
Found directory for snakes: C:/Users/tjerk/Downloads/Animals/snakes
Found 1000 images in snakes.
Plotting HSL histograms for dogs...



KeyboardInterrupt



<Figure size 1200x600 with 0 Axes>

# Dimensionality reduction using PCA