In [8]:
import os

# Locations of the two dataset splits; note the "test" split is read from the `valid` directory
train_folder_path = r"C:\Users\Shash\OneDrive\Documents\GitHub\Indian-Birds-Classification\Birds_25\train"
test_folder_path = r"C:\Users\Shash\OneDrive\Documents\GitHub\Indian-Birds-Classification\Birds_25\valid"

# Probe each location and report whether it is reachable
train_folder_found = os.path.exists(train_folder_path)
print(f"Train folder exists: {train_folder_found}")
test_folder_found = os.path.exists(test_folder_path)
print(f"Test folder exists: {test_folder_found}")

# Print the directory entries of every split that was found (train first, then test)
for found, heading, folder in (
    (train_folder_found, "Contents of the train folder:", train_folder_path),
    (test_folder_found, "Contents of the test folder:", test_folder_path),
):
    if found:
        print(heading)
        print(os.listdir(folder))

Train folder exists: True
Test folder exists: True
Contents of the train folder:
['Asian-Green-Bee-Eater', 'Brown-Headed-Barbet', 'Cattle-Egret', 'Common-Kingfisher', 'Common-Myna', 'Common-Rosefinch', 'Common-Tailorbird', 'Coppersmith-Barbet', 'Forest-Wagtail', 'Gray-Wagtail', 'Hoopoe', 'House-Crow', 'Indian-Grey-Hornbill', 'Indian-Peacock', 'Indian-Pitta', 'Indian-Roller', 'Jungle-Babbler', 'Northern-Lapwing', 'Red-Wattled-Lapwing', 'Ruddy-Shelduck', 'Rufous-Treepie', 'Sarus-Crane', 'White-Breasted-Kingfisher', 'White-Breasted-Waterhen', 'White-Wagtail']
Contents of the test folder:
['Asian-Green-Bee-Eater', 'Brown-Headed-Barbet', 'Cattle-Egret', 'Common-Kingfisher', 'Common-Myna', 'Common-Rosefinch', 'Common-Tailorbird', 'Coppersmith-Barbet', 'Forest-Wagtail', 'Gray-Wagtail', 'Hoopoe', 'House-Crow', 'Indian-Grey-Hornbill', 'Indian-Peacock', 'Indian-Pitta', 'Indian-Roller', 'Jungle-Babbler', 'Northern-Lapwing', 'Red-Wattled-Lapwing', 'Ruddy-Shelduck', 'Rufous-Treepie', 'Sarus-Crane',

## **Importing the libraries**

In [2]:
import cv2                                            # Library for image processing- OpenCV
import os                                             # Library for operating system
from sklearn.model_selection import train_test_split  # Library for ML - Scikit-Learn
import numpy as np                                    

## **Loading the dataset**

In [3]:
# Paths to train and test folders.
# These are absolute paths on one specific Windows machine; edit them before running elsewhere.
# The "test" split is read from the dataset's `valid` directory.
train_folder_path = r"C:\Users\Shash\OneDrive\Documents\GitHub\Indian-Birds-Classification\Birds_25\train"
test_folder_path = r"C:\Users\Shash\OneDrive\Documents\GitHub\Indian-Birds-Classification\Birds_25\valid"

In [4]:
# Accumulators filled by the loader: for each split, a list of image file paths
# and a parallel list of integer labels
train_image_paths, train_labels = [], []
test_image_paths, test_labels = [], []

# Shared lookup from class name (bird folder name) to its numerical label
label_dict = dict()

def load_images_from_folder(folder_path, image_paths_list, labels_list, label_dict,
                            extensions=('.jpg', '.png')):
    """Collect image paths and integer labels from a one-folder-per-class directory.

    Every sub-directory of ``folder_path`` is treated as a class (bird) name.
    Matching image files inside it are appended to ``image_paths_list`` and the
    class's integer label is appended to ``labels_list`` at the same position.

    Args:
        folder_path: Directory whose sub-directories are the class folders.
        image_paths_list: List extended in place with full image file paths.
        labels_list: List extended in place with the label of each appended path.
        label_dict: Mapping of class name -> integer label, updated in place.
            Names already present keep their label; new names get
            ``len(label_dict)`` at the time they are first seen.
        extensions: File suffixes accepted as images (compared
            case-insensitively). A single string is also accepted.

    Returns:
        None. All results are delivered through the mutated arguments.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # class -> label assignment and the sample order reproducible across runs.
    for bird_name in sorted(os.listdir(folder_path)):
        bird_folder = os.path.join(folder_path, bird_name)
        if not os.path.isdir(bird_folder):
            continue  # stray files next to the class folders are ignored

        # Reuse the existing label for a known class, otherwise assign the next free id
        label = label_dict.setdefault(bird_name, len(label_dict))

        for filename in sorted(os.listdir(bird_folder)):
            # Lower-case the name so files such as "IMG_1.JPG" are not silently skipped
            if filename.lower().endswith(suffixes):
                image_paths_list.append(os.path.join(bird_folder, filename))
                labels_list.append(label)


In [5]:
# Start from fresh lists so this cell can be re-run safely: the names below are
# rebound to NumPy arrays at the end of the cell, and arrays have no `.append`
# for the loader to use (a partial earlier run would also leave duplicate entries).
train_image_paths, train_labels = [], []
test_image_paths, test_labels = [], []

# Load training data (new class names get their labels assigned here first)
load_images_from_folder(train_folder_path, train_image_paths, train_labels, label_dict)

# Load testing data, reusing the same class-name -> label mapping
load_images_from_folder(test_folder_path, test_image_paths, test_labels, label_dict)

# Convert lists to NumPy arrays
train_image_paths = np.array(train_image_paths)
train_labels = np.array(train_labels)
test_image_paths = np.array(test_image_paths)
test_labels = np.array(test_labels)

In [6]:
# Make sure paths and labels are NumPy arrays. np.asarray hands back an existing
# ndarray unchanged, so nothing is copied when these were already converted,
# while plain lists are still converted as before.
train_image_paths = np.asarray(train_image_paths)
train_labels = np.asarray(train_labels)
test_image_paths = np.asarray(test_image_paths)
test_labels = np.asarray(test_labels)

# Function to preprocess images (resize and normalize)
def preprocess_image(image_path, target_size=(64, 64)):
    """Load one image, resize it and scale its pixel values to the 0-1 range.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        target_size: ``(width, height)`` in pixels handed to ``cv2.resize``.
            Defaults to 64x64.

    Returns:
        A float array of shape ``(height, width, 3)``. When the file cannot be
        read (``cv2.imread`` returns ``None``) an all-zero array of the same
        shape is returned and a ``RuntimeWarning`` is emitted, so the sample
        keeps its position next to its label.
    """
    import warnings  # local import keeps this definition self-contained

    width, height = target_size

    image = cv2.imread(image_path)
    if image is None:
        # Keep the best-effort fallback, but make the failure visible instead of silent
        warnings.warn(
            f"Could not read image '{image_path}'; substituting an all-zero image",
            RuntimeWarning,
            stacklevel=2,
        )
        # Array shape is (rows, cols, channels), i.e. height comes before width
        return np.zeros((height, width, 3))

    image = cv2.resize(image, (width, height))  # cv2.resize expects (width, height)
    return image / 255.0  # Normalize pixel values to the 0-1 range, which suits ML models better

# Run every path of each split through preprocess_image and pack the results into one array
train_images = np.array(list(map(preprocess_image, train_image_paths)))
test_images = np.array(list(map(preprocess_image, test_image_paths)))


In [9]:
# Report the image-array dimensions and the number of labels for each split
train_image_dims = train_images.shape
print(f"Train images shape: {train_image_dims}")
train_label_count = train_labels.shape[0]
print(f"Train labels shape: {train_label_count}")
test_image_dims = test_images.shape
print(f"Test images shape: {test_image_dims}")
test_label_count = test_labels.shape[0]
print(f"Test labels shape: {test_label_count}")

Train images shape: (30000, 64, 64, 3)
Train labels shape: 30000
Test images shape: (7500, 64, 64, 3)
Test labels shape: 7500
