## Modeling with CNN for Wildlife Image Classification

The data for this project is sourced from https://www.kaggle.com/datasets/akash2907/bird-species-classification 

### 1. Import Packages and Load Data

In [1]:
# Import necessary modules
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import imageio.v2 as imageio
import cv2


In [28]:
import keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense


In [35]:
# set path and folder names
train_path = "../jpeg/train"
test_path = "../jpeg/test"

# Keep only real sub-directories (one per class) and sort them.
# os.listdir returns entries in arbitrary order, so slicing off the first
# entry is not a reliable way to skip hidden files such as .DS_Store.
folder_names = sorted(
    entry
    for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)

In [18]:
# load images

# Create lists to store image data and labels
image_data = []
image_labels = []

for folder in folder_names:
    img_path = os.path.join(train_path, folder)

    # Sort the file names so the image order is reproducible between runs
    for filename in sorted(os.listdir(img_path)):
        if filename.endswith((".jpg", ".JPG")):
            img = imageio.imread(os.path.join(img_path, filename))
            img = cv2.resize(img, (300, 300))  # resize

            # imageio returns colour images in RGB channel order, so the
            # grayscale conversion must use RGB2GRAY (BGR2GRAY would swap the
            # red and blue weights). A 2-D array is already single-channel.
            if img.ndim == 3:
                img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

            image_data.append(img)
            image_labels.append(folder)

# Convert data to numpy array
image_data = np.array(image_data)
print("success")

success


In [19]:
# Report the size of the loaded data set. The array built by the loading cell
# is `image_data`; `x_train` is not defined anywhere in this notebook.
print('Shape of x_train array:', image_data.shape)
print('Shape of x_train labels:', len(image_labels))

Shape of x_train array: (149, 300, 300)
Shape of x_train labels: 149


#### Get the label mappings
The `name_to_label` dictionary maps each class (folder) name to the integer label index used for training the model.

In [27]:
# Create a dictionary to map class (folder) names to their integer labels
name_to_label = {name: label for label, name in enumerate(folder_names)}

# Translate each image's class name into its integer label.
# Direct indexing raises KeyError for an unknown name rather than storing a -1
# sentinel: a negative label would be used as a negative column index during
# one-hot encoding and silently end up as the last class.
integer_labels = [name_to_label[name] for name in image_labels]

print(len(integer_labels))

149


### Build base model
First, we need to one-hot-encode the target variable. 

In [29]:
import keras
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical


In [33]:
# Convert labels to one-hot encoded vectors.
# The class count is taken from the class folders found on disk instead of a
# hard-coded number, so it stays correct if the data set changes.
num_classes = len(folder_names)
one_hot_labels = to_categorical(integer_labels, num_classes=num_classes)

# Print the one-hot encoded labels
print(one_hot_labels)
one_hot_labels.shape

[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]]


(149, 16)

### 2. Load Data and Apply Augmentations

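Load the training dataset with `ImageDataGenerator`, which rescales pixel values to [0, 1] and holds out 20% of the images for validation. The augmentation options (rotation, shifts, horizontal flip) are currently commented out, so no augmentation is applied yet.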

In [44]:
# Settings for the directory-based generators and the model
num_classes = 16          # number of output classes
batch_size = 32           # images per batch
image_size = (150, 150)   # size every image is resized to when loaded


In [45]:
# Generator that normalises pixel values to [0, 1] and reserves 20% of the
# images for validation. Augmentation options are left disabled for now:
#    rotation_range=20, width_shift_range=0.1,
#    height_shift_range=0.1, horizontal_flip=True
datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)

# Options shared by the training and validation subsets;
# 'categorical' yields one-hot encoded labels
flow_options = dict(
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
)

# Both subsets read from the same directory and differ only in `subset`
train_generator = datagen.flow_from_directory(
    train_path, subset='training', **flow_options
)

validation_generator = datagen.flow_from_directory(
    train_path, subset='validation', **flow_options
)


Found 123 images belonging to 16 classes.
Found 26 images belonging to 16 classes.


In [46]:
# Baseline CNN: one convolution + pooling stage feeding a small dense head.
# More Conv2D / MaxPooling2D stages can be inserted before Flatten later.
model = Sequential([
    # 32 filters of size 5x5 over 3-channel input images
    Conv2D(
        32,
        kernel_size=(5, 5),
        padding='valid',
        activation='relu',
        input_shape=(*image_size, 3),
    ),
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the feature maps for the fully connected layers
    Flatten(),
    Dense(128, activation='relu'),
    # Softmax output with one unit per class
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels from the generators
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer architecture
model.summary()


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 146, 146, 32)      2432      
                                                                 
 max_pooling2d (MaxPooling2  (None, 73, 73, 32)        0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 170528)            0         
                                                                 
 dense (Dense)               (None, 128)               21827712  
                                                                 
 dense_1 (Dense)             (None, 16)                2064      
                                                                 
Total params: 21832208 (83.28 MB)
Trainable params: 21832208 (83.28 MB)
Non-trainable params: 0 (0.00 Byte)
____________

In [47]:
# Train the model.
# Step counts come from the generators' own lengths (number of batches,
# including the final partial one). Floor-dividing samples by batch_size gave
# 26 // 32 == 0 validation steps and dropped the last partial training batch.
# The History object is kept so the training curves can be inspected later.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
)


  model.fit_generator(


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2c6179d50>