<a href="https://colab.research.google.com/github/Kaazzz/IS2/blob/main/Model_for_Birds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.4.1-py3-none-any.whl (487 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m487.4/487.4 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.12.0-py3-none-any.w

In [2]:
from datasets import load_dataset
import os
from PIL import Image

# Load dataset
ds = load_dataset("yashikota/birds-525-species-image-classification")

# Get bird species names
label_names = ds["train"].features["label"].names  # Mapping index → name

# Set the output directory
output_dir = "bird_dataset"
os.makedirs(output_dir, exist_ok=True)

# Save images into label-based folders
for split in ["train", "validation", "test"]:
    split_dir = os.path.join(output_dir, split)
    os.makedirs(split_dir, exist_ok=True)

    for i, example in enumerate(ds[split]):
        image = example["image"]  # Image data
        label_id = example["label"]  # Numeric label
        label_name = label_names[label_id]  # Convert label number to bird name

        # Ensure image is in PIL format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        # Create a folder for each bird species (label)
        label_dir = os.path.join(split_dir, label_name)
        os.makedirs(label_dir, exist_ok=True)

        # Save the image
        image_path = os.path.join(label_dir, f"{split}_{i}.jpg")
        image.save(image_path)

print(f"Dataset saved in '{output_dir}/' with images organized by bird species names.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/17.8k [00:00<?, ?B/s]

train-00000-of-00004.parquet:   0%|          | 0.00/464M [00:00<?, ?B/s]

train-00001-of-00004.parquet:   0%|          | 0.00/480M [00:00<?, ?B/s]

train-00002-of-00004.parquet:   0%|          | 0.00/494M [00:00<?, ?B/s]

train-00003-of-00004.parquet:   0%|          | 0.00/471M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/60.5M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/60.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/84635 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2625 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2625 [00:00<?, ? examples/s]

Dataset saved in 'bird_dataset/' with images organized by bird species names.


In [3]:
import tensorflow as tf
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
# Load pre-trained MobileNet without the top layer
base_model = tf.keras.applications.MobileNet(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3)  # Ensure correct input dimensions
)

base_model.trainable = False

dataset_path = "bird_dataset/"
datagen = ImageDataGenerator(rescale=1.0/255)

# Load training set
train_generator = datagen.flow_from_directory(
    dataset_path + "train",
    target_size=(224, 224),  # MobileNet input size
    batch_size=32,
    class_mode="categorical"
)

# Load validation set
val_generator = datagen.flow_from_directory(
    dataset_path + "validation",
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical"
)

# Load test set (to evaluate final accuracy)
test_generator = datagen.flow_from_directory(
    dataset_path + "test",
    target_size=(224, 224),
    batch_size=32,
    class_mode="categorical",
    shuffle=False
)


# Add custom layers for fine-tuning
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Pooling layer
x = Dense(1024, activation='relu')(x)  # Fully connected layer
x = Dense(512, activation='relu')(x)   # Another dense layer (optional)
predictions = Dense(525, activation='softmax')(x)  # Output layer (525 bird classes)

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=12, restore_best_weights=True)
# Print model summary
model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=80,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator),
    callbacks=[early_stopping]
)








Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_224_tf_no_top.h5
[1m17225924/17225924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Found 84635 images belonging to 525 classes.
Found 2625 images belonging to 525 classes.
Found 2625 images belonging to 525 classes.
Epoch 1/80


  self._warn_if_super_not_called()


[1m2645/2645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 59ms/step - accuracy: 0.2682 - loss: 4.0632 - val_accuracy: 0.7451 - val_loss: 0.9658
Epoch 2/80
[1m2645/2645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 63ms/step - accuracy: 0.7585 - loss: 0.9845 - val_accuracy: 0.8331 - val_loss: 0.6612
Epoch 3/80
[1m2645/2645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 55ms/step - accuracy: 0.8357 - loss: 0.6497 - val_accuracy: 0.8606 - val_loss: 0.5272
Epoch 4/80
[1m2645/2645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 49ms/step - accuracy: 0.8735 - loss: 0.4915 - val_accuracy: 0.8606 - val_loss: 0.4883
Epoch 5/80
[1m2645/2645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 49ms/step - accuracy: 0.9016 - loss: 0.3826 - val_accuracy: 0.8815 - val_loss: 0.4447
Epoch 6/80
[1m2645/2645[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 50ms/step - accuracy: 0.9219 - loss: 0.3002 - val_accuracy: 0.8701 - val_loss: 0.4696
Epoch 7/8

<keras.src.callbacks.history.History at 0x7d37ba681b10>

In [5]:
loss, accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {accuracy*100:.2f}%")


[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 55ms/step - accuracy: 0.9131 - loss: 0.3191
Test Accuracy: 91.28%


In [6]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image

def preprocess_image(image_path):
    img = image.load_img(image_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = img_array / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    return img_array

test_folder = "images/"

class_labels = list(train_generator.class_indices.keys())

for filename in os.listdir(test_folder):
    if filename.lower().endswith((".jpg", ".png", ".jpeg")):
        img_path = os.path.join(test_folder, filename)

        img_array = preprocess_image(img_path)

        predictions = model.predict(img_array)
        predicted_class_index = np.argmax(predictions)
        predicted_class = class_labels[predicted_class_index]

        print(f"Image: {filename} | Predicted Bird Species: {predicted_class}")


FileNotFoundError: [Errno 2] No such file or directory: 'images/'

In [7]:
model.save("bird_classification_model.keras")


In [8]:
from google.colab import files
files.download("bird_classification_model.keras")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>