In [2]:
import kagglehub
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

# 1. Download dataset
# The value returned by kagglehub is used as the dataset's root directory;
# the 'Train' and 'Test' sub-folders are resolved relative to it.
path = kagglehub.dataset_download("kedarsai/bird-species-classification-220-categories")
train_dir, test_dir = (os.path.join(path, split) for split in ('Train', 'Test'))

# 2. Get class names dynamically
# Each sub-directory of the training folder counts as one class; plain files
# are ignored. Sorting gives a stable, alphabetical label order.
class_names = sorted(entry.name for entry in os.scandir(train_dir) if entry.is_dir())
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")

# 3. Hyperparameters
seed = 42              # fixed RNG seed so repeated runs start from the same random state
batch_size = 64        # samples per mini-batch for both loaders
num_epochs = 12        # For prototype
learning_rate = 0.001  # step size handed to the Adam optimizer

# Seed PyTorch's global RNG before the model head and the data loaders are
# created, so the new classifier layer's initialisation, the shuffle order and
# the random flips are repeatable. GPU kernels may still introduce small
# run-to-run differences.
torch.manual_seed(seed)

# 4. Data transforms
def _make_pipeline(*augmentations):
    """Build resize -> optional augmentations -> tensor -> per-channel normalization.

    Any transforms passed in are inserted between the resize and the tensor
    conversion, so they operate on the resized image.
    """
    return transforms.Compose([
        transforms.Resize((128, 128)),
        *augmentations,
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])


# The two pipelines differ only by the random horizontal flip applied while
# training; validation images are never augmented.
data_transforms = {
    'train': _make_pipeline(transforms.RandomHorizontalFlip()),
    'val': _make_pipeline(),
}

# 5. Datasets & loaders
# ImageFolder reads one class per sub-directory. The 'Test' split doubles as
# the validation set for this prototype.
train_dataset = datasets.ImageFolder(root=train_dir, transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(root=test_dir, transform=data_transforms['val'])

# Settings shared by both loaders; only the training loader shuffles.
_loader_options = dict(batch_size=batch_size, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

# 6. Model setup (ResNet18)
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Start from the ImageNet checkpoint and swap the final fully connected layer
# for one with an output per bird class.
model = models.resnet18(weights='IMAGENET1K_V1')
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)
model.to(device)  # nn.Module.to() moves the module's parameters in place

# 7. Loss & optimizer
# All parameters (backbone and new head) are handed to the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# 8. Training loop with tqdm progress bar
# Each epoch runs one optimisation pass over train_loader followed by an
# accuracy measurement over val_loader.
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    for inputs, labels in train_bar:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        # Weight the batch loss by the batch size so that dividing by the
        # dataset length below yields a per-sample average.
        running_loss += batch_loss * inputs.size(0)
        train_bar.set_postfix(loss=batch_loss)

    epoch_loss = running_loss / len(train_loader.dataset)

    # ---- validation pass ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            # Index of the largest logit per row is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_accuracy = correct / total
    print(f"Epoch {epoch+1}/{num_epochs} complete — Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

# Save model for prototype
# Only the weights (state_dict) are written; they are those of the last epoch.
torch.save(model.state_dict(), "resnet18_bird_prototype.pth")
print("Prototype model saved!")


Using Colab cache for faster access to the 'bird-species-classification-220-categories' dataset.
Number of classes: 200


Epoch 1/12: 100%|██████████| 148/148 [04:15<00:00,  1.73s/it, loss=2.28]


Epoch 1/12 complete — Loss: 3.7837, Validation Accuracy: 0.2182


Epoch 2/12: 100%|██████████| 148/148 [04:13<00:00,  1.72s/it, loss=2.86]


Epoch 2/12 complete — Loss: 2.4152, Validation Accuracy: 0.3273


Epoch 3/12: 100%|██████████| 148/148 [04:13<00:00,  1.71s/it, loss=2.86]


Epoch 3/12 complete — Loss: 1.8529, Validation Accuracy: 0.3993


Epoch 4/12: 100%|██████████| 148/148 [04:14<00:00,  1.72s/it, loss=1.35]


Epoch 4/12 complete — Loss: 1.4605, Validation Accuracy: 0.4061


Epoch 5/12: 100%|██████████| 148/148 [04:12<00:00,  1.71s/it, loss=1.31]


Epoch 5/12 complete — Loss: 1.0950, Validation Accuracy: 0.3997


Epoch 6/12: 100%|██████████| 148/148 [04:19<00:00,  1.75s/it, loss=1.37]


Epoch 6/12 complete — Loss: 0.8813, Validation Accuracy: 0.4545


Epoch 7/12: 100%|██████████| 148/148 [04:18<00:00,  1.74s/it, loss=2.36]


Epoch 7/12 complete — Loss: 0.7306, Validation Accuracy: 0.4351


Epoch 8/12: 100%|██████████| 148/148 [04:16<00:00,  1.73s/it, loss=2.2]


Epoch 8/12 complete — Loss: 0.6409, Validation Accuracy: 0.4532


Epoch 9/12: 100%|██████████| 148/148 [04:13<00:00,  1.71s/it, loss=1.37]


Epoch 9/12 complete — Loss: 0.5408, Validation Accuracy: 0.4583


Epoch 10/12: 100%|██████████| 148/148 [04:16<00:00,  1.73s/it, loss=1.08]


Epoch 10/12 complete — Loss: 0.4226, Validation Accuracy: 0.4958


Epoch 11/12: 100%|██████████| 148/148 [04:12<00:00,  1.71s/it, loss=0.756]


Epoch 11/12 complete — Loss: 0.3008, Validation Accuracy: 0.4781


Epoch 12/12: 100%|██████████| 148/148 [04:14<00:00,  1.72s/it, loss=0.173]


Epoch 12/12 complete — Loss: 0.2595, Validation Accuracy: 0.4844
Prototype model saved!


In [3]:
# Score the in-memory model once more on the validation loader.
# eval() puts the model in inference mode and no_grad() skips gradient tracking.
model.eval()
correct = total = 0

with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

final_accuracy = correct / total
print(f'Final Validation Accuracy: {final_accuracy:.4f}')

Final Validation Accuracy: 0.4844


In [4]:
# Destination file for the trained weights.
# NOTE(review): the file name mentions vgg16, but the weights written here come
# from `model`, which the training cell builds as a ResNet18. The name is left
# unchanged because the load cell reads this same variable.
model_save_path = 'vgg16_bird_classifier.pth'

# Persist only the parameters (state_dict), not the whole module object.
trained_weights = model.state_dict()
torch.save(trained_weights, model_save_path)

print(f"Model saved to {model_save_path}")

Model saved to vgg16_bird_classifier.pth


In [5]:
# To load the model, first rebuild the same architecture. No pretrained weights
# are requested here because the checkpoint below supplies every parameter.
loaded_model = models.resnet18()

# Replace the classifier layer so its output size matches the saved weights.
num_ftrs = loaded_model.fc.in_features
loaded_model.fc = nn.Linear(num_ftrs, num_classes)

# Load the saved state_dict. map_location="cpu" lets a checkpoint written from
# a CUDA model be read on a runtime without a GPU; loaded_model itself stays on
# the CPU, so this is also where the tensors need to end up.
state_dict = torch.load(model_save_path, map_location="cpu")
loaded_model.load_state_dict(state_dict)

# Set the loaded model to evaluation mode for inference
loaded_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [1]:
# Install Gradio into the running kernel's environment; the demo cell imports it.
# NOTE(review): the version is not pinned, so a rerun may install a different release.
%pip install gradio



In [4]:
import gradio as gr
import torch
from torchvision import models, transforms
from PIL import Image
import os
import json
from torchvision import models
import torch.nn as nn

# --- 1. Model and Label Setup ---
# Rebuild the ResNet18 architecture and load the fine-tuned weights saved by
# the training cell, so the Gradio demo serves real predictions.

# `class_names` must already exist in the kernel (it is created by the
# training cell); its order defines which output index maps to which label.
CLASS_LABELS = class_names
# Derive the class count from the labels so the two can never disagree.
NUM_CLASSES = len(CLASS_LABELS)

# Architecture only: no pretrained weights are requested because the
# checkpoint loaded below supplies every parameter.
model = models.resnet18()

# Replace the classifier layer so its output size matches the checkpoint.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, NUM_CLASSES)

# Load trained weights
model_save_path = "resnet18_bird_prototype.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    state_dict = torch.load(model_save_path, map_location=device)
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook the cell must stop with a
    # visible error so the demo is never launched with untrained weights.
    raise FileNotFoundError(
        f"Model file not found at '{model_save_path}'. "
        "Please ensure the model has been trained and saved."
    ) from err

# Any other failure (e.g. a shape mismatch in load_state_dict) propagates with
# its full traceback rather than being reduced to a printed message.
model.load_state_dict(state_dict)
model.to(device)
model.eval()  # Set the model to evaluation mode
print(f"Model loaded successfully from '{model_save_path}'.")


# Inference-time preprocessing: resize to 128x128, convert to a tensor and
# normalize per channel with the same values as the training cell's 'val' pipeline.
_inference_steps = [
    transforms.Resize(size=(128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
preprocess = transforms.Compose(_inference_steps)

# --- 2. Prediction Function ---
def classify_bird(image: Image.Image):
    """
    Classify a bird image using the loaded PyTorch model.

    Args:
        image: PIL image to classify, or None when no image was supplied.

    Returns:
        A dict mapping every label in CLASS_LABELS to its softmax probability,
        ordered from most to least probable. An empty dict when image is None.
    """
    if image is None:
        return {}

    # The normalization step is configured for three channels, so force RGB:
    # RGBA, grayscale or palette images would otherwise fail on channel count.
    rgb_image = image.convert("RGB")

    # Add the batch dimension the model expects and move to the model's device.
    batch = preprocess(rgb_image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = model(batch)
        probabilities = torch.nn.functional.softmax(logits, dim=1)[0]

    # One transfer to Python floats instead of one float() call per class.
    scores = probabilities.tolist()
    confidences = {CLASS_LABELS[i]: scores[i] for i in range(len(CLASS_LABELS))}
    ranked = sorted(confidences.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked)

# --- 3. Gradio Interface Setup ---
# Create an 'images' directory with placeholder files for the Gradio examples.
# Every placeholder is a blank white 128x128 image; the file names do not
# describe real content, so predictions on them are not meaningful.
images_dir = 'images'
os.makedirs(images_dir, exist_ok=True)
dummy_image_paths = []
for filename in ["blue_jay.jpg", "bald_eagle.jpg", "pigeon.jpg"]:
    # A dedicated name keeps this loop from overwriting the notebook-level
    # `path` variable, which holds the dataset root set by the training cell.
    example_path = os.path.join(images_dir, filename)
    Image.new('RGB', (128, 128), color='white').save(example_path)
    dummy_image_paths.append(example_path)
print(f"Dummy example images created in '{images_dir}' directory.")

# Define the input and output components for the UI.
# type="pil" makes Gradio hand classify_bird a PIL image; the label widget
# shows the three highest-scoring classes.
image_input = gr.Image(type="pil", label="Upload an image of a bird")
label_output = gr.Label(num_top_classes=3, label="Prediction")

# Build the interface around classify_bird and start serving it.
gr.Interface(
    fn=classify_bird,
    inputs=image_input,
    outputs=label_output,
    title="Bird Species Recognizer",
    description="Upload an image to recognize the bird species. This is a demo; a real model file is required for accurate predictions.",
    examples=dummy_image_paths
).launch()

Model loaded successfully from 'resnet18_bird_prototype.pth'.
Dummy example images created in 'images' directory.
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://19912a3fd3ae90bf5d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


