In [1]:
!cp /kaggle/input/setupfiles/environment.yml /kaggle/working/
!cp /kaggle/input/setupfiles/install.sh /kaggle/working/


# Dependency Installation

In [2]:
!bash install.sh

Installing dependencies from environment.yml using pip...
Environment setup completed successfully!


# Imports

In [3]:
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, random_split
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import torch.nn.functional as F

# Class Descriptions

In [4]:
class birdClassifier(nn.Module):
    def __init__(self):
        super(birdClassifier, self).__init__()
        print("Initializing birdClassifier")
        self.conv_layers = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),  # Conv1: 64 filters
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),  # Conv2: 64 filters
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Pooling reduces size by half
            # Block 2
            nn.Conv2d(
                64, 128, kernel_size=3, stride=1, padding=1
            ),  # Conv3: 128 filters
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(
                128, 128, kernel_size=3, stride=1, padding=1
            ),  # Conv4: 128 filters
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 3
            nn.Conv2d(
                128, 256, kernel_size=3, stride=1, padding=1
            ),  # Conv5: 256 filters
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(
                256, 256, kernel_size=3, stride=1, padding=1
            ),  # Conv6: 256 filters
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(
                256, 256, kernel_size=3, stride=1, padding=1
            ),  # Conv7: 256 filters
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 4
            nn.Conv2d(
                256, 512, kernel_size=3, stride=1, padding=1
            ),  # Conv8: 512 filters
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(
                512, 512, kernel_size=3, stride=1, padding=1
            ),  # Conv9: 512 filters
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(
                512, 512, kernel_size=3, stride=1, padding=1
            ),  # Conv10: 512 filters
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 5
            nn.Conv2d(
                512, 1024, kernel_size=3, stride=1, padding=1
            ),  # Conv11: 1024 filters
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.Conv2d(
                1024, 1024, kernel_size=3, stride=1, padding=1
            ),  # Conv12: 1024 filters
            nn.BatchNorm2d(1024),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(1024 * 7 * 7, 4096),  # Fully connected layer 1
            nn.BatchNorm1d(4096),
            nn.ReLU(),
            nn.Dropout(0.5),  # Dropout for regularization
            nn.Linear(4096, 4096),  # Fully connected layer 2
            nn.BatchNorm1d(4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 10))  # Output layer (10 bird classes))

        self._initialize_weights()

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction="mean"):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, reduction="none")
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * ce_loss

        if self.reduction == "mean":
            return focal_loss.mean()
        elif self.reduction == "sum":
            return focal_loss.sum()
        else:
            return focal_loss


def train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    device,
    num_epochs=60,
    patience=10,
):
    print("Starting model training")
    model.train()
    best_val_loss = float("inf")
    epochs_without_improvement = 0
    best_model_state = None

    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train()
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if i % 100 == 99:  # Print every 100 mini-batches
                print(
                    f"[Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 100:.6f}"
                )
                print(f"Sample outputs: {outputs[0][:5]}")
                print(f"Sample labels: {labels[0]}")
                running_loss = 0.0

        # Validation phase
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch + 1} Validation Loss: {val_loss:.6f}")

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            best_model_state = model.state_dict()
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f"Early stopping triggered after {epoch + 1} epochs")
                model.load_state_dict(best_model_state)
                break

    print("Finished Training")
    return model



In [5]:
print("Script started")
dataPath = "/kaggle/input/identify-the-birds/Birds/train"
trainStatus = "train"
modelPath = sys.argv[3] if len(sys.argv) > 3 else "model.pth"

print(f"Data path: {dataPath}")
print(f"Train status: {trainStatus}")
print(f"Model path: {modelPath}")



print("Entering training mode")

# Set up data transformations
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)

# Load dataset
full_dataset = datasets.ImageFolder(root=dataPath, transform=transform)

# Create train-test split
train_indices, test_indices = train_test_split(
    range(len(full_dataset)),
    test_size=0.2,
    random_state=42,
    stratify=full_dataset.targets,
)

train_dataset = Subset(full_dataset, train_indices)
test_dataset = Subset(full_dataset, test_indices)

# Split the training data into training and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=3, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)
label_counts = {}
for _, labels in train_loader:
    for label in labels:
        label_counts[label.item()] = label_counts.get(label.item(), 0) + 1
print("Label distribution:", label_counts)


# Check input data range
for inputs, labels in train_loader:
    print(f"Input data range: [{inputs.min().item():.2f}, {inputs.max().item():.2f}]")
    break
    

Script started
Data path: /kaggle/input/identify-the-birds/Birds/train
Train status: train
Model path: --HistoryManager.hist_file=:memory:
Entering training mode
Label distribution: {0: 686, 6: 764, 5: 779, 7: 613, 3: 522, 8: 607, 2: 461, 9: 549, 1: 622, 4: 771}
Input data range: [0.00, 1.00]


Run your .py file on CLI using the following command

In [6]:
# Set up device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create model instance
model = birdClassifier().to(device)
print("Model created and moved to device")
# Set up loss function and optimizer
criterion = FocalLoss(alpha=1, gamma=2)
optimizer = optim.Adam(model.parameters(), lr=0.1)

# Train the model with early stopping
model = train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    device,
    num_epochs=60,
    patience=5,
)

# Save the trained model
torch.save(model.state_dict(), modelPath)
print(f"Model saved to {modelPath}")
# hello world
# Evaluate the model on the test set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy on the test set: {accuracy:.2f}%")


Using device: cuda
Initializing birdClassifier
Model created and moved to device
Starting model training
[Epoch 1, Batch 100] Loss: 78.928269
Sample outputs: tensor([  27.1459, -110.0038,   19.0432,  -34.3762,  -26.9180],
       device='cuda:0', grad_fn=<SliceBackward0>)
Sample labels: 8
[Epoch 1, Batch 200] Loss: 31.271787
Sample outputs: tensor([ 0.2072,  1.7453, -2.6475,  2.0727,  0.1883], device='cuda:0',
       grad_fn=<SliceBackward0>)
Sample labels: 9
[Epoch 1, Batch 300] Loss: 4.887303
Sample outputs: tensor([ 0.0947,  0.2200, -0.6151, -0.3918,  0.4836], device='cuda:0',
       grad_fn=<SliceBackward0>)
Sample labels: 9
[Epoch 1, Batch 400] Loss: 1.889416
Sample outputs: tensor([ 0.2301, -0.0640, -0.5489, -0.1720,  0.3677], device='cuda:0',
       grad_fn=<SliceBackward0>)
Sample labels: 3
[Epoch 1, Batch 500] Loss: 1.891694
Sample outputs: tensor([-0.1423,  0.5796, -0.3169, -0.4056,  0.2484], device='cuda:0',
       grad_fn=<SliceBackward0>)
Sample labels: 5
[Epoch 1, Batch 60