In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
class BirdSpecies(Dataset):
    """Dataset facade over a torchvision ``ImageFolder``.

    Every operation is forwarded to the wrapped ``ImageFolder`` kept in
    ``self.data``; the class adds no behaviour of its own.
    """

    def __init__(self, data_dir, transform=None):
        """Create the wrapped ``ImageFolder``.

        Args:
            data_dir: Root directory handed to ``ImageFolder``.
            transform: Optional transform forwarded to ``ImageFolder``.
        """
        wrapped = ImageFolder(root=data_dir, transform=transform)
        self.data = wrapped

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        sample_count = len(self.data)
        return sample_count

    def __getitem__(self, idx):
        """Return whatever the wrapped dataset yields at position ``idx``."""
        sample = self.data[idx]
        return sample

    @property
    def classes(self):
        """Expose the ``classes`` attribute of the wrapped dataset."""
        return self.data.classes


In [3]:
# Preprocessing pipeline shared by every dataset in this notebook.
# (torchvision transform classes are CamelCase: Compose, Resize, ToTensor.)
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),  # bring every image to 128x128
        transforms.ToTensor(),          # turn the image into a tensor
    ]
)

In [7]:
# Training split wrapped with the shared preprocessing pipeline.
dataset = BirdSpecies(
    data_dir='/kaggle/input/100-bird-species/train',
    transform=transform,
)

In [8]:
# Sanity check: fetch the first (image, label) pair, then show how many
# classes the dataset reports.
image, label = dataset[0]
len(dataset.classes)

525

In [9]:
class BirdsClassifier(nn.Module):
    """Image classifier: a timm backbone followed by a fresh linear head.

    The backbone is created headless (``num_classes=0``), so calling it
    returns pooled feature vectors and no unused ImageNet classifier is left
    registered on the module (and therefore none is handed to the optimizer).
    The head's input width is read from the backbone's ``num_features``
    instead of being hard-coded, which keeps the class valid for backbones
    other than EfficientNet-B0.

    Args:
        num_classes: Number of output logits produced by the linear head.
        model_name: timm architecture name passed to ``timm.create_model``.
        pretrained: Whether timm should load pretrained backbone weights.
    """

    def __init__(self, num_classes=525, model_name='efficientnet_b0', pretrained=True):
        super().__init__()
        # num_classes=0 is timm's documented way to get a feature extractor:
        # pooling is kept, the classification layer is replaced by identity.
        self.base_model = timm.create_model(
            model_name=model_name,
            pretrained=pretrained,
            num_classes=0,
        )
        # ``features`` is kept as an attribute for backward compatibility; it
        # is the same module object as ``base_model`` (shared parameters).
        self.features = self.base_model
        # Width of the pooled feature vector as reported by the backbone.
        feature_dim = self.base_model.num_features
        self.classify = nn.Linear(feature_dim, num_classes)

    def forward(self, X):
        """Return class logits for the batch ``X``.

        ``X`` is passed through the headless backbone and the resulting
        feature vectors are mapped to ``num_classes`` logits.
        """
        X = self.features(X)
        predictions = self.classify(X)
        return predictions

In [32]:
# Exploratory: list the backbone's direct child modules, minus the last one.
list(
    timm.create_model(model_name='efficientnet_b0', pretrained=True).children()
)[:-1]

[Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
 BatchNormAct2d(
   32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
   (drop): Identity()
   (act): SiLU(inplace=True)
 ),
 Sequential(
   (0): Sequential(
     (0): DepthwiseSeparableConv(
       (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
       (bn1): BatchNormAct2d(
         32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
         (drop): Identity()
         (act): SiLU(inplace=True)
       )
       (aa): Identity()
       (se): SqueezeExcite(
         (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
         (act1): SiLU(inplace=True)
         (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
         (gate): Sigmoid()
       )
       (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
       (bn2): BatchNormAct2d(
         16, eps=1e-05, momentum=0.1, affine=Tr

In [10]:
# Smoke test: push one shuffled batch through an untrained classifier and
# display the resulting cross-entropy loss.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Only the first batch is needed.
images, labels = next(iter(dataloader))
print(images.size())

m = BirdsClassifier()
c = nn.CrossEntropyLoss()
predictions = m(images)
c(predictions, labels)

torch.Size([32, 3, 128, 128])


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

tensor(6.2545, grad_fn=<NllLossBackward0>)

In [11]:


# --- training split: reshuffled every epoch ---
train_folder = '/kaggle/input/100-bird-species/train'
train_dataset = BirdSpecies(data_dir=train_folder, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# --- validation split: fixed order ---
valid_folder = '/kaggle/input/100-bird-species/valid'
valid_dataset = BirdSpecies(data_dir=valid_folder, transform=transform)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

In [12]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [13]:
# Train a fresh classifier for a fixed number of epochs, recording the
# per-sample average loss on the training and validation splits after each
# epoch in ``train_losses`` / ``valid_losses``.
epochs = 5
train_losses, valid_losses = [], []

model = BirdsClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=.001)

model = model.to(device)

for epoch in range(epochs):
    # ---- training pass ----
    model.train()  # put the model in training mode
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()   # compute gradients of the loss w.r.t. the parameters
        optimizer.step()  # update the parameters using those gradients

        # The criterion returns the mean loss over the batch, so weight it by
        # the batch size to accumulate a per-sample sum.
        num_of_images_batch = images.size(0)
        running_loss += loss.item() * num_of_images_batch

    train_losses.append(running_loss / len(train_loader.dataset))

    # ---- validation pass (evaluation mode, no gradient tracking) ----
    model.eval()
    running_loss_valid = 0.0
    with torch.no_grad():
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)

            output = model(images)
            loss = criterion(output, labels)

            running_loss_valid += loss.item() * images.size(0)

    valid_losses.append(running_loss_valid / len(valid_loader.dataset))

    # Report a 1-based epoch number so the final line reads "epochs/epochs".
    print(f"Epoch: {epoch + 1}/{epochs} - Train Loss: {train_losses[-1]} - Valid Loss: {valid_losses[-1]}")
            
        

Epoch: 0/5 - Train Loss: 1.3018143099110147 - Valid Loss: 0.3490261845815749
Epoch: 1/5 - Train Loss: 0.46624922835764193 - Valid Loss: 0.30215078808012463
Epoch: 2/5 - Train Loss: 0.3339213438501542 - Valid Loss: 0.2499705170733588
Epoch: 3/5 - Train Loss: 0.25674345973759594 - Valid Loss: 0.22297737400304704
Epoch: 4/5 - Train Loss: 0.20778606323185397 - Valid Loss: 0.25101257233392626


In [14]:
# Held-out test split, iterated in a fixed order.
test_folder = '/kaggle/input/100-bird-species/test'
test_dataset = BirdSpecies(
    data_dir=test_folder,
    transform=transform,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
)

In [52]:
# Evaluate the trained model on the test split: count misclassified samples
# and keep the misclassified images for later inspection.
model.eval()
incorrect = 0
wrong_imgs = []
with torch.no_grad():
    for images_og, labels_og in test_loader:
        images, labels = images_og.to(device), labels_og.to(device)
        output = model(images)
        predictions = torch.argmax(output, dim=1)

        # One vectorised comparison per batch instead of a Python loop over
        # individual elements; the mask is moved to the CPU so it can index
        # the original (CPU) image batch.
        wrong_mask = (predictions != labels).cpu()
        incorrect += int(wrong_mask.sum())

        # Channels-first -> channels-last arrays, in batch order. Indexing
        # with the mask copies only the misclassified images.
        wrong_imgs.extend(
            img.permute(1, 2, 0).numpy() for img in images_og[wrong_mask]
        )

error = incorrect/len(test_loader.dataset)
print(f"error rate: {error*100}% - accuracy: {100-error*100}")
# wrong_imgs now holds one channels-last array per misclassified test image.


error rate: 4.647619047619048% - accuracy: 95.35238095238095
