In [49]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torchvision.io import read_image
import glob
import os
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Describing the ResNet class

In [50]:
class ResNet(nn.Module):
    """Residual network with 6n + 2 weighted layers.

    Layout: a 3x3 stem convolution with 16 filters, then three stages of
    ``n`` residual blocks each (16, 32 and 64 filters, two 3x3 convolutions
    per block), global average pooling and a linear classifier.  The first
    convolution of stages two and three uses stride 2, halving the feature
    map; the shortcut of that block goes through a strided 1x1 convolution
    so that its shape matches the block output.

    Layers are created on the default device; move the model with
    ``.to(device)`` after construction.

    Args:
        in_channels: number of channels of the input images.
        num_classes: number of class scores produced per image.
        n: residual blocks per stage (must be >= 1).

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """

    def __init__(self, in_channels, num_classes, n):
        super().__init__()
        if n < 1:
            raise ValueError("n must be a positive integer")
        self.n = n

        # Stem: 3x3 convolution, padding 1 keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=1)

        # Each stage holds 2n convolutions (n blocks of two convolutions).
        # nn.ModuleList (not a plain list) is required so the layers are
        # registered: otherwise parameters(), .to() and the optimizer skip them.
        self.res16 = self._make_stage(16, 16, first_stride=1)

        # Strided 1x1 projections for the shortcut of the first block of a
        # stage, where both the channel count and the resolution change.
        self.res16_32_1x1 = nn.Conv2d(16, 32, kernel_size=1, stride=2, padding=0)
        self.res32 = self._make_stage(16, 32, first_stride=2)

        self.res32_64_1x1 = nn.Conv2d(32, 64, kernel_size=1, stride=2, padding=0)
        self.res64 = self._make_stage(32, 64, first_stride=2)

        # Adaptive pooling averages the whole feature map whatever its size,
        # so the network is not tied to one input resolution.
        self.final_mean_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(64, num_classes)

    def _make_stage(self, in_filters, out_filters, first_stride):
        """Build the 2n convolutions of one stage as a registered ModuleList."""
        convs = [nn.Conv2d(in_filters, out_filters, kernel_size=3, stride=first_stride, padding=1)]
        convs.extend(
            nn.Conv2d(out_filters, out_filters, kernel_size=3, stride=1, padding=1)
            for _ in range(2 * self.n - 1)
        )
        return nn.ModuleList(convs)

    @staticmethod
    def _run_stage(x, convs, projection=None):
        """Apply one stage: consecutive conv pairs, each with a residual shortcut."""
        for i in range(0, len(convs), 2):
            # Only the first block of a down-sampling stage changes shape, so
            # only there does the shortcut need the 1x1 projection.
            shortcut = projection(x) if (i == 0 and projection is not None) else x
            out = F.relu(convs[i](x))
            out = convs[i + 1](out)
            x = F.relu(out + shortcut)  # add the shortcut before the activation
        return x

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)``."""
        x = F.relu(self.conv1(x))
        x = self._run_stage(x, self.res16)
        x = self._run_stage(x, self.res32, self.res16_32_1x1)
        x = self._run_stage(x, self.res64, self.res32_64_1x1)
        x = self.final_mean_pool(x)   # (batch, 64, 1, 1)
        x = torch.flatten(x, 1)       # (batch, 64)
        return self.fc(x)




        

In [51]:
class bird_dataset(Dataset):
    """Image-folder dataset: one sub-directory per class, images inside it.

    Class names are the sub-directory names.  They are sorted before being
    numbered so that the name -> index mapping is identical on every run and
    across separate instances (e.g. train / val / test) that contain the
    same class folders.

    Attributes:
        datapath: the root folder passed to the constructor.
        data: list of ``(image_path, class_name)`` tuples.
        labels: sorted list of class names.
        label_to_index: dict mapping class name to its integer index.
    """

    def __init__(self, datapath):  # Either the test, train, or val data folder.
        self.datapath = datapath
        # Only directories are classes; stray files at the top level would
        # otherwise become empty classes and shift the label indices.
        class_dirs = sorted(
            path for path in glob.glob(os.path.join(datapath, "*")) if os.path.isdir(path)
        )
        self.labels = [os.path.basename(path) for path in class_dirs]
        self.label_to_index = {label: i for i, label in enumerate(self.labels)}

        self.data = []
        for label, folder in zip(self.labels, class_dirs):
            # Sorted so that sample order does not depend on the file system.
            for file in sorted(glob.glob(os.path.join(folder, "*"))):
                self.data.append((file, label))

    def __len__(self):
        """Number of image files found under ``datapath``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for sample ``idx``.

        The image is loaded with ``read_image`` and divided by 255.
        """
        img_path, label = self.data[idx]
        img = read_image(img_path)
        img = img / 255
        label = self.label_to_index[label]  # integer class index instead of the name
        return img, label



### Hyperparameters and dataloaders

In [52]:
# Hyperparameters for the network and the training run.
num_classes = 25
n = 2  # residual blocks per stage; the network has 6n + 2 layers
in_channels = 3  # RGB images
batch_size = 1  # kept minimal: larger batches probably won't fit in 4GB of VRAM
initial_learning_rate = 1e-4
num_epochs = 50

In [15]:
# Paths are built with os.path.join so the notebook also runs on systems
# where "\\" is not a path separator.
# Only the training set is shuffled; evaluation order does not affect the
# metrics, and a fixed order keeps validation/test runs reproducible.
Train_loader = DataLoader(bird_dataset(os.path.join("Birds_25", "train")), batch_size=batch_size, shuffle=True)
Test_loader = DataLoader(bird_dataset(os.path.join("Birds_25", "test")), batch_size=batch_size, shuffle=False)
Val_loader = DataLoader(bird_dataset(os.path.join("Birds_25", "val")), batch_size=batch_size, shuffle=False)

In [53]:
# Build the network from the notebook hyperparameters and place it on the selected device.
model = ResNet(in_channels=in_channels, num_classes=num_classes, n=n).to(device)

In [54]:
# Cross-entropy loss over the class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=initial_learning_rate)

In [59]:
def train_model(model, loader=None, epochs=None, loss_fn=None, opt=None, target_device=None):
    """Train ``model`` and return the mean loss of every epoch.

    Every optional argument falls back to the notebook-level object of the
    same role when left as ``None``, so ``train_model(model)`` keeps working.

    Args:
        model: the network to train; it is switched to training mode.
        loader: iterable of ``(images, labels)`` batches (default ``Train_loader``).
        epochs: number of passes over ``loader`` (default ``num_epochs``).
        loss_fn: callable ``loss_fn(outputs, labels)`` (default ``criterion``).
        opt: optimizer stepping the model parameters (default ``optimizer``).
        target_device: device the batches are moved to (default ``device``).

    Returns:
        list[float]: mean batch loss per epoch (``nan`` for an epoch without batches).
    """
    loader = Train_loader if loader is None else loader
    epochs = num_epochs if epochs is None else epochs
    loss_fn = criterion if loss_fn is None else loss_fn
    opt = optimizer if opt is None else opt
    target_device = device if target_device is None else target_device

    # An earlier evaluation may have left the model in eval mode.
    model.train()

    epoch_losses = []
    for epoch in range(epochs):
        print("\nepoch: ", epoch + 1)
        running_loss = 0.0
        num_batches = 0
        for i, (images, labels) in enumerate(loader):
            images = images.to(target_device)
            labels = labels.to(target_device)

            # Forward pass
            outputs = model(images)
            loss = loss_fn(outputs, labels)

            # Backward pass
            opt.zero_grad()  # clear gradients left over from the previous step
            loss.backward()
            opt.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            num_batches += 1
            # "\r" rewrites the same output line instead of flooding the cell.
            print("batch: ", i + 1, "loss: ", batch_loss, end="          \r")
        epoch_losses.append(running_loss / num_batches if num_batches else float("nan"))
    return epoch_losses

In [60]:
# Start training; train_model reads the loader, loss, optimizer and epoch count from the notebook globals.
train_model(model);


epoch:  1


OutOfMemoryError: CUDA out of memory. Tried to allocate 128.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 3.34 GiB is allocated by PyTorch, and 65.45 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
#Check accuracy on training and test to see how good our model is.
def check_accuracy(loader, model, target_device=None):
    """Evaluate ``model`` on ``loader`` and return the accuracy in percent.

    Batches are passed to the model unchanged (no flattening), as required by
    a convolutional network.  The model is put in evaluation mode for the
    duration of the call and its previous train/eval mode is restored
    afterwards.

    Args:
        loader: iterable of ``(inputs, labels)`` batches.
        model: module returning per-class scores of shape ``(batch, classes)``.
        target_device: device the batches are moved to; defaults to the
            notebook-level ``device``.

    Returns:
        float: percentage of correctly classified samples (0.0 when the
        loader yields no samples).
    """
    if target_device is None:
        target_device = device

    was_training = model.training
    model.eval()  # evaluation mode for layers that behave differently in training

    correct = 0
    num_samples = 0
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(target_device)
                y = y.to(target_device)
                scores = model(x)
                predictions = scores.argmax(dim=1)
                # .item() keeps the counter a plain int rather than a tensor.
                correct += int((predictions == y).sum().item())
                num_samples += predictions.size(0)
    finally:
        # Restore whatever mode the caller had, even if evaluation failed.
        model.train(was_training)

    accuracy = 100.0 * correct / num_samples if num_samples else 0.0
    print(f"Got {correct} / {num_samples} with accuracy {accuracy:.2f}")
    return accuracy