In [1]:
# !nvidia-smi

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from datasets import load_dataset, DatasetDict
from einops import rearrange, einsum

import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchvision.models as models
from torchvision.models import resnet18
from torchvision import transforms

In [3]:
# internal_model = models.resnet18
# internal_weights = models.ResNet18_Weights.IMAGENET1K_V1

internal_model = models.resnet34
internal_weights = models.ResNet34_Weights.IMAGENET1K_V1

# internal_model = models.resnet50
# internal_weights = models.ResNet50_Weights.IMAGENET1K_V1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# Load images
x_train = np.load('x_train.npy')
x_test = np.load('x_test.npy')

y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')

## View some images
# plt.imshow(x_train[2,:,:,: ] )
# plt.axis('off')
# plt.show()

# # convert to torch
# x_train = torch.from_numpy(x_train)
# x_test = torch.from_numpy(x_test)

# y_train = torch.from_numpy(y_train)
# y_test = torch.from_numpy(y_test)

# print('X_train shape:\t' ,x_train.shape)
# print('Y_train shape\t' ,y_train.shape)

# print('X_test shape\t' ,x_test.shape)
# print('Y_test shape\t' ,y_test.shape)

# train_cut = 300
# x_train = x_train[:train_cut]
# y_train = y_train[:train_cut]

# test_cut = 100
# x_test = x_test[:test_cut]
# y_test = y_test[:test_cut]



In [5]:
transform_fn = internal_weights.transforms()
convert_to_tensor = transforms.ToTensor()

# print('Transforms:\n', transform_fn)

def process_image(image):
    image = convert_to_tensor(image)
    # image = rearrange(image, 'h w c -> c h w')
    image = transform_fn(image)
    return image

# transform images
x_train_transformed = list(map(process_image, x_train))
x_test_transformed = list(map(process_image, x_test))

# stack images
x_train_tensor = torch.stack(x_train_transformed) #.to(device)
x_test_tensor = torch.stack(x_test_transformed) #.to(device)

# convert labels to tensor
y_train_tensor = torch.tensor(y_train) #.to(device)
y_test_tensor = torch.tensor(y_test) #.to(device)

y_train_tensor = y_train_tensor - 1
y_test_tensor = y_test_tensor - 1

# add dimension to labels
# y_train_tensor = y_train_tensor.unsqueeze(1)
# y_test_tensor = y_test_tensor.unsqueeze(1)

# TensorDataset
train_data = TensorDataset(x_train_tensor, y_train_tensor)
test_data = TensorDataset(x_test_tensor, y_test_tensor)

# DataLoader
batch_size = 32
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, drop_last=True)


In [6]:
# sample, label = next(iter(train_loader))
# print('Sample shape:', sample.shape)
# print('Label shape:', label.shape)

### Model

In [8]:
class BiLinearModel(nn.Module):
    def __init__(self, num_classes):
        super(BiLinearModel, self).__init__()
        
        self.cnn1 = internal_model(weights=internal_weights)
        self.cnn2 = internal_model(weights=internal_weights)
        

        self.cnn1 = nn.Sequential(*list(self.cnn1.children())[:-2])
        self.cnn2 = nn.Sequential(*list(self.cnn2.children())[:-2])

        self.dropout1 = nn.Dropout(0.5)
        self.dropout2 = nn.Dropout(0.5)

        self.feature_size = internal_model(weights=internal_weights).fc.in_features
        # print('Feature size:', self.feature_size)

        # Define bilinear pooling
        # self.fc = nn.Linear(self.feature_size**2, num_classes) 
        self.fc = nn.Sequential(
            nn.Linear(self.feature_size**2, self.feature_size),
            nn.ReLU(),
            nn.Linear(self.feature_size, num_classes)
        )
    
    def forward(self, x):
        x1 = self.cnn1(x)
        x2 = self.cnn2(x)
        
        # bilinear pooling with einops
        x1 = rearrange(x1, 'b k h w -> b k (h w)')
        x2 = rearrange(x2, 'b k h w -> b k (h w)')

        # dropouts
        x1 = self.dropout1(x1)
        x2 = self.dropout2(x2)

        x = einsum(x1, x2, 'b i j, b k j -> b i k')
        # print('X shape:', x.shape)
        x = rearrange(x, 'b i j -> b (i j)')
        # print('X shape:', x.shape)

        x = self.fc(x)
        return x

model = BiLinearModel(num_classes=20)
model = model.to(device)

in_tensor = torch.randn(1, 3, 224, 224).to(device)
model(in_tensor).shape


  return F.conv2d(input, weight, bias, self.stride,


torch.Size([1, 20])

In [9]:
import torch.optim as optim
from torch.optim import lr_scheduler
from tqdm import tqdm 

# Training function
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        with tqdm(total=len(train_loader), desc=f"Epoch {epoch+1}/{num_epochs}") as pbar:
            for sample in train_loader:
                image, label = sample
                image, label = image.to(device), label.to(device)
                optimizer.zero_grad()
                outputs = model(image)
                loss = criterion(outputs, label)
                loss.backward()
                optimizer.step()
                # update progress bar
                running_loss += loss.item()*image.size(0)

                # accuracy
                _, preds = torch.max(outputs, 1)
                corrects = torch.sum(preds == label.data)
                accuracy = corrects.double() / image.size(0)
                
                pbar.set_postfix(loss=running_loss/len(train_loader.dataset), accuracy=accuracy.item())
                pbar.update(1)
        
        scheduler.step()

    return model

# Freeze the weights of the pre-trained models
for param in model.cnn1.parameters():
    param.requires_grad = False
for param in model.cnn2.parameters():
    param.requires_grad = False

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.fc.parameters(), lr=1e-5, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Train the model
model = train_model(model, criterion, optimizer, scheduler, num_epochs=20)

# Unfreeze the weights and train again
for param in model.cnn1.parameters():
    param.requires_grad = True
for param in model.cnn2.parameters():
    param.requires_grad = True

optimizer = optim.SGD(model.parameters(), lr=1e-6, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Train the model again
model = train_model(model, criterion, optimizer, scheduler, num_epochs=30)

# evaluate the model
model.eval()
corrects = 0
total = 0
with tqdm(total=len(test_loader), desc=f"Evaluating") as pbar:
    with torch.no_grad():
        for sample in test_loader:
            image, label = sample
            image, label = image.to(device), label.to(device)
            outputs = model(image)
            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == label.data)
            total += image.size(0)
            pbar.update(1)
print(f"Accuracy: {corrects.double()/total}")


Epoch 1/10:   0%|          | 0/24 [00:00<?, ?it/s]

Epoch 1/10: 100%|██████████| 24/24 [00:05<00:00,  4.49it/s, accuracy=0.312, loss=3e+4]   
Epoch 2/10: 100%|██████████| 24/24 [00:05<00:00,  4.62it/s, accuracy=0.406, loss=1.13e+4]
Epoch 3/10: 100%|██████████| 24/24 [00:05<00:00,  4.56it/s, accuracy=0.812, loss=4.51e+3]
Epoch 4/10: 100%|██████████| 24/24 [00:05<00:00,  4.58it/s, accuracy=0.875, loss=1.8e+3] 
Epoch 5/10: 100%|██████████| 24/24 [00:05<00:00,  4.57it/s, accuracy=0.75, loss=2.38e+3] 
Epoch 6/10: 100%|██████████| 24/24 [00:05<00:00,  4.49it/s, accuracy=0.938, loss=779]
Epoch 7/10: 100%|██████████| 24/24 [00:05<00:00,  4.55it/s, accuracy=0.969, loss=363]
Epoch 8/10: 100%|██████████| 24/24 [00:05<00:00,  4.50it/s, accuracy=1, loss=166]     
Epoch 9/10: 100%|██████████| 24/24 [00:05<00:00,  4.52it/s, accuracy=0.906, loss=109] 
Epoch 10/10: 100%|██████████| 24/24 [00:05<00:00,  4.53it/s, accuracy=0.938, loss=158]
Epoch 1/15: 100%|██████████| 24/24 [00:14<00:00,  1.65it/s, accuracy=0.812, loss=461]
Epoch 2/15: 100%|██████████| 24

Accuracy: 0.5924479166666666





In [None]:
# bilinear pooling with einops
# x1 = rearrange(x1, 'b k h w -> b k (h w)')
# x2 = rearrange(x2, 'b k h w -> b k (h w)')
# x = einsum('b i j, b k j -> b i k', x1, x2)
