In [3]:
from __future__ import print_function, division
import os
import torch
import torch.nn as nn
import pandas as pd
import torch.optim as optim
from torch.optim import lr_scheduler
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets,transforms, models, transforms
import numpy as np
#from utils import plot_images
from torch.utils.data.sampler import SubsetRandomSampler



import scipy.io
from PIL import Image
import copy
import time

im = Image.open('data/cars_test/00011.jpg')
trafo = transforms.Compose([transforms.Resize(244),
                            transforms.CenterCrop(244),
                            transforms.ToTensor(),
                            transforms.Normalize(
                                    mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225]) ])
im = trafo(im)#[None, :, :, :]

In [4]:
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()
        
    def forward(self, p):
        return p

class Bilinear(nn.Module):
    def __init__(self, fc_in, conv1, conv2):
        super(Bilinear, self).__init__()
        self.f_a = conv1   
        self.f_b = conv2    
        self.fc_in = fc_in
        self.fc = nn.Linear(self.fc_in, 196)
        #self.softmax = nn.Softmax(1)
        
    def forward(self, x):
        out_fa = self.f_a(x)
        out_fb = self.f_b(x)
        bs = out_fa.shape[0]
        #print('p1:', out_fb.shape)
        #print('p2:', out_fa.shape)
        pooled_bil = self.bilinearSumpool(A=out_fa, B=out_fb)
        #print('min:', pooled_bil.min())
        #print('max:', pooled_bil.max())
        pooled_bil = torch.sign(pooled_bil)*torch.sqrt(torch.abs(pooled_bil))
        pooled_bil = pooled_bil / torch.norm(pooled_bil)
        
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        #pooled_bil = pooled_bil.to(device)
        
        pooled_bil = self.fc(pooled_bil.cuda())
        #x = self.softmax(self.fc(x))
        return pooled_bil
    
    def bilinearSumpool(self, A, B):
        bs, fs, row, col = A.shape    # bs: batchsize, fs: featuresize
        bil = torch.empty((bs, row, col, fs, fs))
        #print(bs, row, col, fs, fs)
        for z in range(row):
            for s in range(col):
                bil[:, z, s] = torch.bmm(A[:, :, z, s][:, :, None], B[:, :, z, s][:, None, :])
        bil = bil.sum((1,2)).view(bs, -1)
        #print(bil.shape)
        return bil

To cut the ResNet50 and get the feature output of shape $(512\times 28\times 28)$ use  
`nn.Sequential(*list(list(ResNet50Module.children())[0].children())[:-2])`  
To cut the VGG19 and get the feature output of shape $(512\times 28\times 28)$ use  
`nn.Sequential(*list(list(list(vgg19Module.children())[0])[0])[:-14])`

Alternatively for the shape $(256\times 56\times 56)$  

for PyTorch  
`nn.Sequential(*list(resnet50.children())[:-5])`

`nn.Sequential(*list(list(vgg19.children())[0])[:-27])`


for FastAi  
`nn.Sequential(*list(list(ResNet50Module.children())[0].children())[:-3])`  

`nn.Sequential(*list(list(list(vgg19.children())[0])[0])[:-27])`

In [5]:
# use the 256 x 56 x 56 shape
resnet50 = models.resnet50(pretrained=True)
vgg19 = models.vgg19_bn(pretrained=True)

resnet50_cut = nn.Sequential(*list(resnet50.children())[:-5])
vgg19_cut = nn.Sequential(*list(list(vgg19.children())[0])[:-27])

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
batch = torch.stack((im, im, im, im, im, im, im, im))
model = Bilinear(256*256, vgg19_cut, resnet50_cut)
model = model.to(device)

#model(batch.to(device)).shape

In [8]:
bil = torch.load('Bilinear Model beginning')

## Getting the Dataset/Dataloader

In [9]:
def get_train_valid_loader(data_dir,
                           batch_size,
                           #augment,
                           random_seed,
                           valid_size=0.1,
                           shuffle=True,
                           show_sample=False,
                           num_workers=1,
                           pin_memory=True):
    """
    Utility function for loading and returning train and valid
    multi-process iterators over the Stanford Cars dataset. A sample
    9x9 grid of the images can be optionally displayed.
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - augment: whether to apply the data augmentation scheme
      mentioned in the paper. Only applied on the train split.
    - random_seed: fix seed for reproducibility.
    - valid_size: percentage split of the training set used for
      the validation set. Should be a float in the range [0, 1].
    - shuffle: whether to shuffle the train/validation indices.
    - show_sample: plot 9x9 sample grid of the dataset.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - train_loader: training set iterator.
    - valid_loader: validation set iterator.
    """
    error_msg = "[!] valid_size should be in the range [0, 1]."
    assert ((valid_size >= 0) and (valid_size <= 1)), error_msg

#     normalize = transforms.Normalize(
#         mean=[0.4914, 0.4822, 0.4465],
#         std=[0.2023, 0.1994, 0.2010],
#     )

    # define transforms
    
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(244),
            transforms.RandomHorizontalFlip(),
            #transforms.RandomRotation(90),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'valid': transforms.Compose([
            transforms.Resize(244),
            transforms.CenterCrop(244),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    
    train_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, 'train'),
                                          data_transforms[x])
                  for x in ['train', 'valid']}
    

    num_train = len(train_datasets['train'])
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_datasets['train'], batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    valid_loader = torch.utils.data.DataLoader(
        train_datasets['valid'], batch_size=batch_size, sampler=valid_sampler,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    # visualize some images
    if show_sample:
        sample_loader = torch.utils.data.DataLoader(
            train_datasets['train'], batch_size=9, shuffle=shuffle,
            num_workers=num_workers, pin_memory=pin_memory,
        )
        data_iter = iter(sample_loader)
        images, labels = data_iter.next()
        X = images.numpy().transpose([0, 2, 3, 1])
        plot_images(X, labels)

    return ({'train': train_loader, 'valid': valid_loader})


def get_test_loader(data_dir,
                    batch_size,
                    shuffle=True,
                    num_workers=1,
                    pin_memory=True):
    """
    Utility function for loading and returning a multi-process
    test iterator over the CIFAR-10 dataset.
    If using CUDA, num_workers should be set to 1 and pin_memory to True.
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - shuffle: whether to shuffle the dataset after every epoch.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    Returns
    -------
    - data_loader: test set iterator.
    """
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # define transform
    transform = transforms.Compose([
        transforms.Resize(244),
        transforms.CenterCrop(244),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    dataset = datasets.ImageFolder(os.path.join(data_dir, 'test'), transform=transform)
    

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    return data_loader

In [12]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(244),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(244),
        transforms.CenterCrop(244),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'data/kaggle/stanford-car-dataset-by-classes-folder'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=8,
                                             shuffle=True, num_workers=0)
              for x in ['train', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
dataset_sizes['valid'] = dataset_sizes['train']*0.2
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [13]:
def cropped_dataloaders(size, batchsize_train, batchsize_test=None): 
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(size),
            transforms.RandomHorizontalFlip(),
            #transforms.RandomRotation(90),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'test': transforms.Compose([
            transforms.Resize(size),
            transforms.CenterCrop(size),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    if batchsize_test == None: batchsize_test = batchsize_train
    batchsizes = {'train': batchsize_train, 'test': batchsize_test}
    data_dir = 'data/kaggle/stanford-car-dataset-by-classes-folder'
    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                              data_transforms[x])
                      for x in ['train', 'test']}
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batchsizes[x],
                                                 shuffle=True, num_workers=0)
                  for x in ['train', 'test']}
    return dataloaders

def visualize_model(model, dataloaders, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['test']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

## Defining the Training function

In [14]:
def train_model(model, criterion, optimizer, scheduler, dataloader, num_epochs=25, start_epoch=0, savePath='', accuracities=[[], []]):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+start_epoch, num_epochs - 1 + start_epoch))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                if (epoch+start_epoch)%4 != 0: break # Evaluate only every 3th epidode
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            i, j = 0, 0
            for inputs, labels in dataloader[phase]:
                i += inputs.shape[0]
                if i >= 2000:
                    j += 1
                    i -= 2000
                    print(j*2000, 'Bilder gesehen')
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            if phase == 'valid':
                accuracities[0].append(epoch+start_epoch)
                accuracities[1].append(epoch_acc)
                torch.save({'model_state_dict': model.state_dict(),
                            'accuracities': accuracities, 
                            'optimizer_state_dict': optimizer.state_dict()},
                            f'models/{savePath}_e{epoch+start_epoch}.pt')

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, accuracities


def evaluate_model(model, criterion, dataloader):
    # Each epoch has a training and validation phase

    running_loss = 0.0
    running_corrects = 0

    # Iterate over data.
    i, j = 0, 0
    for inputs, labels in dataloader['valid']:
        i += inputs.shape[0]
        if i >= 2000:
            j += 1
            i -= 2000
            print(j*2000, 'Bilder gesehen')
        inputs = inputs.to(device)
        labels = labels.to(device)

        # forward
        # track history if only in train
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

        # statistics
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / dataset_sizes['valid']
    epoch_acc = running_corrects.double() / dataset_sizes['valid']

    print('Loss: {:.4f} Acc: {:.4f}'.format(
         epoch_loss, epoch_acc))

    print()
    print('Evaluation complete')

In [15]:
#model = model.to(device)
bil = bil.to(device)
criterion = nn.CrossEntropyLoss()

parameters = []
for param in bil.parameters():
        parameters.append(param)
        param.requires_grad = False
        
for param in bil.fc.parameters():
    param.requires_grad = True
#parameters[-1].requires_grad = True
        
# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer = optim.SGD(bil.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
#dl244 = cropped_dataloaders(244, 8)
dl244 = get_train_valid_loader(data_dir='data/kaggle/stanford-car-dataset-by-classes-folder', 
                               batch_size=8, random_seed=42, valid_size=0.2)


In [26]:
bil, accuracities = train_model(model=bil, criterion=criterion, optimizer=optimizer, scheduler=exp_lr_scheduler,
                                 dataloader=dl244, num_epochs=21, savePath='BilinearVggRes')

Epoch 0/20
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2186 Acc: 0.0072
valid Loss: 5.2856 Acc: 0.0055

Epoch 1/20
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2184 Acc: 0.0072

Epoch 2/20
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2181 Acc: 0.0072

Epoch 3/20
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2179 Acc: 0.0072

Epoch 4/20
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2177 Acc: 0.0072
valid Loss: 5.2883 Acc: 0.0055

Epoch 5/20
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2176 Acc: 0.0072

Epoch 6/20
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2174 Acc: 0.0072

Epoch 7/20
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2173 Acc: 0.0072

Epoch 8/20
----------
2000

In [33]:
import torchvision


def get_new_model(num, lrate=0.001):
    if num == 18:
        model_conv = torchvision.models.resnet18(pretrained=True)
    if num == 34:
        model_conv = torchvision.models.resnet34(pretrained=True)
    if num == 50:
        model_conv = torchvision.models.resnet50(pretrained=True)
    if num == 152:
        model_conv = torchvision.models.resnet152(pretrained=True)
    for param in model_conv.parameters():
        param.requires_grad = False

    # Parameters of newly constructed modules have requires_grad=True by default
    num_ftrs = model_conv.fc.in_features
    model_conv.fc = nn.Linear(num_ftrs, 196)

    model_conv = model_conv.to(device)

    criterion = nn.CrossEntropyLoss()

    # Observe that only parameters of final layer are being optimized as
    # opposed to before.
    optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=lrate, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=10, gamma=0.1)
    return model_conv, criterion, optimizer_conv, exp_lr_scheduler


In [34]:
model_conv, criterion, optimizer_conv, exp_lr_scheduler = get_new_model(50)
model_conv, accuracities = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler,dataloader=dl244, num_epochs=10, savePath='ResNet50_dl256_withRotation_bz8')

Epoch 0/9
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 4.2079 Acc: 0.0131
valid Loss: 4.9699 Acc: 0.0368

Epoch 1/9
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 3.9212 Acc: 0.0404

Epoch 2/9
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 3.7019 Acc: 0.0670

Epoch 3/9
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 3.5167 Acc: 0.0903

Epoch 4/9
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 3.3648 Acc: 0.1142
valid Loss: 3.9960 Acc: 0.1553

Epoch 5/9
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 3.2340 Acc: 0.1406

Epoch 6/9
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 3.1248 Acc: 0.1607

Epoch 7/9
----------
2000 Bilder gesehen
4000 Bilder gesehen
6000 Bilder gesehen
train Loss: 3.0229 Acc: 0.1799

Epoch 8/9
----------
2000 Bilder g