### Transfer learning

Install and import packages

In [271]:
# %pip install numpy
# %pip install matplotlib
# %pip install seaborn
# %pip install torch
# %pip install torchvision
# %pip install helper
# %pip install tqdm

In [272]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch as th
from torch.utils.data import DataLoader
import torchvision as tv
from torchvision import transforms
import os
from tqdm import tqdm
from xml.dom import minidom


### Load dataset
Maybe normalize dataset

In [273]:
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms.functional as TF
import torchvision.transforms as tttttt
import random

random.seed(5)
np.random.seed(100)

def initPathsBase():
        files = os.listdir("./data/images/")
        jpgList = []
        
        for file in files:
            if file.endswith(".jpg"):
                jpgList.append(file)
        
        random.shuffle(jpgList)
        
        return jpgList

def initClasses(binary):
    files = initPathsBase()
    classList = []
    for fname in files:
        theClass = getClass(binary, fname)
        if theClass not in classList:
            classList.append(theClass)
    return classList
    
    
def initClassesCatDog(binary):
    files = initPathsBase()
    classList = []
    for fname in files:
        fname = cleanFileName(fname)
        theClass = getClass(binary, fname)
        if theClass not in classList:
            classList.append(theClass)
    return ["cat", "dog"]

def getClassCatDog(fileName):
    if fileName[0].isupper():
        return "cat"
    else:
        return "dog"
    
def cleanFileName(name):
        if name[0:3] == "AUG":
           return name[3:]
        return name
    
    # Classifies data into Cat/Dog or into one of the 37 classes of breeds
def getClass(binary, fileName):
    fileName = cleanFileName(fileName)
    # getCenterBoundingBox(fileName)
    if binary:
        return getClassCatDog(fileName)
    else:
        nameList = fileName.split("_")
        name = ""
        for namePart in nameList:
            if ".jpg" not in namePart:
                name += namePart
        return name

    
def open_image(path): # https://jovian.ai/aakashns/transfer-learning-pytorch
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')
    
def getDataLists():
    files = initPathsBase()
    return files[:int(len(files)*0.7)], files[int(len(files)*0.7):]

class MyDataset(Dataset):
    def __init__(self, binary=False, limit=1, train=True, paths=None):
        super().__init__()
        self.size=224
        self.files = self.initPaths(limit=limit, train=train, li=paths) # self.initPaths(limit, train)
        self.binary = binary
        self.train = train
        self.classes = initClasses(binary) if not binary else initClassesCatDog(binary)
        # TODO: fix transforms better
        self.transform = self.getTransform()
        self.augTransform = self.getAugTransform()

    def __len__(self):
        return len(self.files)

    def __getitem__(self, i, stats=False):
        path = "./data/images/" + self.files[i]
        if stats:
            img = open_image(path)
        else:
            if self.files[i][0:3] == "AUG":
                img = self.augTransform(open_image("./data/images/" + self.files[i][3:]))
            else:
                img = self.transform(open_image(path))
        class_idx = self.classes.index(self.getClass(self.files[i]))

        return img, class_idx
    
    def getClass(self, fileName):
        return getClass(self.binary, fileName)
    
    def getTransform(self):
        return transforms.Compose([transforms.Resize(255),
                                    transforms.CenterCrop(224), 
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                    ]) # normalization ABC
        
    def getAugTransform(self):
        return transforms.Compose([
            transforms.RandomHorizontalFlip(p = 0.7), 
                                    transforms.RandomRotation(degrees=(-30,30)),
                                    transforms.Resize(255),
                                    transforms.CenterCrop(224), 
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                    ]) # normalization ABC

    
    def getCenterBoundingBox(self, fileName):
        try:
            file = minidom.parse('./annotations/xmls/'+fileName.replace(".jpg", '.xml'))
            xmin = file.getElementsByTagName('xmin')[0].firstChild.data
            xmax = file.getElementsByTagName('xmax')[0].firstChild.data
            ymin = file.getElementsByTagName('ymin')[0].firstChild.data
            ymax = file.getElementsByTagName('ymax')[0].firstChild.data
            return int(xmin), int(xmax), int(ymin), int(ymax)
        except:
            return 0, 0, 0, 0
    
    
    def getStats(self):
        avgImage = th.Tensor(np.zeros((self.size, self.size, 3)))
        
        for i in range(self.__len__()):
            img,_ = self.__getitem__(i, stats=True)
            t = transforms.Compose([transforms.Resize(255),
                                    transforms.CenterCrop(224), 
                                    transforms.ToTensor(),
                                    ]) 
            avgImage += t(img)
            
        
        avgImage /= self.__len__()
        
        mean = []
        std = []
        for dimension in enumerate(avgImage):
            print(dimension)
            mean.append(th.mean(dimension))
            std.append(th.std(dimension))
        
        print(mean, std)
            
        
        # return [mean[1], mean[2], mean[3]], [std[1], std[2], std[3]]
        
    def getCropStats(self):
        x = 0
        y = 0
        n = 0
        x_li = []
        y_li = []
        
        for file in self.files:
            xmin, xmax, ymin, ymax = self.getCenterBoundingBox(file)
            if (xmin != 0) and (xmax != 0) and (ymin != 0) and (ymax != 0):
                n += 1
                
                x += (xmax + xmin)/2
                y += (ymax + ymin)/2
                
                x_li.append(x)
                y_li.append(y)
                
        print("Average x:", x/n, "Average y:", y/n)
        
        x_np = np.asarray(x_li)
        y_np = np.asarray(y_li)
        
        x_std = np.std(x_np)
        y_std = np.std(y_np)
        print("STD x:", x_std, "STD y:", y_std)
            
    
    def addAugmented(self, jpgList):
        newJpgList = jpgList.copy()
        for jpg in jpgList: # Doubles the ammount of data 
            newJpgList.append("AUG"+jpg)

        return newJpgList
    
    def initPaths(self, limit, train, li):
        # files = os.listdir("./data/images/")
        # jpgList = []
        
        # for file in files:
        #     if file.endswith(".jpg"):
        #         jpgList.append(file)
        
        # random.shuffle(jpgList)
        jpgList = li
        if train:
            # jpgList = jpgList[:int(len(jpgList)*0.7)]
            jpgList = self.addAugmented(li)
            random.shuffle(jpgList)
        else:
            # jpgList = jpgList[int(len(jpgList)*0.7):]
            pass
        
        return jpgList[0:int(len(jpgList)*limit)]
    

# test = MyDataset(False)
# test.getCropStats()

In [274]:
from torch.utils.data import random_split

def initDataset(batch_size, binary): #batch size affects computation time
    trainPaths, testValPaths = getDataLists()
    train = MyDataset(binary, train=True, paths=trainPaths)
    testValDataset = MyDataset(binary, train=False, paths=testValPaths)
    testValDataset.classes = train.classes # WTF
    # Train/Validation/Test split. Current: 70/15/15

    # train, test = random_split(dataset, [int(0.85*len(dataset))+1, int(0.15*len(dataset))])
    valid, test = random_split(testValDataset, [int(0.5*len(testValDataset))+1, int(0.5*len(testValDataset))]) 
    
    # print(len(train), len(valid), len(test))
    # print(len(train)+ len(valid)+ len(test), len(dataset))
    
    # Enable augementation for the training dataset
    # augmentedDataset = AugmentedDataset(train)
    
    train_loader = th.utils.data.DataLoader(train,
                                            batch_size=batch_size, 
                                            shuffle=True)
    
    test_loader = th.utils.data.DataLoader(test,
                                            batch_size=batch_size,
                                            shuffle=True)
    valid_loader = th.utils.data.DataLoader(valid,
                                            batch_size=batch_size,
                                            shuffle=True)
    
    return train_loader , test_loader, valid_loader, train

train_loader, test_loader, valid_loader, train = initDataset(batch_size=32, binary=False)

### Inspect dataset

In [275]:
# n = 0
# n2 = 0
# for batch_idx, batch in enumerate(train_loader):
#     n += 32
#     if n%320 == 0:
#         print(n)
    
#     if n == 64:
#         images = batch[0]
#         labels = batch[1]
#         for i in range(len(images)):
#             n2+=1
#             plt.imshow(images[i].permute(1, 2, 0))
#             plt.show()
#             if n2 == 1:
#                 break
#         break


### Initialize model

In [276]:
model = tv.models.resnet18(progress = True, pretrained=True)
# model.eval()

In [277]:

#Set requires_grad to false for every layer
moreLayers = True
for param in model.parameters():
    param.requires_grad = False

modules = model.named_modules()

for i in modules:
    if isinstance(i[1], th.nn.BatchNorm2d):
        if "layer4" in i[0] and "bn" in i[0]:
            i[1].momentum = 0.2
    
# Replace the last layer of the pretrained model with our own:
# This should theoretically only set the last layer to requires_grad = True, since it is the default setting
model.fc = th.nn.Linear(model.fc.in_features, 37) # 37 if not binary

# Set the second layer to requires_grad = True
for param in model.layer4.parameters():
    param.requires_grad = True


# Replace more layers:
# TODO: implement

# Examine different learning rates / rate schedulers
# TODO: implement

# Apply data augmentation during training (flip, small rotations, crops, small size scaling)
# TODO: implement

# Effect of fine-tuning or not the batch-norm parameters and updating the estimate of the batch 
# mean and standard deviations on the final performance on the new dataset.
# model.eval()

### Train the model

In [278]:
# Train the model on our dataset
def train_model(model, train_loader, valid_loader, epochs=5, lr=10**-3, weight_decay=0.0, sheduler_gamma=0.9):
    # Define the loss function
    criterion = th.nn.CrossEntropyLoss()
    # Define the optimizer
    optimizer = th.optim.Adam(model.fc.parameters(), lr=lr, weight_decay=weight_decay) # filter(lambda p: p.requires_grad, model.parameters()),
    
    # Set different learning rates for different layers
    optimizer.add_param_group({'params': model.layer4.parameters(), 'lr': lr/10})
    
    # Train the model
    scheduler = th.optim.lr_scheduler.ExponentialLR(optimizer, gamma=sheduler_gamma)
    
    for epoch in range(epochs):
        # Training
        model.train() #trains model
        for batch_idx, batch in enumerate(tqdm(train_loader)):
            images = batch[0]
            labels = batch[1]
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad() # Reset the gradients, maybe we should not do this
            loss.backward() # Compute the gradients
            optimizer.step() # Update the weights
        # Validation
        scheduler.step()
        model.eval()
        with th.no_grad(): # Disables tracking of calculations required to calculate gradients
            correct = 0
            total = 0
            for batch_idx, batch in enumerate(valid_loader):
                images = batch[0]
                labels = batch[1]
                #print(labels)
                outputs = model(images)
                #print(outputs)
                _, predicted = th.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print('Learning rate: {} \n'.format(lr))
            print('Accuracy of the network on the validation images: {} %'.format(100 * correct / total))
            # correct = 0
            # total = 0
            # for batch_idx, batch in enumerate(train_loader):
            #     images = batch[0]
            #     labels = batch[1]
            #     #print(labels)
            #     outputs = model(images)
            #     #print(outputs)
            #     _, predicted = th.max(outputs.data, 1)
            #     total += labels.size(0)
            #     correct += (predicted == labels).sum().item()
            # print('Accuracy of the network on the train images: {} %'.format(100 * correct / total))
            
    return model

# new = train_model(model, train_loader, valid_loader, epochs=20, lr=10**-3 , weight_decay=0.0)
new = train_model(model, train_loader, valid_loader, epochs=5, lr=10**-3, weight_decay=0.0, sheduler_gamma=0.5)
#new = train_model(model, train_loader, valid_loader, epochs=5, lr=10**-3, weight_decay=0.0)
#new = train_model(model, train_loader, valid_loader, epochs=5, lr=10**-4, weight_decay=0.0)

100%|██████████| 324/324 [28:33<00:00,  5.29s/it]


Learning rate: 0.001 

Accuracy of the network on the validation images: 89.72046889089269 %


100%|██████████| 324/324 [26:44<00:00,  4.95s/it]


Learning rate: 0.001 

Accuracy of the network on the validation images: 90.892696122633 %


 10%|█         | 34/324 [03:54<31:34,  6.53s/it]

In [None]:
total = 0
correct = 0
n = 0
for batch_idx, batch in enumerate(test_loader):
    images = batch[0]
    labels = batch[1]
    #print(labels)
    outputs = model(images)
    #print(outputs)
    _, predicted = th.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    n+=1
    for i in range(len(images)):
            
            plt.imshow(images[i].permute(1, 2, 0))
            plt.title("Predicted: " + str(train.classes[predicted[i].item()]) + "\nActual: " + str(train.classes[labels[i].item()]), fontsize=15, color='green' if predicted[i] == labels[i] else 'red', fontweight='bold')
            plt.show()
    if n == 3:
        break

In [None]:
new2 = train_model(model, train_loader, valid_loader, epochs=10, lr=10**-3, weight_decay=0.0,sheduler_gamma=0.1)


In [None]:
new2 = train_model(model, train_loader, valid_loader, epochs=10, lr=10**-3, weight_decay=0.0, sheduler_gamma=0.5)


In [None]:
new3 = train_model(model, train_loader, valid_loader, epochs=20, lr=10**-4, weight_decay=0.0, sheduler_gamma=0.1)


# Milestones:
## Grade E:
- [x] Achieve >99% on binary classification
- [ ] Achieve >95% on multi-class classification
- [x] Examine fine tuning more layers
- [x] Examine different learning rates
- [x] Examine data augmentation
- [-] Fine tune batch-norm

## Grade A:
### Decrease the percentage of labelled data: 
- [ ] 50 %
- [ ] 10 %
- [ ] 1 %
- [ ] Implement Pseudo-labelling


### Pseudo labelling

In [None]:
# Pseudo labelling

# Reduce the amount of labels

# Train a model on the reduced dataset

# Use the model to generate pseudo labels

# Train a model on the pseudo labels and the original dataset

# Implement ensambling? Implement Pretrain?