# Transfer Learning

In [90]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import os
import cv2
import time
import glob
import random
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt

from torchvision import models
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard.writer import SummaryWriter

from tqdm import tqdm
from tempfile import TemporaryDirectory
from pandas.core.common import flatten
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import precision_score, f1_score, recall_score, confusion_matrix

cudnn.benchmark = True
plt.ion()
writer = SummaryWriter('runs')

if torch.cuda.is_available():
    device = torch.device('cuda:0')
    print("Running on CUDA!\n")
else:
    device = torch.device('cpu')
    print("Running on CPU!\n")

train_data_path = "./Dataset/Train/" 
test_data_path = "./Dataset/Validation/"

class_names = []
test_image_paths = []
train_image_paths = []

for data_path in glob.glob(train_data_path + '/*'):
    class_names.append(data_path.split('/')[-1]) 
    train_image_paths.append(glob.glob(data_path + '/*.jpg'))
    
for data_path in glob.glob(test_data_path + '/*'):
    test_image_paths.append(glob.glob(data_path + '/*.jpg'))

train_image_paths = list(flatten(train_image_paths))
test_image_paths = list(flatten(test_image_paths))
random.shuffle(train_image_paths)

idx_to_class = {i:j for i, j in enumerate(class_names)}
class_to_idx = {value:key for key,value in idx_to_class.items()}

class Dataset(Dataset):
    def __init__(self, image_paths, transform=False):
        self.image_paths = image_paths
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_filepath = self.image_paths[idx]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        label = image_filepath.split('/')[-2]
        label = class_to_idx[label]
        if self.transform:
            image = self.transform(image=image)["image"]
        
        return image, label

data_transforms = {
    "Train": A.Compose([
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=360, p=0.5),
        A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.MultiplicativeNoise(multiplier=[0.5,2], per_channel=True, p=0.2),
        A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        A.HorizontalFlip(),
        A.VerticalFlip(),
        A.Resize(224, 224),
        A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
    "Validation": A.Compose([
        A.Resize(224, 224),
        A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]),
}

image_datasets = {
    "Train" : Dataset(train_image_paths, data_transforms["Train"]),
    "Validation" : Dataset(test_image_paths, data_transforms["Validation"]),
}
dataloaders = {x:DataLoader(image_datasets[x], batch_size=16, shuffle=True) for x in ["Train", "Validation"]}
dataset_sizes = {x: len(image_datasets[x]) for x in ["Train", "Validation"]}
inputs, classes = next(iter(dataloaders["Validation"]))

img_grid = torchvision.utils.make_grid(inputs)
writer.add_image('sea_animals_images', img_grid)

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch [{epoch+1}/{num_epochs}]\n')

            mean_acc = 0.0
            mean_loss = 0.0
            class_labels = []
            class_preds = []
            for phase in ["Train", "Validation"]:
                if phase == "Train":
                    model.train()
                else:
                    model.eval()
                    
                
                running_loss = 0.0
                running_corrects = 0

                for i, (inputs, labels) in enumerate(tqdm(dataloaders[phase])):
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    with torch.set_grad_enabled(phase == "Train"):
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        _, preds = torch.max(outputs, 1)

                        if phase == "Train":
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                        class_probs_batch = [F.softmax(output, dim=0) for output in outputs]

                        class_preds.append(class_probs_batch)
                        class_labels.append(labels)

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)


                if phase == "Train":
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase] * 100

                mean_loss += epoch_loss
                mean_acc += epoch_acc

                print(f'{phase}: [Loss = {epoch_loss:.4f}] [Acc = {epoch_acc:.2f}%]\n')

                # deep copy the model
                if phase == "Validation" and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)

            class_preds = torch.cat([torch.stack(batch) for batch in class_preds])
            class_labels = torch.cat(class_labels)

            np_labels = class_labels.cpu().detach().numpy()
            np_preds = class_preds.cpu().detach().numpy()
            np_preds = np.argmax(np_preds, axis=1)

            cm = confusion_matrix(np_labels, np_preds)
            precision = precision_score(np_labels, np_preds, average="macro")
            recall = recall_score(np_labels, np_preds, average="macro")
            f1 = f1_score(np_labels, np_preds, average="macro")

            writer.add_scalar('avg_accuracy', mean_acc / 2, epoch)
            writer.add_scalar('avg_loss', mean_loss / 2, epoch)
            writer.add_scalar('avg_precision', precision, epoch)
            writer.add_scalar('avg_recall', recall, epoch)
            writer.add_scalar('avg_f1', f1, epoch)

            print(f"Confusion matrix :\n{cm}\n")
            print(f"precision : {precision:.4f}")
            print(f"recall : {recall:.4f}")
            print(f"f1 : {f1:.4f}")
            
            classes = range(10)
            for i in classes:
                labels_i = class_labels == i
                preds_i = class_preds[:, i]
                writer.add_pr_curve(str(i), labels_i, preds_i, global_step=1)
                writer.close()

            print('_' * 50,"\n")
            
        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best validation Acc: {best_acc:.2f}%')


        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path))
        
    return model

def imshow(inp, title=None):
    """Display image for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)

def visualize_model(model, num_images=10):
    was_training = model.training
    model.eval()
    images_so_far = 0

    with torch.no_grad():
        for inputs, labels in dataloaders["Validation"]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for i in range(inputs.size()[0]):
                images_so_far += 1
                imshow(inputs.cpu().data[i], f'predicted: {class_names[preds[i]]}')

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
                
        model.train(mode=was_training)

def visualize_model_predictions(model,img_path):
    was_training = model.training
    model.eval()

    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = data_transforms["Validation"](image=img)["image"]
    img = img.unsqueeze(0)
    img = img.to(device)

    with torch.no_grad():
        outputs = model(img)
        _, preds = torch.max(outputs, 1)
        imshow(img.cpu().data[0], f'Predicted: {class_names[preds[0]]}')

        model.train(mode=was_training)

model_conv = models.resnet50(weights='IMAGENET1K_V2')

for param in model_conv.parameters():
    param.requires_grad = False

num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, len(class_names))
model_conv = model_conv.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_conv = optim.Adam(model_conv.fc.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=10, gamma=0.1)
writer.add_graph(model_conv, inputs.to(device))
model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=20)
visualize_model(model_conv,1)
time.sleep(2)
random_imgs = random.sample(os.listdir("./Dataset/Test/"), 1)

for img in random_imgs:
    visualize_model_predictions(model_conv, img_path=f'./Dataset/Test/{img}')
    time.sleep(2)
    plt.close("all")

correct = {x:0 for x in class_names}
wrong  = {x:0 for x in class_names}
total = {x:0 for x in class_names}

for img_path in glob.glob("./Dataset/Test/*"):
    was_training = model_conv.training
    model_conv.eval()

    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    label = img_path.split('/')[-1].split("_")[0]
    label = class_to_idx[label]
    img = data_transforms["Validation"](image=img)["image"]
    img = img.unsqueeze(0)
    img = img.to(device)

    with torch.no_grad():
        outputs = model_conv(img)
        _, preds = torch.max(outputs, 1)
        model_conv.train(mode=was_training)

    if preds[0] == label:
        correct[class_names[label]] += 1
    elif preds[0] != label:
        wrong[class_names[label]] +=1
    total[class_names[label]] += 1

print(f"Test acc : {sum(correct.values())/sum(total.values())*100:.2f}%\n")
print(f"Correct : {correct}")
print(f"Wrong : {wrong}")
print(f"Total : {total}")

Running on CUDA!

Epoch [1/20]


100%|██████████| 249/249 [00:15<00:00, 16.12it/s]


Train: [Loss = 0.9547] [Acc = 78.26%]


100%|██████████| 50/50 [00:02<00:00, 20.74it/s]


Validation: [Loss = 0.4629] [Acc = 90.21%]

Confusion matrix :
[[297  24   1   8  26  15   8   2  56  43]
 [ 13 437   2   1   4   7   0   2   4   3]
 [  0   4 440   5   0   3   2   0   5  20]
 [  1   4   0 455   8   4   0   1   2   4]
 [ 31   7   0   9 350  13   1   0  57  10]
 [  9  21   3   7  14 321  12   5   3  81]
 [  3   5   0   3   6   8 417   2   2  32]
 [  5   6   1   2   0  10   3 396   7  46]
 [ 31  14   0   4  30   3   1   1 391   3]
 [ 29  14   8   9   9  50  17   9   5 329]]

precision : 0.8081
recall : 0.8027
f1 : 0.8033
__________________________________________________ 

Epoch [2/20]


100%|██████████| 249/249 [00:15<00:00, 16.19it/s]


Train: [Loss = 0.4358] [Acc = 89.27%]


100%|██████████| 50/50 [00:02<00:00, 20.54it/s]


Validation: [Loss = 0.3411] [Acc = 91.97%]

Confusion matrix :
[[379   4   2   2  21   9   3   2  25  33]
 [  3 452   2   1   1   4   2   0   4   4]
 [  1   0 475   0   0   0   0   0   0   3]
 [  2   1   1 467   4   0   1   0   3   0]
 [ 25   1   0  11 400   6   0   0  31   4]
 [ 13   8   0   3   6 385   9   2   2  48]
 [  3   0   0   2   0   7 463   0   0   3]
 [  1   0   3   0   0   7   4 444   0  17]
 [ 13   2   0   5  23   0   1   0 434   0]
 [ 16   5   4   2   5  38  13   6   4 386]]

precision : 0.8973
recall : 0.8973
f1 : 0.8969
__________________________________________________ 

Epoch [3/20]


100%|██████████| 249/249 [00:15<00:00, 15.63it/s]


Train: [Loss = 0.3338] [Acc = 90.83%]


100%|██████████| 50/50 [00:02<00:00, 20.51it/s]


Validation: [Loss = 0.2660] [Acc = 92.60%]

Confusion matrix :
[[404   0   1   4  13   7   2   1  21  27]
 [ 10 446   2   0   0  10   2   0   0   3]
 [  0   0 477   1   0   0   0   1   0   0]
 [  0   0   0 473   1   1   2   0   1   1]
 [ 27   0   0   7 408   8   1   0  24   3]
 [ 11   5   0   0   6 408   4   3   0  39]
 [  0   0   0   1   2   8 462   0   0   5]
 [  1   0   0   0   0   7   2 446   0  20]
 [ 19   0   0   4  15   1   0   0 439   0]
 [ 17   4   3   2   3  39  14   7   1 389]]

precision : 0.9121
recall : 0.9113
f1 : 0.9115
__________________________________________________ 

Epoch [4/20]


100%|██████████| 249/249 [00:16<00:00, 15.46it/s]


Train: [Loss = 0.2962] [Acc = 91.61%]


100%|██████████| 50/50 [00:02<00:00, 20.16it/s]


Validation: [Loss = 0.2519] [Acc = 93.35%]

Confusion matrix :
[[404   3   0   1  19  11   2   0  19  21]
 [  7 459   1   0   0   2   1   0   1   2]
 [  1   0 470   2   0   3   0   1   0   2]
 [  3   0   0 469   5   1   0   0   0   1]
 [ 18   2   0   5 415   7   1   0  23   7]
 [  3   4   1   3   4 413   8   3   2  35]
 [  0   0   1   1   0   2 466   3   1   4]
 [  0   0   2   0   0   5   2 451   0  16]
 [ 14   1   0   4  16   0   1   0 442   0]
 [ 15   5   4   0   3  30  10   8   4 400]]

precision : 0.9190
recall : 0.9190
f1 : 0.9189
__________________________________________________ 

Epoch [5/20]


100%|██████████| 249/249 [00:16<00:00, 14.88it/s]


Train: [Loss = 0.2632] [Acc = 92.46%]


100%|██████████| 50/50 [00:02<00:00, 17.63it/s]


Validation: [Loss = 0.2331] [Acc = 93.10%]

Confusion matrix :
[[408   2   1   4  22   3   3   1  16  20]
 [  2 458   1   0   1   5   3   0   2   1]
 [  0   0 474   2   0   1   0   0   0   2]
 [  2   0   0 474   1   0   0   0   0   2]
 [ 22   2   0   5 415   2   2   0  25   5]
 [  2   7   0   1   5 418   8   2   0  33]
 [  3   0   0   1   1   6 461   1   0   5]
 [  2   0   1   0   0   7   3 453   0  10]
 [ 10   0   0   4  12   1   0   0 450   1]
 [ 11   2   4   0   1  30  13   5   3 410]]

precision : 0.9257
recall : 0.9257
f1 : 0.9256
__________________________________________________ 

Epoch [6/20]


100%|██████████| 249/249 [00:16<00:00, 14.70it/s]


Train: [Loss = 0.2361] [Acc = 93.04%]


100%|██████████| 50/50 [00:02<00:00, 19.33it/s]


Validation: [Loss = 0.2251] [Acc = 93.73%]

Confusion matrix :
[[417   2   1   2  17   1   2   0  16  22]
 [  8 457   0   0   0   4   0   0   2   2]
 [  1   0 475   0   0   0   1   0   0   2]
 [  0   0   0 474   3   2   0   0   0   0]
 [ 18   3   0   6 420   6   1   0  16   8]
 [  7   6   0   1   3 417   6   2   1  33]
 [  0   0   0   1   2   3 467   0   0   5]
 [  3   0   2   0   0   4   2 454   0  11]
 [  9   1   0   1  10   1   0   0 456   0]
 [ 16   1   3   0   3  26  10   6   2 412]]

precision : 0.9318
recall : 0.9316
f1 : 0.9316
__________________________________________________ 

Epoch [7/20]


100%|██████████| 249/249 [00:16<00:00, 14.94it/s]


Train: [Loss = 0.2396] [Acc = 92.76%]


100%|██████████| 50/50 [00:02<00:00, 19.40it/s]


Validation: [Loss = 0.1995] [Acc = 94.23%]

Confusion matrix :
[[416   0   0   2  12   6   2   2  19  21]
 [  3 459   0   1   2   4   0   0   0   4]
 [  0   0 474   0   0   1   1   0   0   3]
 [  0   0   1 472   1   1   2   0   0   2]
 [ 20   3   0   2 427   3   2   0  18   3]
 [  3   3   0   1   2 419   4   2   1  41]
 [  2   0   0   0   1   3 466   1   0   5]
 [  3   0   1   0   0   3   1 451   1  16]
 [ 12   0   0   1  14   0   1   0 448   2]
 [ 13   4   2   0   5  29   9   5   2 410]]

precision : 0.9309
recall : 0.9301
f1 : 0.9303
__________________________________________________ 

Epoch [8/20]


100%|██████████| 249/249 [00:17<00:00, 14.47it/s]


Train: [Loss = 0.2310] [Acc = 92.39%]


100%|██████████| 50/50 [00:02<00:00, 18.00it/s]


Validation: [Loss = 0.2107] [Acc = 93.60%]

Confusion matrix :
[[410   3   0   4  13   9   2   0  18  21]
 [  4 457   1   0   2   5   0   0   1   3]
 [  0   0 474   1   0   2   0   0   1   1]
 [  1   1   0 467   5   0   1   0   0   4]
 [ 19   1   0   3 428   7   2   0  15   3]
 [  5   6   0   1   3 416   1   5   1  38]
 [  1   0   0   0   3   4 465   0   0   5]
 [  0   0   0   0   0   4   2 454   0  16]
 [ 12   0   0   1  15   1   0   0 448   1]
 [ 20   4   1   1   4  32   8   4   2 403]]

precision : 0.9264
recall : 0.9259
f1 : 0.9261
__________________________________________________ 

Epoch [9/20]


100%|██████████| 249/249 [00:16<00:00, 14.90it/s]


Train: [Loss = 0.2146] [Acc = 93.42%]


100%|██████████| 50/50 [00:02<00:00, 19.55it/s]


Validation: [Loss = 0.1965] [Acc = 94.48%]

Confusion matrix :
[[411   4   1   3  14   6   2   3  17  19]
 [  6 459   0   0   0   5   0   0   1   2]
 [  0   0 476   0   0   1   0   0   0   2]
 [  0   0   0 476   0   1   1   0   1   0]
 [ 17   1   0   1 426   6   2   0  17   8]
 [  7   0   1   1   0 432   7   2   0  26]
 [  1   0   0   0   0   3 467   1   0   6]
 [  3   0   2   1   1   2   2 452   0  13]
 [  9   2   0   2  12   2   1   0 449   1]
 [ 10   2   2   2   3  22  10   5   1 422]]

precision : 0.9363
recall : 0.9360
f1 : 0.9360
__________________________________________________ 

Epoch [10/20]


100%|██████████| 249/249 [00:16<00:00, 14.86it/s]


Train: [Loss = 0.2100] [Acc = 93.26%]


100%|██████████| 50/50 [00:02<00:00, 19.40it/s]


Validation: [Loss = 0.1863] [Acc = 94.60%]

Confusion matrix :
[[426   3   0   3  15   3   1   1  14  14]
 [  3 458   0   0   1   5   0   0   1   5]
 [  1   1 475   0   0   1   0   0   0   1]
 [  0   0   0 473   2   2   0   0   0   2]
 [ 21   2   1   4 420   3   3   0  21   3]
 [  2   2   1   1   0 431   1   2   0  36]
 [  1   0   0   0   3   6 464   0   0   4]
 [  2   0   1   0   0   2   2 455   0  14]
 [  5   0   0   1  15   1   1   0 452   3]
 [ 18   3   1   2   3  22   8  10   1 411]]

precision : 0.9352
recall : 0.9349
f1 : 0.9350
__________________________________________________ 

Epoch [11/20]


100%|██████████| 249/249 [00:16<00:00, 15.12it/s]


Train: [Loss = 0.1828] [Acc = 94.57%]


100%|██████████| 50/50 [00:02<00:00, 19.73it/s]


Validation: [Loss = 0.1956] [Acc = 94.73%]

Confusion matrix :
[[420   3   1   2  12   6   1   0  18  17]
 [  2 462   0   0   2   3   1   0   0   3]
 [  0   0 477   1   0   0   0   1   0   0]
 [  0   0   0 476   1   0   1   0   0   1]
 [ 15   0   0   2 434   4   0   0  19   4]
 [  3   3   1   1   1 445   2   1   1  18]
 [  0   0   0   0   0   3 468   1   0   6]
 [  0   0   1   0   0   5   1 459   0  10]
 [ 15   0   0   1  10   1   0   0 451   0]
 [  2   2   6   1   3  24   8   5   2 426]]

precision : 0.9461
recall : 0.9460
f1 : 0.9459
__________________________________________________ 

Epoch [12/20]


100%|██████████| 249/249 [00:16<00:00, 14.85it/s]


Train: [Loss = 0.1929] [Acc = 93.89%]


100%|██████████| 50/50 [00:02<00:00, 19.74it/s]


Validation: [Loss = 0.1936] [Acc = 94.35%]

Confusion matrix :
[[420   0   1   1  10  10   2   2  14  20]
 [  3 464   0   0   0   3   0   0   2   1]
 [  0   0 478   0   0   0   0   0   0   1]
 [  0   0   0 473   1   2   1   0   2   0]
 [ 16   0   1   3 429   4   2   0  19   4]
 [  5   2   1   2   2 422   6   3   1  32]
 [  1   0   0   0   0   5 470   0   1   1]
 [  0   0   1   1   0   3   1 456   0  14]
 [ 11   0   0   1  10   1   0   0 452   3]
 [ 13   2   3   2   3  18   8   4   2 424]]

precision : 0.9401
recall : 0.9397
f1 : 0.9398
__________________________________________________ 

Epoch [13/20]


100%|██████████| 249/249 [00:16<00:00, 14.97it/s]


Train: [Loss = 0.1813] [Acc = 93.89%]


100%|██████████| 50/50 [00:02<00:00, 19.43it/s]


Validation: [Loss = 0.1888] [Acc = 94.35%]

Confusion matrix :
[[413   2   0   2  20   7   2   0  17  17]
 [  3 462   0   0   1   5   0   0   1   1]
 [  1   0 476   0   0   1   0   0   0   1]
 [  0   0   0 477   1   0   0   0   1   0]
 [ 25   2   0   1 430   3   0   0  14   3]
 [  3   5   2   1   2 433   5   0   1  24]
 [  0   0   0   0   3   3 468   0   0   4]
 [  1   0   0   0   0   5   3 458   0   9]
 [  9   0   0   0  12   2   0   0 454   1]
 [ 15   2   1   0   2  25  12   3   2 417]]

precision : 0.9398
recall : 0.9398
f1 : 0.9397
__________________________________________________ 

Epoch [14/20]


100%|██████████| 249/249 [00:17<00:00, 14.35it/s]


Train: [Loss = 0.1817] [Acc = 94.70%]


100%|██████████| 50/50 [00:02<00:00, 18.75it/s]


Validation: [Loss = 0.1926] [Acc = 94.48%]

Confusion matrix :
[[427   2   0   1  11   8   2   1  12  16]
 [  3 462   0   0   0   4   0   0   0   4]
 [  0   0 477   0   0   1   0   0   0   1]
 [  0   0   0 477   1   0   0   0   1   0]
 [ 19   1   0   1 433   4   1   0  16   3]
 [  3   3   0   1   1 439   4   4   2  19]
 [  1   0   0   0   5   4 465   0   0   3]
 [  0   0   1   0   0   4   3 454   0  14]
 [  9   0   0   2   8   1   0   0 456   2]
 [ 10   2   2   1   3  17   5   7   1 431]]

precision : 0.9469
recall : 0.9466
f1 : 0.9467
__________________________________________________ 

Epoch [15/20]


100%|██████████| 249/249 [00:17<00:00, 14.49it/s]


Train: [Loss = 0.1873] [Acc = 93.97%]


100%|██████████| 50/50 [00:02<00:00, 19.61it/s]


Validation: [Loss = 0.1980] [Acc = 93.98%]

Confusion matrix :
[[420   1   0   2  11   8   2   1  18  17]
 [  3 461   0   0   0   6   0   0   1   2]
 [  0   0 475   0   0   2   0   0   0   2]
 [  0   1   0 471   4   0   0   0   3   0]
 [ 23   1   0   2 438   3   0   0   9   2]
 [  3   3   0   3   6 424   3   1   1  32]
 [  1   0   0   0   3   4 467   1   0   2]
 [  2   0   1   0   0   5   1 458   0   9]
 [  6   1   0   0  10   1   1   0 458   1]
 [ 13   2   3   0   4  22  10   8   1 416]]

precision : 0.9398
recall : 0.9397
f1 : 0.9397
__________________________________________________ 

Epoch [16/20]


100%|██████████| 249/249 [00:16<00:00, 15.16it/s]


Train: [Loss = 0.1803] [Acc = 94.57%]


100%|██████████| 50/50 [00:02<00:00, 19.44it/s]


Validation: [Loss = 0.1955] [Acc = 94.35%]

Confusion matrix :
[[424   2   0   2   8   9   5   0  14  16]
 [  5 459   0   0   0   3   0   0   0   6]
 [  0   0 476   0   0   2   0   0   0   1]
 [  0   0   0 472   3   1   0   0   0   3]
 [ 15   3   0   5 436   2   3   0  11   3]
 [  5   3   0   0   0 442   4   1   0  21]
 [  2   0   0   0   1   2 469   0   0   4]
 [  2   1   0   0   0   6   1 455   0  11]
 [ 10   0   0   1   9   0   0   1 456   1]
 [  8   1   1   0   2  23  12   5   1 426]]

precision : 0.9458
recall : 0.9454
f1 : 0.9455
__________________________________________________ 

Epoch [17/20]


100%|██████████| 249/249 [00:16<00:00, 14.78it/s]


Train: [Loss = 0.1727] [Acc = 94.65%]


100%|██████████| 50/50 [00:02<00:00, 19.31it/s]


Validation: [Loss = 0.1880] [Acc = 94.35%]

Confusion matrix :
[[431   0   1   1   8   5   2   0  15  17]
 [  3 459   0   0   3   4   0   0   1   3]
 [  0   0 477   0   0   1   0   0   0   1]
 [  0   0   0 474   2   1   0   0   1   1]
 [ 15   0   0   2 428   4   1   0  19   9]
 [  4   1   0   0   2 444   3   1   0  21]
 [  0   0   0   0   1   2 473   0   0   2]
 [  0   0   0   0   0   5   2 459   0  10]
 [ 13   3   0   2   9   0   1   0 449   1]
 [  8   3   1   0   6  20   8   8   1 424]]

precision : 0.9463
recall : 0.9460
f1 : 0.9461
__________________________________________________ 

Epoch [18/20]


100%|██████████| 249/249 [00:16<00:00, 14.86it/s]


Train: [Loss = 0.1801] [Acc = 94.40%]


100%|██████████| 50/50 [00:02<00:00, 19.21it/s]


Validation: [Loss = 0.1907] [Acc = 94.60%]

Confusion matrix :
[[420   2   0   2  15   8   1   0  15  17]
 [  5 460   0   0   1   4   0   0   0   3]
 [  0   0 476   0   0   1   0   0   0   2]
 [  1   0   0 475   1   0   0   0   1   1]
 [ 15   1   0   1 438   1   0   0  17   5]
 [  0   3   0   0   4 429   5   7   0  28]
 [  1   2   0   1   0   7 464   0   0   3]
 [  3   0   1   0   0   5   2 456   0   9]
 [  6   0   0   0   9   0   0   0 463   0]
 [ 11   3   2   0   3  15   9   6   1 429]]

precision : 0.9446
recall : 0.9443
f1 : 0.9443
__________________________________________________ 

Epoch [19/20]


100%|██████████| 249/249 [00:16<00:00, 14.80it/s]


Train: [Loss = 0.1787] [Acc = 94.30%]


100%|██████████| 50/50 [00:02<00:00, 19.10it/s]


Validation: [Loss = 0.1937] [Acc = 94.35%]

Confusion matrix :
[[422   1   1   2  15   9   0   2  12  16]
 [  2 461   0   0   2   3   0   0   1   4]
 [  0   0 477   0   0   1   0   0   0   1]
 [  0   0   1 475   1   0   1   0   1   0]
 [ 22   0   0   4 431   4   1   0  14   2]
 [  5   2   0   1   0 435   4   1   3  25]
 [  0   0   0   0   1   6 466   0   0   5]
 [  1   0   1   0   0   2   1 460   0  11]
 [  9   0   0   2  10   1   0   0 455   1]
 [ 11   3   1   0   2  23   8   8   1 422]]

precision : 0.9432
recall : 0.9431
f1 : 0.9431
__________________________________________________ 

Epoch [20/20]


100%|██████████| 249/249 [00:16<00:00, 14.71it/s]


Train: [Loss = 0.1809] [Acc = 94.32%]


100%|██████████| 50/50 [00:02<00:00, 19.21it/s]


Validation: [Loss = 0.2036] [Acc = 93.98%]

Confusion matrix :
[[429   0   0   1   7   8   2   0  16  17]
 [  4 459   1   0   0   3   1   0   1   4]
 [  0   0 479   0   0   0   0   0   0   0]
 [  0   0   0 475   2   1   0   0   0   1]
 [ 17   2   0   3 425   8   2   0  18   3]
 [  4   2   3   1   2 433   2   0   1  28]
 [  0   0   0   0   4   4 462   0   0   8]
 [  0   0   1   0   0   7   2 458   0   8]
 [ 10   1   0   0  11   1   0   1 453   1]
 [ 10   2   0   0   3  21   9   5   0 429]]

precision : 0.9432
recall : 0.9427
f1 : 0.9428
__________________________________________________ 

Training complete in 6m 26s
Best validation Acc: 94.73%
Correct : {'SeaRays': 27, 'JellyFish': 320, 'SeaUrchins': 90, 'Otter': 20, 'Penguin': 2, 'Seahorse': 0, 'Crabs': 18, 'StarFish': 19, 'Dolphin': 221, 'Octopus': 49}
Wrong : {'SeaRays': 10, 'JellyFish': 45, 'SeaUrchins': 9, 'Otter': 0, 'Penguin': 0, 'Seahorse': 0, 'Crabs': 1, 'StarFish': 0, 'Dolphin': 75, 'Octopus': 33}
Total : {'SeaRays': 37, 'Jell

##### =============================================================================================================================

# Custom ConvNet

In [None]:
import random
from pandas.core.common import flatten

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

import gc
import glob


if torch.cuda.is_available():
    device = torch.device('cuda')
    print("Running on CUDA!")
else:
    device = torch.device('cpu')
    print("Running on CPU!")
    

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1),
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU())
        self.conv2 = nn.Sequential(
                        nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1),
                        nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
    
    
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes = 10):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Sequential(nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3), nn.BatchNorm2d(64), nn.ReLU())
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self.make_layer(block, 64, layers[0], stride = 1)
        self.layer1 = self.make_layer(block, 128, layers[1], stride = 2)
        self.layer2 = self.make_layer(block, 256, layers[2], stride = 2)
        self.layer3 = self.make_layer(block, 512, layers[3], stride = 2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512, num_classes)
        
    def make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
            
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
      
    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


class AlexNet(nn.Module):
    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(9216, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))
      
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out
    

class_names = []
test_image_paths = []
train_image_paths = []

train_data_path = "./Dataset/Train/" 
test_data_path = "./Dataset/Validation/"


for data_path in glob.glob(train_data_path + '/*'):
    class_names.append(data_path.split('/')[-1]) 
    train_image_paths.append(glob.glob(data_path + '/*'))
    
for data_path in glob.glob(test_data_path + '/*'):
    test_image_paths.append(glob.glob(data_path + '/*'))


train_image_paths = list(flatten(train_image_paths))
random.shuffle(train_image_paths)
train_image_paths = train_image_paths[:int(0.8*len(train_image_paths))]
valid_image_paths = train_image_paths[int(0.8*len(train_image_paths)):]

test_image_paths = list(flatten(test_image_paths))

idx_to_class = {i:j for i, j in enumerate(class_names)}
class_to_idx = {value:key for key,value in idx_to_class.items()}


class Dataset(Dataset):
    def __init__(self, image_paths, transform=False):
        self.image_paths = image_paths
        self.transform = transform
        
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_filepath = self.image_paths[idx]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        label = image_filepath.split('/')[-2]
        label = class_to_idx[label]
        if self.transform:
            image = self.transform(image=image)["image"]
        
        return image, label
    

EPOCHS = 10
BATCH = 32
LR = 0.001
IMG_SIZE = 224
PRIME_TRANSFORMS = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
    ToTensorV2(),
])
RANDOM_TRANSFORMS =  A.Compose([
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=360, p=0.5),
    A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.MultiplicativeNoise(multiplier=[0.5,2], per_channel=True, p=0.2),
    A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
    ToTensorV2(),
])

train_dataset = Dataset(train_image_paths, PRIME_TRANSFORMS)
valid_dataset = Dataset(valid_image_paths, PRIME_TRANSFORMS)
test_dataset = Dataset(test_image_paths, PRIME_TRANSFORMS)

print(f"Train size: {len(train_dataset)}\nValid size: {len(valid_dataset)}\nTest size: {len(test_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH, shuffle=True) 
test_loader = DataLoader(test_dataset, batch_size=BATCH, shuffle=False)

resnet_model = ResNet(ResidualBlock, [3, 4, 6, 3]).to(device)
resnet_criterion = nn.CrossEntropyLoss()
resnet_optimizer = torch.optim.SGD(resnet_model.parameters(), lr=LR, weight_decay=0.001, momentum =0.9)

for epoch in range(EPOCHS):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = resnet_model(images)
        loss = resnet_criterion(outputs, labels)
        
        resnet_optimizer.zero_grad()
        loss.backward()
        resnet_optimizer.step()
        del images, labels, outputs
        torch.cuda.empty_cache()
        gc.collect()

    print (f'Epoch [{epoch+1}/{EPOCHS}], Loss: {loss.item():.4f}')
            
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = resnet_model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
    
        print(f'Accuracy of the network on the {len(valid_dataset)} validation images: {100 * correct / total:.2f} %')

with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = resnet_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print(f'Accuracy of the network on the {len(test_dataset)} test images: {100 * correct / total:.2f} %')


CLASSES = 10
EPOCHS = 40
BATCH = 128
LR = 0.01
IMG_SIZE = 227
RANDOM_TRANSFORMS =  A.Compose([
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=360, p=0.5),
    A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.MultiplicativeNoise(multiplier=[0.5,2], per_channel=True, p=0.3),
    A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
    A.Resize(IMG_SIZE, IMG_SIZE, p=1),
    A.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010], p=1),
    ToTensorV2(p=1),
])
PRIME_TRANSFORMS = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE, p=1),
    A.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010], p=1),
    ToTensorV2(p=1),
])

train_dataset = Dataset(train_image_paths, RANDOM_TRANSFORMS)
valid_dataset = Dataset(valid_image_paths, PRIME_TRANSFORMS)
test_dataset = Dataset(test_image_paths, PRIME_TRANSFORMS)


print(f"Train size: {len(train_dataset)}\nValid size: {len(valid_dataset)}\nTest size: {len(test_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH, shuffle=False)

alexnet_model = AlexNet(CLASSES).to(device)
alexnet_criterion = nn.CrossEntropyLoss()
alexnet_optimizer = torch.optim.SGD(alexnet_model.parameters(), lr=LR, weight_decay = 0.005, momentum = 0.9)

for epoch in range(EPOCHS):
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = alexnet_model(images)
        loss = alexnet_criterion(outputs, labels)

        alexnet_optimizer.zero_grad()
        loss.backward()
        alexnet_optimizer.step()

    print (f'Epoch [{epoch+1}/{EPOCHS}], Loss: { loss.item():.4f}')

    # with torch.no_grad():
    #     correct = 0
    #     total = 0
    #     for images, labels in valid_loader:
    #         images = images.to(device)
    #         labels = labels.to(device)
    #         outputs = alexnet_model(images)
    #         _, predicted = torch.max(outputs.data, 1)
    #         total += labels.size(0)
    #         correct += (predicted == labels).sum().item()
    #         del images, labels, outputs
    
    #     print(f'Accuracy of the network on the {len(valid_dataset)} validation images: {100 * correct / total:.2f} %')


with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = alexnet_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print(f'Accuracy of the network on the {len(test_dataset)} test images: {100 * correct / total:.2f} %')   

Running on CUDA!
