In [1]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import numpy as np
import torchvision
import matplotlib.pyplot as plt
import time
import os
import copy
import math
from pathlib import Path
from tqdm.notebook import trange, tqdm
from itertools import islice
from collections import Counter
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
import itertools
import pixiedust
import random
from torch.utils import data
from torch.utils.tensorboard import SummaryWriter
import datetime

# Print the installed PyTorch version into the cell output.
print(torch.__version__)
plt.ion()   # interactive mode
# Final expression of the cell: its value (whether CUDA is usable) is displayed.
torch.cuda.is_available()

Pixiedust database opened successfully


1.5.0


True

In [3]:
# Resolve the project root: Google Drive when running on Colab, local disk otherwise.
try:
    from google.colab import drive
except ImportError:
    # The google.colab package is absent, i.e. we are not on Colab: use the local folder.
    LABS_DIR = Path('C:/Labs/')
else:
    # Only the import is guarded. A failing mount must surface instead of silently
    # falling back to a Windows path that cannot exist on Colab.
    drive.mount('/content/gdrive')
    LABS_DIR = Path('/content/gdrive/My Drive/Labs')

#DATA_DIR = LABS_DIR/'Data'
DATA_DIR = LABS_DIR/'DataNoDuplicates'


In [4]:
def count_classes(loader):
    """Tally how often each label occurs across every batch of ``loader``.

    Each item yielded by ``loader`` is unpacked as ``(inputs, labels)``; the
    inputs are ignored and ``labels.numpy()`` is fed into a ``Counter``.

    Returns:
        dict: label -> number of occurrences.
    """
    label_counts = Counter()
    for _inputs, batch_labels in loader:
        label_counts.update(batch_labels.numpy())
    return dict(label_counts)

def imbalanced_classes_weights(class_counts, num_classes=None):
    """Compute per-class loss weights that up-weight rare classes.

    The weight of class ``c`` is ``biggest / (count[c] + 1)``, where ``biggest``
    is the largest count in ``class_counts``. Classes with no entry in
    ``class_counts`` receive ``biggest`` (the same formula with a count of 0).

    Args:
        class_counts: mapping of integer class index -> number of samples.
        num_classes: total number of classes the weight vector must cover.
            Defaults to ``1 + max(class index)``. Pass it explicitly when the
            highest-index class may be missing from ``class_counts``; otherwise
            the returned list is shorter than the number of classes.

    Returns:
        list: weights indexed by class index, of length ``num_classes``.

    Raises:
        ValueError: if ``class_counts`` is empty, or if ``num_classes`` is
            smaller than ``1 + max(class index)``.
    """
    if not class_counts:
        raise ValueError("class_counts must contain at least one class")

    biggest_class = max(class_counts.values())
    # int() so numpy integer keys (e.g. from Counter.update(ndarray)) give a plain size.
    observed_classes = 1 + int(max(class_counts))
    if num_classes is None:
        num_classes = observed_classes
    elif num_classes < observed_classes:
        raise ValueError(
            f"num_classes={num_classes} is too small for class index {observed_classes - 1}")

    # Start every class at the "never seen" weight, then overwrite the observed ones.
    class_weights = [biggest_class] * num_classes
    for class_no, class_count in class_counts.items():
        class_weights[class_no] = biggest_class/(class_count+1)
    return class_weights

In [5]:
# Per-split preprocessing pipelines.
# Training adds augmentation (small rotation, horizontal flip, random resized crop);
# validation and test only resize to 224x224. Every split ends with ToTensor and the
# same channel-wise normalization.
# NOTE(review): the crop `scale` upper bound of 1.1 is above 1.0; if `scale` is a
# fraction of the source image area, values over 1.0 may not behave as intended — confirm.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(10), # Original: 5
        transforms.RandomHorizontalFlip(),
        # Previously tried: transforms.RandomResizedCrop(224, scale=(0.96, 1.0), ratio=(0.95, 1.05))
        transforms.RandomResizedCrop(224, scale=(0.9, 1.1), ratio=(0.9, 1.1)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    # 'val' and 'test' use the same deterministic pipeline; each gets its own Compose object.
    **{
        split: transforms.Compose([
            transforms.Resize([224,224]),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        for split in ('val', 'test')
    },
}

# Data Loader

In [6]:
BATCH_SIZE = 32

def create_train_val_slice(image_datasets,sample_size=None,val_same_as_train=False):
    """Build shuffled 'train' and 'val' DataLoaders, optionally on a random subset.

    Args:
        image_datasets: mapping with 'train' and 'val' datasets (anything that
            supports ``len()`` and integer indexing).
        sample_size: number of random samples to keep per split. A falsy value
            (``None`` or 0) keeps the whole dataset. Values larger than a split
            are clamped to that split's size.
        val_same_as_train: when True (and ``sample_size`` is set), the sampled
            training subset is materialized into a list once and that same list
            is used for both splits.

    Returns:
        tuple: ``(dataloaders, dataset_sizes)``, two dicts keyed by
        'train' / 'val'.
    """
    splits = ['train', 'val']
    dataset_sizes = {x: len(image_datasets[x]) for x in splits}

    if not sample_size: # return the whole data set
        dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=BATCH_SIZE,
                                                      shuffle=True, num_workers=1)
                      for x in splits}
        return dataloaders, dataset_sizes

    # random.sample raises ValueError when asked for more items than the population
    # holds, so never request more than the split contains.
    sample_n = {x: random.sample(range(dataset_sizes[x]), min(sample_size, dataset_sizes[x]))
                for x in splits}

    image_datasets_reduced = {x: torch.utils.data.Subset(image_datasets[x], sample_n[x])
                              for x in splits}

    if val_same_as_train:
        # list(...) evaluates every sample once; both splits then share these exact items.
        image_datasets_reduced['val'] = list(image_datasets_reduced['train'])
        image_datasets_reduced['train'] = image_datasets_reduced['val']

    dataset_sizes = {x: len(image_datasets_reduced[x]) for x in splits}

    dataloaders_reduced = {x: torch.utils.data.DataLoader(image_datasets_reduced[x], batch_size=BATCH_SIZE,
                                                          shuffle=True, num_workers=1)
                           for x in splits}
    return dataloaders_reduced, dataset_sizes
        

# One ImageFolder per split, rooted at DATA_DIR/<split> and using that split's transform pipeline.
image_datasets = {x: datasets.ImageFolder(os.path.join(DATA_DIR, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}

# Class names as reported by the training ImageFolder; index in this list == label id.
class_names = image_datasets['train'].classes

# Subset size used only by the commented-out sampling variants below.
sample_size = 500
#data, dataset_sizes =  create_train_val_slice(image_datasets,sample_size,True)
# Active variant: loaders over the full datasets.
# NOTE: this rebinds the name `data`, which the import cell bound to the
# `torch.utils.data` module; from here on `data` is the dict of DataLoaders.
data, dataset_sizes =  create_train_val_slice(image_datasets,None)
#data, dataset_sizes =  create_train_val_slice(image_datasets,sample_size,val_same_as_train=False) 

# First CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Label frequencies of the training split; this iterates the whole training loader once.
class_counts = count_classes(data['train'])

# Per-class weights derived from those frequencies.
class_weights = imbalanced_classes_weights(class_counts)


print("Classes: ", class_names) 
print("Train classes frequencies: ",class_counts)

print(f'Train image size: {dataset_sizes["train"]}')
print(f'Validation image size: {dataset_sizes["val"]}')

Classes:  ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']
Train classes frequencies:  {6: 1442, 4: 3148, 3: 4627, 5: 3058, 0: 2225, 2: 2344}
Train image size: 16844
Validation image size: 5296


In [7]:
def imshow(inp, title=None):
    """Display a normalized channel-first image tensor with matplotlib.

    The tensor is converted to a channel-last NumPy array, the normalization
    (mean/std constants below) is inverted, values are clipped to [0, 1] and
    the result is drawn. ``title`` is set on the plot when it is not None.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # (C, H, W) -> (H, W, C), then undo `(x - mean) / std`.
    channel_last = inp.numpy().transpose((1, 2, 0))
    restored = np.clip(channel_std * channel_last + channel_mean, 0, 1)

    plt.imshow(restored)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
    
# # Get a batch of training data
# inputs, classes = next(iter(dataloaders['train']))
# # Make a grid from batch
# sample_train_images = torchvision.utils.make_grid(inputs)
# #imshow(sample_train_images, title=classes)
# print(f"classes={classes}")
# imshow(sample_train_images, title=[class_names[i] for i in classes])


In [8]:
def train_model(data, model, criterion, optimizer, scheduler, num_epochs=2, writer=None, checkpoint = None):
    """Train ``model`` and evaluate it on the validation split once per epoch.

    Relies on the module-level ``device`` and ``tqdm``. Epoch statistics are
    computed from the batches actually seen, so the function no longer depends
    on the ``dataset_sizes`` / ``BATCH_SIZE`` globals.

    Args:
        data: mapping with 'train' and 'val' entries. Each must support
            ``len()`` (number of batches) and yield ``(inputs, labels)``.
        model: network to optimize; it is modified in place.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer over the model parameters.
        scheduler: LR scheduler stepped once after every training phase, or None.
        num_epochs: maximum number of epochs.
        writer: optional object with ``add_scalar(tag, value, step)``. It
            receives 'batch loss' for each training batch and
            'accuracy-<phase>' once per epoch.
        checkpoint: accepted for interface compatibility; currently unused
            (resuming from a checkpoint is not implemented).

    Returns:
        tuple: ``(model, best_loss, best_acc)``. ``best_loss`` is the lowest
        validation loss of any epoch and ``best_acc`` the validation accuracy
        of that same epoch. If no validation phase ran they are
        ``float('inf')`` and ``0.0``. The model keeps its last-epoch weights.
    """
    since = time.time()

    print("Starting epochs")
    outer = tqdm(total=num_epochs, desc='Epoch', position=0,ncols='100%')
    # Totals come from the loaders themselves so a partial final batch is counted.
    inner = {'train': tqdm(total=len(data['train']), desc='Train', position=0,ncols='100%'),
             'val': tqdm(total=len(data['val']),desc='Val  ', position=0,ncols='100%')}
    # Placeholder metrics shown in the progress-bar text during the first epoch.
    results = {'train': {'loss':1000.0,'acc':0.0}, 'val': {'loss':1000.0,'acc':0.0}}

    best_loss = float('inf')
    best_acc = 0.0

    for epoch in range(num_epochs):
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0
            num_batches = len(data[phase])

            # Restart this phase's progress bar for the new epoch.
            inner[phase].reset()

            for i, (inputs, labels) in enumerate(data[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Advance the bar and show the previous epoch's metrics for this phase.
                inner[phase].update(1)
                l = results[phase]['loss']
                a = results[phase]['acc']
                inner[phase].desc = phase + ' ' + f'prev[l,a]=[{l:.4f}/{a:.4f}]'

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_samples = inputs.size(0)
                batch_mean_loss = loss.item()
                running_loss += batch_mean_loss * batch_samples
                running_corrects += torch.sum(preds == labels).item()
                samples_seen += batch_samples
                if writer is not None and is_train:
                    # Fractional epoch scaled by 1000 so batches get distinct integer steps.
                    x_axis = int(1000*(epoch + i/num_batches))
                    # Log the loss as returned by the criterion. Dividing a size-weighted
                    # loss by a fixed batch size would under-report a short final batch.
                    writer.add_scalar('batch loss', batch_mean_loss, x_axis)

            if is_train and scheduler is not None:
                scheduler.step()

            # Guard against an empty loader; otherwise this is the number of samples seen.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator
            results[phase] =  {'loss': epoch_loss,'acc':epoch_acc}
            if writer is not None:
                writer.add_scalar('accuracy-' + phase, epoch_acc, epoch)

            # Remember the best validation epoch so the returned values are meaningful.
            if not is_train and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_acc = epoch_acc

        outer.update(1)

        # Early stopping: training accuracy exceeds validation accuracy by more than 0.2.
        if results['train']['acc'] > (results['val']['acc'] + 0.2):
            print(f"Early stopping at epoch {epoch}: train acc={results['train']['acc']} , val_acc={results['val']['acc']}")
            break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Lest val Acc: {:.4f} Last val loss: {:.4f}'.format(results['val']['acc'], results['val']['loss']))

    return model, best_loss, best_acc

In [9]:
def get_model():
    """Create a pretrained ResNet-18 with a classifier head sized for ``class_names``.

    The final fully connected layer is replaced by a new ``nn.Linear`` with
    ``len(class_names)`` outputs and the model is moved to ``device``. No
    layers are frozen here (the freezing experiment is not active).

    Returns:
        The model, already on ``device``.
    """
    # Alternatives that were tried: torchvision.models.resnet50 / resnet101 (pretrained=True).
    backbone = torchvision.models.resnet18(pretrained=True)

    # Parameters of the newly constructed head have requires_grad=True by default.
    head_inputs = backbone.fc.in_features
    backbone.fc = nn.Linear(head_inputs, len(class_names))

    return backbone.to(device)



In [10]:
#model_conv.eval()

# Train Model

In [11]:

'''two options to write the loss. They are both equal'''
# option 1 (active): cross-entropy with one weight per class.
class_weight_tensor = torch.FloatTensor(class_weights).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weight_tensor)

# option 2 (sketch only, never executed): softmax followed by a negative log-likelihood.
# NOTE(review): as written, this sketch passes no class weights to nll_loss.
# p = nn.functional.softmax(model_conv, dim=1)
# # to calculate loss using probabilities you can do below
# criterion = nn.functional.nll_loss(torch.log(p), y)


In [12]:
# Location of the checkpoint archive under the project root.
CHECK_POINT_PATH = LABS_DIR/'ModelParams'/'checkpoint.tar'

# !del $CHECK_POINT_PATH

# Checkpoint loading is disabled: the block below is commented out, so training
# always starts with `checkpoint = None`.
# try:
#     checkpoint = torch.load(CHECK_POINT_PATH)
#     print("checkpoint loaded")
# except:
#     checkpoint = None
#     print("checkpoint not found")
checkpoint = None

In [13]:
%%time
# Tensorboard Stuff
# Module-level defaults for the experiments in this cell.
#lr_initial = 0.1
# Passed to run_experiment, whose active optimizer (Adam) does not receive it.
momentum = 0.9
# NOTE: the sweep loop at the end of this cell reuses the names `scheduler_step_size`
# and `scheduler_gamma` as loop variables, overwriting the two values below.
scheduler_step_size = 7
scheduler_gamma = 0.3

def run_experiment(model, lr_initial, momentum, scheduler_gamma, scheduler_step_size,
                   num_epochs=50, weight_decay=0.001):
    """Train ``model`` with Adam + StepLR and log the run under ``runs/``.

    Uses the module-level ``data``, ``criterion`` and ``checkpoint``.

    Args:
        model: network to train; all of its parameters are optimized.
        lr_initial: initial learning rate for Adam.
        momentum: unused by the Adam optimizer; kept so existing callers
            (and a possible SGD variant) keep working.
        scheduler_gamma: multiplicative LR decay factor for StepLR.
        scheduler_step_size: number of epochs between LR decays.
        num_epochs: maximum number of training epochs (default 50).
        weight_decay: Adam weight decay (default 0.001).

    Returns:
        tuple: ``(model, best_val_loss, best_val_acc)`` as returned by
        ``train_model``.
    """
    optimizer_conv = optim.Adam(model.parameters(),lr=lr_initial,weight_decay=weight_decay)

    # Multiply the LR by `scheduler_gamma` every `scheduler_step_size` epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=scheduler_step_size, gamma=scheduler_gamma)

    # Timestamped run name so every experiment gets its own TensorBoard directory.
    experiment_name = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") +\
        f'_ADAM_Weights_wdecay_{weight_decay}_imb_Scale01_sched_{scheduler_step_size}_gam_{scheduler_gamma}_lr_{lr_initial}'
    writer = SummaryWriter('runs/' + experiment_name)
    try:
        model_conv, best_val_loss, best_val_acc = train_model(data,
                                                              model,
                                                              criterion,
                                                              optimizer_conv,
                                                              exp_lr_scheduler,
                                                              num_epochs=num_epochs,
                                                              writer=writer,
                                                              checkpoint=checkpoint)
    finally:
        # Flush pending events and release the event file even if training fails;
        # otherwise each sweep iteration leaves an open writer behind.
        writer.close()
    return model_conv, best_val_loss, best_val_acc

# Hyper-parameter sweep: a fresh model is built and trained for every combination.
# Value lists, in order: learning rate (earlier sweep: [0.1,0.05,0.01]),
# scheduler step size, scheduler gamma (earlier sweep: [0.1,0.2,0.3,0.4]).
# After the loop, `model_conv`, `best_val_loss` and `best_val_acc` hold the results
# of the last combination only.
for lr, scheduler_step_size, scheduler_gamma in itertools.product([0.001, 0.005], [7], [0.5]):
    model = get_model()
    model_conv, best_val_loss, best_val_acc = run_experiment(
        model, lr, momentum, scheduler_gamma, scheduler_step_size)

Starting epochs


HBox(children=(FloatProgress(value=0.0, description='Epoch', layout=Layout(flex='2'), max=50.0, style=Progress…

HBox(children=(FloatProgress(value=0.0, description='Train', layout=Layout(flex='2'), max=526.0, style=Progres…

HBox(children=(FloatProgress(value=0.0, description='Val  ', layout=Layout(flex='2'), max=165.0, style=Progres…

Training complete in 67m 9s
Lest val Acc: 0.6299 Last val loss: 3.4033
Starting epochs


HBox(children=(FloatProgress(value=0.0, description='Epoch', layout=Layout(flex='2'), max=50.0, style=Progress…

HBox(children=(FloatProgress(value=0.0, description='Train', layout=Layout(flex='2'), max=526.0, style=Progres…

HBox(children=(FloatProgress(value=0.0, description='Val  ', layout=Layout(flex='2'), max=165.0, style=Progres…

Training complete in 66m 43s
Lest val Acc: 0.5476 Last val loss: 3.6881
Wall time: 2h 13min 54s


In [14]:
# torch.save({'model_state_dict': model_conv.state_dict(),
#             'optimizer_state_dict': optimizer_conv.state_dict(),
#             'best_val_loss': best_val_loss,
#             'best_val_accuracy': best_val_acc,
#             'scheduler_state_dict' : exp_lr_scheduler.state_dict(),
#             }, CHECK_POINT_PATH)

# Test Model

In [15]:
# Put the model from the last sweep iteration into evaluation mode.
# As the cell's final expression, the returned module's repr is displayed.
model_conv.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [16]:
from pprint import pprint
# Count the images per class name in one split.
x='train'
d = datasets.ImageFolder(os.path.join(DATA_DIR, x))
# `d.targets` holds one class index per sample, so the labels can be read without
# opening and decoding every image (which iterating over `d` itself would do).
# The names come from `d.classes`, so indices and names always belong to the same split.
cnt = Counter(d.classes[category] for category in d.targets)
print(cnt)

Counter({'Happy': 4627, 'Neutral': 3148, 'Sad': 3058, 'Fear': 2344, 'Angry': 2225, 'Surprise': 1442})


In [17]:
# Class name that label index 0 maps to in the training ImageFolder.
image_datasets['train'].classes[0]

'Angry'

In [18]:
import torch as pt
# Recount the label indices served by the training loader, via the shared helper.
# Wrapping the result in Counter keeps `cnt` a Counter for printing and lookups.
cnt = Counter(count_classes(data['train']))
print(cnt)
print(cnt[3])

Counter({3: 4627, 4: 3148, 5: 3058, 2: 2344, 0: 2225, 6: 1442})
4627
