In [0]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy


# Switch matplotlib to interactive mode.
plt.ion()

# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [0]:
# Relative path passed to load_datasets() as the dataset root directory.
data_dir = "data/"

In [3]:
from data.load_data import load_datasets
# load_datasets() hands back the three splits plus a fourth value (`classes`)
# that is kept for reference but not used below.
(train_dataset,
 val_dataset,
 test_dataset,
 classes) = load_datasets(data_dir)

# Class labels are read from the training split; show how many there are.
class_names = train_dataset.classes
print(len(class_names))

# Number of samples per phase, used to normalise per-epoch loss and accuracy.
dataset_sizes = dict(train=len(train_dataset), val=len(val_dataset))

Stanford Dogs Dataset already downloaded
Training set stats:
10306 samples spanning 120 classes (avg 85.883333 per class)
Validation set stats:
2571 samples spanning 120 classes (avg 21.425000 per class)
Testing set stats:
9249 samples spanning 120 classes (avg 77.075000 per class)
120


In [0]:
# Settings shared by every loader.
batch_size = 32
kwargs = dict(num_workers=1, pin_memory=True)


def _shuffled_loader(dataset):
    """Wrap `dataset` in a shuffling DataLoader that uses the shared settings."""
    return torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, **kwargs)


# Note: all three splits are shuffled, including validation and test.
train_loader = _shuffled_loader(train_dataset)
val_loader = _shuffled_loader(val_dataset)
test_loader = _shuffled_loader(test_dataset)

# Phase name -> loader, as looked up by the training and visualisation code.
dataloaders = dict(train=train_loader, val=val_loader)

In [0]:
def evaluate(model, optimizer, criterion, loader=None):
    """Measure loss and accuracy of `model` over a data loader, without training.

    Args:
        model: network to evaluate. It is switched to eval mode and left there.
        optimizer: unused; accepted only so existing call sites keep working.
            Inference never needs to touch gradients, so `None` is fine.
        criterion: callable taking (outputs, labels). It is assumed to return
            the batch-mean loss, which is re-weighted by batch size here.
        loader: iterable of (inputs, labels) batches. Defaults to the
            notebook-level `test_loader`.

    Returns:
        Tuple (mean_loss, n_correct, n_examples, accuracy_percent) made of
        plain Python numbers (float, int, int, float). All zeros when the
        loader yields no examples.
    """
    if loader is None:
        loader = test_loader

    model.eval()
    total_loss = 0.0
    total_correct = 0
    n_examples = 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            batch = inputs.size(0)
            total_loss += loss.item() * batch
            # .item() keeps the counter a Python int instead of a device tensor,
            # so it formats as "123" rather than "tensor(123, device=...)".
            total_correct += (preds == labels).sum().item()
            n_examples += batch

    if n_examples == 0:
        # Nothing to average over; avoid dividing by zero.
        return 0.0, 0, 0, 0.0

    mean_loss = total_loss / n_examples
    accuracy_percent = 100.0 * total_correct / n_examples
    return mean_loss, total_correct, n_examples, accuracy_percent

In [0]:
def _run_phase(model, loader, criterion, optimizer, training):
    """Run one pass over `loader`; return (mean loss, accuracy as a fraction).

    When `training` is true the model is put in train mode and the optimizer
    is stepped after every batch; otherwise the model is put in eval mode and
    no gradients are tracked.
    """
    model.train(training)
    total_loss = 0.0
    total_correct = 0
    n_seen = 0

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        if training:
            optimizer.zero_grad()

        with torch.set_grad_enabled(training):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

        batch = inputs.size(0)
        total_loss += loss.item() * batch
        total_correct += (preds == labels).sum().item()
        n_seen += batch

    if n_seen == 0:
        # An empty loader has nothing to average; report zeros.
        return 0.0, 0.0
    return total_loss / n_seen, float(total_correct) / n_seen


def train_model(model, model_name, criterion, optimizer, scheduler, hp_info, num_epochs=25):
    """Train `model`, keep the weights with the best validation accuracy, then test.

    Each epoch runs a training pass over `dataloaders['train']`, steps the
    scheduler once, and runs a validation pass over `dataloaders['val']`.
    One line per epoch is printed and written to
    "<model_name>_lr_<lr>_mom_<momentum>.log" in the working directory. After
    the last epoch the best-validation weights are loaded back into `model`
    and `evaluate` is called; its summary is logged as well.

    Args:
        model: network to train (modified in place).
        model_name: prefix for the log file name.
        criterion: loss callable taking (outputs, labels).
        optimizer: optimizer already bound to the model's parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        hp_info: dict with 'lr' and 'momentum' keys, used for logging only.
        num_epochs: number of train/validation epochs.

    Returns:
        (model, test_acc, best_acc): the model carrying the best-validation
        weights, the test accuracy in percent as a float, and the best
        validation accuracy as a float fraction in [0, 1].
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    file_name = "{model_name}_lr_{lr}_mom_{momentum}.log"
    file_name = file_name.format(model_name=model_name, lr=hp_info['lr'], momentum=hp_info['momentum'])

    # The context manager closes the log even if training raises part-way.
    with open(file_name, 'w') as log_file:
        print('-'*10)
        print('learning rate: {}, momentum: {}'.format(hp_info['lr'], hp_info['momentum']))

        for epoch in range(num_epochs):
            train_loss, train_acc = _run_phase(
                model, dataloaders['train'], criterion, optimizer, True)
            # Step the LR schedule after the epoch's optimizer updates.
            scheduler.step()
            val_loss, val_acc = _run_phase(
                model, dataloaders['val'], criterion, optimizer, False)

            log_file_string = 'Train Epoch: {}\t Train Loss: {:.6f}\t Train Acc:{}\t '.format(epoch, train_loss, train_acc)
            log_file_string += 'Val Loss: {}\t Val Acc: {}\n'.format(val_loss, val_acc)
            log_file.write(log_file_string)
            print(log_file_string)

            # Snapshot the weights whenever validation accuracy improves.
            if val_acc > best_acc:
                best_acc = val_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        # Restore the best weights before measuring on the test set.
        model.load_state_dict(best_model_wts)

        test_loss, test_correct, test_n_examples, test_acc = evaluate(model, optimizer, criterion)
        # evaluate may hand back 0-dim tensors; normalise to plain numbers so
        # the log reads "123/9249" and callers get an ordinary float.
        test_correct = int(test_correct)
        test_acc = float(test_acc)
        log_file_string = '\ntest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
        log_file_string = log_file_string.format(test_loss, test_correct, test_n_examples, test_acc)
        log_file.write(log_file_string)
        print(log_file_string)

    return model, test_acc, best_acc


In [0]:
def visualize_model(model, num_images=6):
    """Plot predictions for the first `num_images` validation images.

    Images are drawn from `dataloaders['val']` into a two-column subplot grid,
    each titled with the predicted class name. The model's train/eval mode is
    restored on exit, including when plotting raises.

    Args:
        model: network used for prediction.
        num_images: how many images to show; nothing is drawn when <= 0.

    NOTE(review): relies on an `imshow` helper that is not defined in the
    visible part of this notebook -- confirm it exists before this is called.
    """
    if num_images <= 0:
        return

    was_training = model.training
    model.eval()
    # Two columns; round the row count up so an odd num_images still fits
    # (num_images // 2 gives too few rows, and zero rows for a single image).
    n_rows = (num_images + 1) // 2
    images_so_far = 0
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _labels in dataloaders['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = plt.subplot(n_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[int(preds[j])]))
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        return
    finally:
        # Single restore point instead of one before every exit.
        model.train(mode=was_training)

In [0]:
class BasicCNN(nn.Module):
  """Small CNN classifier: four conv stages followed by two linear layers.

  Every conv stage is Conv2d -> BatchNorm2d -> ReLU -> 2x2 AvgPool ->
  Dropout(0.1). A 2x2 max pool follows stages three and four. The first conv
  takes 3 input channels, and `fc1` expects exactly 1024 flattened features,
  which fixes the usable input resolution (224x224 images, for example,
  flatten to 256 * 2 * 2 = 1024).

  Note that `fc1` feeds `fc2` directly, with no activation in between.
  """

  @staticmethod
  def _conv_stage(n_in, n_out, kernel):
    """Build one Conv -> BatchNorm -> ReLU -> AvgPool -> Dropout stage."""
    return nn.Sequential(
        nn.Conv2d(n_in, n_out, kernel),
        nn.BatchNorm2d(n_out),
        nn.ReLU(inplace=True),
        nn.AvgPool2d(2, 2),
        nn.Dropout(p=0.1),
    )

  def __init__(self, num_classes):
    """Create the layers; `num_classes` is the width of the output layer."""
    super(BasicCNN, self).__init__()

    # Submodules are registered in the same order as they are applied.
    self.conv_layer1 = self._conv_stage(3, 32, 4)
    self.conv_layer2 = self._conv_stage(32, 64, 3)
    self.conv_layer3 = self._conv_stage(64, 128, 3)
    self.max_pool1 = nn.MaxPool2d(2, 2)
    self.conv_layer4 = self._conv_stage(128, 256, 3)
    self.max_pool2 = nn.MaxPool2d(2, 2)

    self.fc1 = nn.Linear(1024, 512)
    self.fc2 = nn.Linear(512, num_classes)

  def forward(self, x):
    """Return class logits of shape (batch, num_classes) for image batch `x`."""
    feature_stages = (
        self.conv_layer1,
        self.conv_layer2,
        self.conv_layer3,
        self.max_pool1,
        self.conv_layer4,
        self.max_pool2,
    )
    for stage in feature_stages:
      x = stage(x)

    # Flatten everything except the batch dimension.
    flat = x.view(x.size(0), -1)
    hidden = self.fc1(flat)
    return self.fc2(hidden)

In [0]:
# Hyper-parameter grid: every learning rate is paired with every momentum.
learning_rates = [0.001, 0.01, 0.1]
momentums = [0.99, 0.9, 0.5]

# Prefix used for the per-run log files.
model_type = "basicmodel"

# One loss object shared by all candidate runs.
criterion = nn.CrossEntropyLoss()

# Each entry bundles a fresh model with its own optimizer and LR scheduler.
my_models = []
for lr in learning_rates:
    for mom in momentums:
        model_conv = BasicCNN(120)
        model_conv = model_conv.to(device)

        params = model_conv.parameters()
        optimizer_conv = optim.SGD(params, lr=lr, momentum=mom)

        # Multiply the learning rate by 0.1 every 7 scheduler steps.
        exp_lr_scheduler = lr_scheduler.StepLR(
            optimizer_conv, step_size=7, gamma=0.1)

        hp_info = dict(lr=lr, momentum=mom)
        my_models.append(dict(
            model=model_conv,
            optimizer=optimizer_conv,
            exp_lr_scheduler=exp_lr_scheduler,
            hp_info=hp_info,
        ))

In [20]:
# Train every candidate and keep the weights of the one with the best
# VALIDATION accuracy. Test accuracy is recorded for reporting only: picking
# hyper-parameters by test accuracy would leak the test set into model
# selection and make the reported test number optimistically biased.
best_test_acc = 0
best_val_acc = 0
# Start with no winner so the first trained candidate is always recorded and
# untrained weights can never be reported as the "best" model.
best_model = None
best_model_hp_info = None
for model_i in my_models:
    model, test_acc, val_acc = train_model(model_i['model'],
                                           model_type,
                                           criterion,
                                           model_i['optimizer'],
                                           model_i['exp_lr_scheduler'],
                                           model_i['hp_info'],
                                           num_epochs=10)
    if best_model is None or val_acc > best_val_acc:
        best_val_acc = val_acc
        best_test_acc = test_acc
        best_model = copy.deepcopy(model.state_dict())
        best_model_hp_info = copy.deepcopy(model_i['hp_info'])

# NOTE(review): despite its name, model_conv now holds a state_dict (the
# winning weights), not an nn.Module. Load it into a BasicCNN with
# load_state_dict() before using it for inference.
model_conv = best_model

----------
learning rate: 0.001, momentum: 0.99
Train Epoch: 0	 Train Loss: 4.651094	 Train Acc:0.025033960799534252	 Val Loss: 4.521221354331362	 Val Acc: 0.034227926876701675

Train Epoch: 1	 Train Loss: 4.472383	 Train Acc:0.0392004657481079	 Val Loss: 4.429705133701474	 Val Acc: 0.041229093737845196

Train Epoch: 2	 Train Loss: 4.400091	 Train Acc:0.045022317096836795	 Val Loss: 4.370075723996009	 Val Acc: 0.044340723453908985

Train Epoch: 3	 Train Loss: 4.332304	 Train Acc:0.0504560450223171	 Val Loss: 4.329779992721184	 Val Acc: 0.051341890315052506

Train Epoch: 4	 Train Loss: 4.270853	 Train Acc:0.05598680380360955	 Val Loss: 4.270765565889255	 Val Acc: 0.056009334889148193

Train Epoch: 5	 Train Loss: 4.229456	 Train Acc:0.06452551911507859	 Val Loss: 4.237419726643661	 Val Acc: 0.055231427460132244

Train Epoch: 6	 Train Loss: 4.129649	 Train Acc:0.07151174073355326	 Val Loss: 4.250464628569025	 Val Acc: 0.06223259432127577

Train Epoch: 7	 Train Loss: 4.032067	 Train Acc:0.