In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [2]:
# Record the PyTorch build this notebook was run with.
# torch is already imported in the first cell, so it is not re-imported here.
print(torch.__version__)

1.0.0.dev20181207


In [3]:
# Check whether PyTorch can see a CUDA device; the boolean is shown as the cell output.
torch.cuda.is_available()

True

In [4]:
# List the entries of the current working directory (shown as the cell output).
os.listdir()

['Flowers_pytorch_sgd.ipynb',
 'projects',
 'Flowers_pytorch_adam.ipynb',
 'envML',
 'data',
 '.ipynb_checkpoints',
 'Untitled.ipynb']

In [5]:
# Same directory listing via the shell (IPython `!` escape).
!ls

data   Flowers_pytorch_adam.ipynb  projects
envML  Flowers_pytorch_sgd.ipynb   Untitled.ipynb


In [6]:
# Names of the training / validation sub-folders inside the dataset root.
train_dir, val_dir = 'train', 'valid'

# Dataset root; PATH is kept as a second name for the same location.
data_dir = 'data/flower_data/'
PATH = data_dir

In [7]:
# Count the classes: each class is one sub-directory of the training folder.
# Only directories are kept, so stray files (e.g. .DS_Store) are not counted as
# classes; this matches how ImageFolder discovers classes, which keeps the
# classifier width below in sync with the labels the loader produces.
# os.listdir order depends on the OS, so the names are sorted explicitly.
train_root = os.path.join(data_dir, train_dir)
classes = sorted(
    entry for entry in os.listdir(train_root)
    if os.path.isdir(os.path.join(train_root, entry))
)
ClassesNumer = len(classes)
print("Class Total Count: ", ClassesNumer)

Class Total Count:  102


In [8]:
# Preprocessing pipelines, keyed by split folder name.
data_transforms = {}

# Training split: random crop and random flips for augmentation, then tensor
# conversion and per-channel normalization.
data_transforms[train_dir] = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Validation split: deterministic resize + centre crop, then the same tensor
# conversion and normalization as the training split (no augmentation).
data_transforms[val_dir] = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [9]:

# Build one ImageFolder dataset and one DataLoader per split.
splits = [train_dir, val_dir]

# Each split reads from <data_dir>/<split> and applies that split's transform pipeline.
image_datasets = {
    x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
    for x in splits
}

# Only the training data is shuffled. Validation metrics are averages over the
# whole split, so shuffling it would only consume random-number state for no benefit.
dataloaders = {
    x: torch.utils.data.DataLoader(
        image_datasets[x],
        batch_size=16,
        shuffle=(x == train_dir),
        num_workers=4,
    )
    for x in splits
}

# Number of images per split (used to average loss / accuracy per epoch).
dataset_sizes = {x: len(image_datasets[x]) for x in splits}

# Class names as discovered by ImageFolder for the training split.
class_names = image_datasets[train_dir].classes

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [10]:
# Sanity check: show the loaders, the per-split image counts and the chosen device,
# one per line.
for summary_item in (dataloaders, dataset_sizes, device):
    print(summary_item)

{'train': <torch.utils.data.dataloader.DataLoader object at 0x7f841bec4978>, 'valid': <torch.utils.data.dataloader.DataLoader object at 0x7f841becacc0>}
{'train': 6551, 'valid': 818}
cuda:0


In [11]:
# Show the training dataset's summary (size, root folder, transforms).
train_dataset = image_datasets[train_dir]
print(train_dataset)

Dataset ImageFolder
    Number of datapoints: 6551
    Root Location: data/flower_data/train
    Transforms (if any): Compose(
                             RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
                             RandomHorizontalFlip(p=0.5)
                             RandomVerticalFlip(p=0.5)
                             ToTensor()
                             Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                         )
    Target Transforms (if any): None


In [12]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                loaders=None, phases=None):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass followed by a validation pass and prints
    the sample-weighted average loss and the accuracy of each pass.  The
    weights from the epoch with the highest validation accuracy are restored
    into ``model`` before it is returned.

    Args:
        model: network to train.  Batches are moved to the device that the
            model's parameters live on, so move the model first.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: optimizer that updates the model's parameters.
        scheduler: learning-rate scheduler; stepped once per epoch.
        num_epochs: number of epochs to run.
        loaders: optional mapping ``phase name -> iterable of (inputs, labels)``
            batches.  Defaults to the notebook-level ``dataloaders``.
        phases: optional ``(train_phase, val_phase)`` pair of keys into
            ``loaders``.  Defaults to the notebook-level ``(train_dir, val_dir)``.

    Returns:
        The same ``model`` object, holding the best-validation-accuracy weights.
    """
    # Fall back to the notebook globals so existing calls keep working unchanged.
    if loaders is None:
        loaders = dataloaders
    if phases is None:
        phases = (train_dir, val_dir)
    train_phase, val_phase = phases

    # Send batches to wherever the model already is instead of relying on a global.
    model_device = next(model.parameters()).device

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training phase followed by a validation phase.
        for phase in (train_phase, val_phase):
            is_training = phase == train_phase
            model.train(is_training)  # train(False) is the same as eval()

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(model_device)
                labels = labels.to(model_device)

                # Track gradient history only while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Weight each batch by its size so a smaller final batch is
                # averaged correctly.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels).item()
                samples_seen += batch_size

            if is_training:
                # Advance the LR schedule once per epoch, AFTER this epoch's
                # optimizer updates.  PyTorch >= 1.1 expects this order;
                # stepping the scheduler first skips the first value of the
                # schedule and triggers a warning.
                scheduler.step()

            # Average over the samples actually seen; guard against an empty loader.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy improves.
            if phase == val_phase and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (precision), not '{:4f}' (which only sets a minimum width).
    print('Best val Acc: {:.4f}'.format(best_acc))

    # Restore the best weights seen during validation.
    model.load_state_dict(best_model_wts)
    return model

In [13]:
# Load a pretrained DenseNet-201 and replace its final classifier layer with a
# fresh linear layer that has one output per flower class.
# `pretrained` is a boolean flag; the previous string value only worked because
# any non-empty string is truthy.
model_ft = models.densenet201(pretrained=True)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, ClassesNumer)

model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()

# All parameters are optimized in this first stage (nothing is frozen yet).
# Alternative that was tried: optim.Adam(model_ft.parameters(), lr=0.0005, amsgrad=True)
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay the learning rate by a factor of 0.1 every 15 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=15, gamma=0.1)

  nn.init.kaiming_normal(m.weight.data)


In [14]:
# Report, for every parameter, whether it is trainable (requires_grad).
# Names are printed relative to each top-level child module of the network.
for child_module in model_ft.children():
    for param_name, param in child_module.named_parameters():
        print(param_name, param.requires_grad)

conv0.weight True
norm0.weight True
norm0.bias True
denseblock1.denselayer1.norm1.weight True
denseblock1.denselayer1.norm1.bias True
denseblock1.denselayer1.conv1.weight True
denseblock1.denselayer1.norm2.weight True
denseblock1.denselayer1.norm2.bias True
denseblock1.denselayer1.conv2.weight True
denseblock1.denselayer2.norm1.weight True
denseblock1.denselayer2.norm1.bias True
denseblock1.denselayer2.conv1.weight True
denseblock1.denselayer2.norm2.weight True
denseblock1.denselayer2.norm2.bias True
denseblock1.denselayer2.conv2.weight True
denseblock1.denselayer3.norm1.weight True
denseblock1.denselayer3.norm1.bias True
denseblock1.denselayer3.conv1.weight True
denseblock1.denselayer3.norm2.weight True
denseblock1.denselayer3.norm2.bias True
denseblock1.denselayer3.conv2.weight True
denseblock1.denselayer4.norm1.weight True
denseblock1.denselayer4.norm1.bias True
denseblock1.denselayer4.conv1.weight True
denseblock1.denselayer4.norm2.weight True
denseblock1.denselayer4.norm2.bias Tru

In [15]:
# Stage 1: train and evaluate for 45 epochs with the optimizer and scheduler
# configured above.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=45,
)

Epoch 0/44
----------
train Loss: 2.7714 Acc: 0.4335
valid Loss: 0.8263 Acc: 0.8533

Epoch 1/44
----------
train Loss: 0.9385 Acc: 0.8274
valid Loss: 0.2708 Acc: 0.9450

Epoch 2/44
----------
train Loss: 0.5337 Acc: 0.8994
valid Loss: 0.1812 Acc: 0.9621

Epoch 3/44
----------
train Loss: 0.3917 Acc: 0.9208
valid Loss: 0.1456 Acc: 0.9609

Epoch 4/44
----------
train Loss: 0.3088 Acc: 0.9360
valid Loss: 0.1197 Acc: 0.9707

Epoch 5/44
----------
train Loss: 0.2723 Acc: 0.9426
valid Loss: 0.1088 Acc: 0.9731

Epoch 6/44
----------
train Loss: 0.2303 Acc: 0.9505
valid Loss: 0.0902 Acc: 0.9768

Epoch 7/44
----------
train Loss: 0.2053 Acc: 0.9544
valid Loss: 0.0920 Acc: 0.9817

Epoch 8/44
----------
train Loss: 0.1890 Acc: 0.9574
valid Loss: 0.0922 Acc: 0.9768

Epoch 9/44
----------
train Loss: 0.1701 Acc: 0.9609
valid Loss: 0.0827 Acc: 0.9829

Epoch 10/44
----------
train Loss: 0.1513 Acc: 0.9638
valid Loss: 0.0806 Acc: 0.9841

Epoch 11/44
----------
train Loss: 0.1506 Acc: 0.9689
valid Loss

In [16]:
# Feature-extraction setup: only the final classification layer stays trainable,
# every other parameter is frozen.
classifier_param_ids = {id(p) for p in model_ft.classifier.parameters()}
for weight in model_ft.parameters():
    weight.requires_grad = id(weight) in classifier_param_ids

# Report which parameters are frozen (False) and which are trainable (True).
# Names are printed relative to each top-level child module of the network.
for child_module in model_ft.children():
    for param_name, param in child_module.named_parameters():
        print(param_name, param.requires_grad)

conv0.weight False
norm0.weight False
norm0.bias False
denseblock1.denselayer1.norm1.weight False
denseblock1.denselayer1.norm1.bias False
denseblock1.denselayer1.conv1.weight False
denseblock1.denselayer1.norm2.weight False
denseblock1.denselayer1.norm2.bias False
denseblock1.denselayer1.conv2.weight False
denseblock1.denselayer2.norm1.weight False
denseblock1.denselayer2.norm1.bias False
denseblock1.denselayer2.conv1.weight False
denseblock1.denselayer2.norm2.weight False
denseblock1.denselayer2.norm2.bias False
denseblock1.denselayer2.conv2.weight False
denseblock1.denselayer3.norm1.weight False
denseblock1.denselayer3.norm1.bias False
denseblock1.denselayer3.conv1.weight False
denseblock1.denselayer3.norm2.weight False
denseblock1.denselayer3.norm2.bias False
denseblock1.denselayer3.conv2.weight False
denseblock1.denselayer4.norm1.weight False
denseblock1.denselayer4.norm1.bias False
denseblock1.denselayer4.conv1.weight False
denseblock1.denselayer4.norm2.weight False
denseblock1.d

In [17]:
# Only the classifier layer is trainable at this point (everything else was
# frozen in the previous cell), so hand the optimizer just the parameters that
# still require gradients instead of the whole network.
trainable_params = [p for p in model_ft.parameters() if p.requires_grad]
optimizer_ft = optim.SGD(trainable_params, lr=0.0001, momentum=0.5)
# Decay the learning rate by a factor of 0.1 every 10 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=10, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=30)

Epoch 0/29
----------
train Loss: 0.0761 Acc: 0.9843
valid Loss: 0.0511 Acc: 0.9914

Epoch 1/29
----------
train Loss: 0.0880 Acc: 0.9811
valid Loss: 0.0496 Acc: 0.9914

Epoch 2/29
----------
train Loss: 0.0836 Acc: 0.9835
valid Loss: 0.0517 Acc: 0.9927

Epoch 3/29
----------
train Loss: 0.0783 Acc: 0.9829
valid Loss: 0.0515 Acc: 0.9927

Epoch 4/29
----------
train Loss: 0.0840 Acc: 0.9840
valid Loss: 0.0497 Acc: 0.9939

Epoch 5/29
----------
train Loss: 0.0815 Acc: 0.9812
valid Loss: 0.0477 Acc: 0.9939

Epoch 6/29
----------
train Loss: 0.0831 Acc: 0.9828
valid Loss: 0.0485 Acc: 0.9927

Epoch 7/29
----------
train Loss: 0.0918 Acc: 0.9803
valid Loss: 0.0551 Acc: 0.9890

Epoch 8/29
----------
train Loss: 0.0833 Acc: 0.9831
valid Loss: 0.0508 Acc: 0.9914

Epoch 9/29
----------
train Loss: 0.0866 Acc: 0.9812
valid Loss: 0.0499 Acc: 0.9939

Epoch 10/29
----------
train Loss: 0.0863 Acc: 0.9814
valid Loss: 0.0521 Acc: 0.9914

Epoch 11/29
----------
train Loss: 0.0858 Acc: 0.9802
valid Loss

In [19]:
# The network is still frozen except for the classifier layer, so only the
# parameters that require gradients are given to the optimizer.
# This run uses a larger learning rate and momentum than the previous one.
trainable_params = [p for p in model_ft.parameters() if p.requires_grad]
optimizer_ft = optim.SGD(trainable_params, lr=0.001, momentum=0.95)
# Decay the learning rate by a factor of 0.1 every 20 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=60)

Epoch 0/59
----------
train Loss: 0.0842 Acc: 0.9817
valid Loss: 0.0542 Acc: 0.9902

Epoch 1/59
----------
train Loss: 0.0963 Acc: 0.9773
valid Loss: 0.0491 Acc: 0.9927

Epoch 2/59
----------
train Loss: 0.0837 Acc: 0.9812
valid Loss: 0.0511 Acc: 0.9902

Epoch 3/59
----------
train Loss: 0.0779 Acc: 0.9824
valid Loss: 0.0506 Acc: 0.9902

Epoch 4/59
----------
train Loss: 0.0811 Acc: 0.9817
valid Loss: 0.0460 Acc: 0.9927

Epoch 5/59
----------
train Loss: 0.0838 Acc: 0.9818
valid Loss: 0.0530 Acc: 0.9927

Epoch 6/59
----------
train Loss: 0.0809 Acc: 0.9814
valid Loss: 0.0498 Acc: 0.9939

Epoch 7/59
----------
train Loss: 0.0820 Acc: 0.9794
valid Loss: 0.0509 Acc: 0.9890

Epoch 8/59
----------
train Loss: 0.0856 Acc: 0.9791
valid Loss: 0.0522 Acc: 0.9878

Epoch 9/59
----------
train Loss: 0.0701 Acc: 0.9831
valid Loss: 0.0426 Acc: 0.9927

Epoch 10/59
----------
train Loss: 0.0677 Acc: 0.9835
valid Loss: 0.0467 Acc: 0.9902

Epoch 11/59
----------
train Loss: 0.0794 Acc: 0.9803
valid Loss

In [20]:
# Make every parameter of the network trainable again.
for weight in model_ft.parameters():
    weight.requires_grad = True

# Report the trainable status of every parameter to confirm the change.
# Names are printed relative to each top-level child module of the network.
for child_module in model_ft.children():
    for param_name, param in child_module.named_parameters():
        print(param_name, param.requires_grad)

conv0.weight True
norm0.weight True
norm0.bias True
denseblock1.denselayer1.norm1.weight True
denseblock1.denselayer1.norm1.bias True
denseblock1.denselayer1.conv1.weight True
denseblock1.denselayer1.norm2.weight True
denseblock1.denselayer1.norm2.bias True
denseblock1.denselayer1.conv2.weight True
denseblock1.denselayer2.norm1.weight True
denseblock1.denselayer2.norm1.bias True
denseblock1.denselayer2.conv1.weight True
denseblock1.denselayer2.norm2.weight True
denseblock1.denselayer2.norm2.bias True
denseblock1.denselayer2.conv2.weight True
denseblock1.denselayer3.norm1.weight True
denseblock1.denselayer3.norm1.bias True
denseblock1.denselayer3.conv1.weight True
denseblock1.denselayer3.norm2.weight True
denseblock1.denselayer3.norm2.bias True
denseblock1.denselayer3.conv2.weight True
denseblock1.denselayer4.norm1.weight True
denseblock1.denselayer4.norm1.bias True
denseblock1.denselayer4.conv1.weight True
denseblock1.denselayer4.norm2.weight True
denseblock1.denselayer4.norm2.bias Tru

In [21]:
# Final stage: all parameters are passed to the optimizer, using a small
# learning rate.
optimizer_ft = optim.SGD(model_ft.parameters(), momentum=0.8, lr=0.0001)
# Decay the learning rate by a factor of 0.1 every 10 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, gamma=0.1, step_size=10)
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=50,
)

Epoch 0/49
----------
train Loss: 0.0680 Acc: 0.9840
valid Loss: 0.0453 Acc: 0.9927

Epoch 1/49
----------
train Loss: 0.0591 Acc: 0.9846
valid Loss: 0.0441 Acc: 0.9939

Epoch 2/49
----------
train Loss: 0.0786 Acc: 0.9805
valid Loss: 0.0460 Acc: 0.9939

Epoch 3/49
----------
train Loss: 0.0607 Acc: 0.9847
valid Loss: 0.0435 Acc: 0.9951

Epoch 4/49
----------
train Loss: 0.0667 Acc: 0.9838
valid Loss: 0.0463 Acc: 0.9939

Epoch 5/49
----------
train Loss: 0.0758 Acc: 0.9808
valid Loss: 0.0459 Acc: 0.9902

Epoch 6/49
----------
train Loss: 0.0645 Acc: 0.9840
valid Loss: 0.0463 Acc: 0.9939

Epoch 7/49
----------
train Loss: 0.0675 Acc: 0.9837
valid Loss: 0.0460 Acc: 0.9939

Epoch 8/49
----------
train Loss: 0.0743 Acc: 0.9811
valid Loss: 0.0488 Acc: 0.9939

Epoch 9/49
----------
train Loss: 0.0691 Acc: 0.9820
valid Loss: 0.0476 Acc: 0.9939

Epoch 10/49
----------
train Loss: 0.0671 Acc: 0.9831
valid Loss: 0.0478 Acc: 0.9927

Epoch 11/49
----------
train Loss: 0.0634 Acc: 0.9835
valid Loss