In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [17]:
import torch
print(torch.__version__)

1.0.0.dev20181207


In [18]:
torch.cuda.is_available()

True

In [19]:
os.listdir()

['projects',
 'envML',
 'data',
 'Flowers_pytorch_sgd_no_bias.ipynb',
 'Flowers_pytorch_sgd2.ipynb',
 'Flowers_pytorch_sgd_topScore.ipynb',
 '.ipynb_checkpoints',
 'Untitled.ipynb']

In [20]:
!ls

data			    Flowers_pytorch_sgd_no_bias.ipynb	Untitled.ipynb
envML			    Flowers_pytorch_sgd_topScore.ipynb
Flowers_pytorch_sgd2.ipynb  projects


In [21]:
data_dir = 'data/flower_data/'
PATH = data_dir

train_dir = 'train'
val_dir = 'valid'

In [22]:
# again, list total number of classes, and list them all
# os.list dir sorting depends on OS dependent file indexing, so leaving it as it is

classes = os.listdir(f'{data_dir}/{train_dir}')
classes.sort()
ClassesNumer = len(classes)
print("Class Total Count: ", ClassesNumer)
#print(classes)

Class Total Count:  102


In [23]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    train_dir: transforms.Compose([
        #transforms.Resize(224),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        #transforms.RandomRotation(degrees=90),
        #transforms.RandomRotation(degrees=45),
        transforms.ToTensor(),
       # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]),
    val_dir: transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
       # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]),
}

In [24]:

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),data_transforms[x]) for x in [train_dir, val_dir]}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=32, shuffle=True, num_workers=8) for x in [train_dir, val_dir]}

dataset_sizes = {x: len(image_datasets[x]) for x in [train_dir, val_dir]}

class_names = image_datasets[train_dir].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [25]:
print(dataloaders)
print(dataset_sizes)
print(device)

{'train': <torch.utils.data.dataloader.DataLoader object at 0x7f1a9ed73c50>, 'valid': <torch.utils.data.dataloader.DataLoader object at 0x7f1a93941e10>}
{'train': 6551, 'valid': 818}
cuda:0


In [26]:
print(image_datasets[train_dir])

Dataset ImageFolder
    Number of datapoints: 6551
    Root Location: data/flower_data/train
    Transforms (if any): Compose(
                             RandomResizedCrop(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
                             RandomHorizontalFlip(p=0.5)
                             RandomVerticalFlip(p=0.5)
                             ToTensor()
                             Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         )
    Target Transforms (if any): None


In [27]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('lr{}, Epoch {}/{}'.format(scheduler.get_lr(),epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in [train_dir, val_dir]:
            if phase == train_dir:
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == train_dir):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == train_dir:
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == val_dir and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [28]:
# Load a pretrained model and reset final fully connected layer

model_ft = models.densenet201(pretrained='imagenet')
#num_ftrs = model_ft.classifier.in_features
#model_ft.classifier = nn.Linear(num_ftrs, ClassesNumer)
model_ft.fc = nn.Linear( model_ft.classifier.out_features, ClassesNumer )

model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
#optimizer_ft = optim.Adam(model_ft.parameters(),lr=0.0005,amsgrad=True)
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.95)

# Decay LR by a factor of 0.1 every ? epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=30, gamma=0.1)

In [29]:
# To view which layers are freeze and which layers are not freezed:
for name, child in model_ft.named_children():
  for name_2, params in child.named_parameters():
    print(name_2, params.requires_grad)

conv0.weight True
norm0.weight True
norm0.bias True
denseblock1.denselayer1.norm1.weight True
denseblock1.denselayer1.norm1.bias True
denseblock1.denselayer1.conv1.weight True
denseblock1.denselayer1.norm2.weight True
denseblock1.denselayer1.norm2.bias True
denseblock1.denselayer1.conv2.weight True
denseblock1.denselayer2.norm1.weight True
denseblock1.denselayer2.norm1.bias True
denseblock1.denselayer2.conv1.weight True
denseblock1.denselayer2.norm2.weight True
denseblock1.denselayer2.norm2.bias True
denseblock1.denselayer2.conv2.weight True
denseblock1.denselayer3.norm1.weight True
denseblock1.denselayer3.norm1.bias True
denseblock1.denselayer3.conv1.weight True
denseblock1.denselayer3.norm2.weight True
denseblock1.denselayer3.norm2.bias True
denseblock1.denselayer3.conv2.weight True
denseblock1.denselayer4.norm1.weight True
denseblock1.denselayer4.norm1.bias True
denseblock1.denselayer4.conv1.weight True
denseblock1.denselayer4.norm2.weight True
denseblock1.denselayer4.norm2.bias Tru

In [30]:
# Train and evaluate
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=60)

Epoch 0/59
----------
train Loss: 2.5873 Acc: 0.5479
valid Loss: 0.3518 Acc: 0.9132

Epoch 1/59
----------
train Loss: 0.5017 Acc: 0.8800
valid Loss: 0.2399 Acc: 0.9352

Epoch 2/59
----------
train Loss: 0.3368 Acc: 0.9174
valid Loss: 0.1921 Acc: 0.9523

Epoch 3/59
----------
train Loss: 0.2523 Acc: 0.9383
valid Loss: 0.1283 Acc: 0.9694

Epoch 4/59
----------
train Loss: 0.2052 Acc: 0.9483
valid Loss: 0.1093 Acc: 0.9719

Epoch 5/59
----------
train Loss: 0.1722 Acc: 0.9560
valid Loss: 0.1051 Acc: 0.9756

Epoch 6/59
----------
train Loss: 0.1666 Acc: 0.9574
valid Loss: 0.0942 Acc: 0.9817

Epoch 7/59
----------
train Loss: 0.1484 Acc: 0.9637
valid Loss: 0.0919 Acc: 0.9780

Epoch 8/59
----------
train Loss: 0.1342 Acc: 0.9653
valid Loss: 0.0939 Acc: 0.9817

Epoch 9/59
----------
train Loss: 0.1376 Acc: 0.9660
valid Loss: 0.0802 Acc: 0.9780

Epoch 10/59
----------
train Loss: 0.1109 Acc: 0.9742
valid Loss: 0.0863 Acc: 0.9817

Epoch 11/59
----------
train Loss: 0.1182 Acc: 0.9711
valid Loss

KeyboardInterrupt: 

In [None]:
#Freeze all layers first
for param in model_ft.parameters():
    param.requires_grad = False
    
# Then unfreeze last classification layer only for feature extract
for param in model_ft.classifier.parameters():
    param.requires_grad = True    

    
# To view which layers are freeze and which layers are not freezed:
for name, child in model_ft.named_children():
  for name_2, params in child.named_parameters():
    print(name_2, params.requires_grad)

In [None]:
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.7)
# Decay LR by a factor of 0.1 every ? epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=10, gamma=0.01)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=40)

In [None]:
#serve that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.95, nesterov=True)
# Decay LR by a factor of 0.1 every ? epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.01)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=60)

In [None]:
#UnFreeze all layers first
for param in model_ft.parameters():
    param.requires_grad = True 

# To view which layers are freeze and which layers are not freezed:
for name, child in model_ft.named_children():
  for name_2, params in child.named_parameters():
    print(name_2, params.requires_grad)

In [None]:
#serve that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.9)
# Decay LR by a factor of 0.1 every ? epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=20, gamma=0.01)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=50)