In [1]:
# Imports here
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt

# Import 
import json
import os
from collections import OrderedDict
import pandas as pd
import random
import torch
import torchvision
from torch import nn
from torchvision import transforms, models
from torchvision.datasets import ImageFolder

In [2]:
data_dir = '/notebooks/flowers'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
test_dir = data_dir + '/test'
#!unzip /flowers.zip

In [3]:
# TODO: Define your transforms for the training, validation, and testing sets
#data_transforms = # 224 -> 256
train_dir_transforms = transforms.Compose([transforms.RandomResizedCrop(224), # before all 225 ##256
                                           transforms.RandomHorizontalFlip(),
                                           transforms.ToTensor(),
                                           transforms.Normalize([0.485, 0.456, 0.406],
                                                                [0.229, 0.224, 0.225])])
valid_dir_transforms = transforms.Compose([transforms.Resize(224),
                                           transforms.CenterCrop(224),
                                           transforms.ToTensor(),
                                           transforms.Normalize([0.485, 0.456, 0.406],
                                                                [0.229, 0.224, 0.225])])
test_dir_transforms = transforms.Compose([transforms.Resize(224),
                                          transforms.CenterCrop(224),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.485, 0.456, 0.406],
                                                               [0.229, 0.224, 0.225])])

# TODO: Load the datasets with ImageFolder
#image_datasets = 
train_data = ImageFolder(train_dir, train_dir_transforms)
valid_set = ImageFolder(valid_dir, valid_dir_transforms)
test_set = ImageFolder(test_dir, test_dir_transforms)

# TODO: Using the image datasets and the trainforms, define the dataloaders
#dataloaders = 
trainloader = torch.utils.data.DataLoader(train_data, batch_size =64, shuffle=True)
valid_data = torch.utils.data.DataLoader(valid_set, batch_size=64)
test_data = torch.utils.data.DataLoader(test_set, batch_size=64)

In [4]:
with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)

In [47]:
model = models.vit_b_16(weights='DEFAULT')

In [48]:
model

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate=none)
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_att

In [49]:
#freeze parameters
for param in model.parameters():
    param.requires_grad = False 

#new classifier for the feedforward
classifier = nn.Sequential(OrderedDict([
                            ('fc1', nn.Linear(768, 320)), #4608,320
                            ('relu', nn.ReLU()),
                            ('dropout', nn.Dropout(0.5)),
                            ('fc2', nn.Linear(320, 300)), #320,300
                            ('relu', nn.ReLU()),
                            ('dropout', nn.Dropout(0.5)),
                            ('fc3', nn.Linear(300, 102)), #300,102
                            ('output', nn.LogSoftmax(dim=1))
                            ]))
model.heads = classifier
print(model)

VisionTransformer(
  (conv_proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate=none)
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=3072, out_features=768, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (self_att

In [36]:
def validation(model, valid_data, criterion):
    test_loss = 0
    accuracy = 0
    for images, labels in valid_data:
        
        images,labels = images.to('cuda'), labels.to('cuda')
        
        output = model.forward(images)
        batch_loss = criterion(output, labels)
        test_loss += batch_loss.item()
        
        ps = torch.exp(output)
        equality = (labels.data == ps.max(dim=1)[1])
        accuracy += equality.type(torch.FloatTensor).mean()
    
    return test_loss, accuracy

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
criterion = nn.functional.nll_loss
epochs = 30
#running_loss = 0
model.to('cuda')
for epoch in range(epochs):
  model.train()
  counter = 0
  for data in trainloader:
    X , y= data
    X , y = X.to('cuda'), y.to('cuda')
    optimizer.zero_grad() # clear gradient information.
    #output = model(X) <- gives errors
    output = model.forward(X)
    loss = criterion(output, y)
    loss.backward() # do pack-propagation step
    optimizer.step() # tell optimizer that you finished batch/iteration.
    counter += 1
    #running_loss += loss.item()
 
 
  # network in eval mode for inference
  model.eval()
            
  # Turn off gradients for validation, saves memory and computations
  with torch.no_grad():
      test_loss, accuracy = validation(model, valid_data, criterion)
  print("Epoch: {}/{}...".format(epoch+1,epochs),
        "Loss: {:.4f}".format(loss.data),
        "Train Loss: {:.3f}.. ".format(test_loss/len(valid_data)),
        "Train Accuracy: {:.3f}".format(accuracy/len(valid_data)))
  running_loss = 0
  model.train()

Epoch: 1/30... Loss: 1.2649 Train Loss: 1.324..  Train Accuracy: 0.706
Epoch: 2/30... Loss: 1.8575 Train Loss: 1.303..  Train Accuracy: 0.714
Epoch: 3/30... Loss: 2.2802 Train Loss: 1.286..  Train Accuracy: 0.716
Epoch: 4/30... Loss: 1.9379 Train Loss: 1.268..  Train Accuracy: 0.718
Epoch: 5/30... Loss: 2.1690 Train Loss: 1.251..  Train Accuracy: 0.717
Epoch: 6/30... Loss: 1.5231 Train Loss: 1.236..  Train Accuracy: 0.720
Epoch: 7/30... Loss: 1.3185 Train Loss: 1.220..  Train Accuracy: 0.723
Epoch: 8/30... Loss: 1.3600 Train Loss: 1.205..  Train Accuracy: 0.723
Epoch: 9/30... Loss: 1.0285 Train Loss: 1.190..  Train Accuracy: 0.731
Epoch: 10/30... Loss: 1.6676 Train Loss: 1.175..  Train Accuracy: 0.739
Epoch: 11/30... Loss: 1.6910 Train Loss: 1.162..  Train Accuracy: 0.741
Epoch: 12/30... Loss: 1.8437 Train Loss: 1.147..  Train Accuracy: 0.742
Epoch: 13/30... Loss: 1.5992 Train Loss: 1.133..  Train Accuracy: 0.744
Epoch: 14/30... Loss: 1.1032 Train Loss: 1.120..  Train Accuracy: 0.747
E

In [51]:
# Evaluate the trained network.
total = 0
correct = 0
model.to('cuda')
with torch.no_grad():   # No need for keepnig track of necessary changes to the gradient.
  for X,y in test_data:
    X , y = X.to('cuda'), y.to('cuda')
    output = model(X)
    for idx, val in enumerate(output):
      if torch.argmax(val) == y[idx]:
        correct += 1
      total += 1
  print('Accuracy:', round(correct/total, 3))

Accuracy: 0.613
