In [40]:
import torch 
import torchvision
from torchvision import datasets, models, transforms
import torch.nn as nn

In [41]:
# Detect once whether a CUDA device is usable; the flag is consulted later to
# decide whether the model and the batches are moved onto the GPU.
train_on_gpu = torch.cuda.is_available()
# print(train_on_gpu)  # uncomment to display the flag

In [42]:
# VGG-16 takes 224x224 three-channel images as input, so every image is
# resized to 224x224 and converted to a 3-channel greyscale image before being
# turned into a tensor.
# Resize is used instead of RandomResizedCrop because this one transform is
# shared by the training and the test loader: a random crop can cut away most
# of a digit and makes evaluation non-deterministic.
# NOTE(review): torchvision documents its pretrained models as expecting
# ImageNet mean/std normalisation; consider appending transforms.Normalize
# - confirm against the torchvision version in use.
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(3),
    transforms.ToTensor(),
])

In [43]:
# Load the pretrained model from pytorch:
# torchvision's VGG-16 architecture, with pretrained=True requesting the
# published pretrained weights instead of a random initialisation.
# NOTE(review): the weights are presumably fetched on first use - confirm.
vgg16 = models.vgg16(pretrained=True)

In [44]:
# Freeze every parameter of the "features" stack so that no gradients are
# computed for it; the classifier parameters are left trainable.
for frozen_param in vgg16.features.parameters():
    frozen_param.requires_grad_(False)

In [45]:
# Number of input features feeding the last fully connected classifier layer
n_inputs = vgg16.classifier[6].in_features
# The labels are the digits 0 through 9, i.e. 10 distinct classes, so the new
# head needs 10 outputs. With only 9 outputs, label 9 falls outside the class
# range nn.CrossEntropyLoss accepts and training fails on such a sample.
FinalDecisionLayer = nn.Linear(n_inputs, 10)
# Replace the last classification layer with our fully connected layer
vgg16.classifier[6] = FinalDecisionLayer
# if GPU is available, move the model to GPU
if train_on_gpu:
    vgg16.cuda()
# check to see that your last layer produces the expected number of outputs
# print(vgg16.classifier[6].out_features)

RuntimeError: CUDA out of memory. Tried to allocate 392.00 MiB (GPU 0; 4.00 GiB total capacity; 2.72 GiB already allocated; 42.61 MiB free; 168.75 MiB cached)

In [23]:
import torch.optim as optim

# Loss function: categorical cross-entropy over the class scores.
criterion = nn.CrossEntropyLoss()

# Optimizer: stochastic gradient descent with learning rate 0.001, handed only
# the parameters of the classifier part of the network.
optimizer = optim.SGD(params=vgg16.classifier.parameters(), lr=1e-3)

In [30]:
# Mini-batch sizes used by the training and the test data loaders
batch_size_train = batch_size_test = 10

In [31]:
# Training split of MNIST (downloaded into '/files/' if not already present),
# passed through data_transform and served in shuffled mini-batches.
train_loader = torch.utils.data.DataLoader(
    dataset=torchvision.datasets.MNIST(
        root='/files/',
        train=True,
        transform=data_transform,
        download=True,
    ),
    batch_size=batch_size_train,
    shuffle=True,
)

# Test split of MNIST, built the same way with the same transform.
test_loader = torch.utils.data.DataLoader(
    dataset=torchvision.datasets.MNIST(
        root='/files/',
        train=False,
        transform=data_transform,
        download=True,
    ),
    batch_size=batch_size_test,
    shuffle=True,
)

In [32]:
n_epochs = 5
# report the running training loss once per this many mini-batches
print_every = 20

# Switch to training mode explicitly instead of relying on the module's default
# state: an earlier cell run in this kernel may have left the model in eval
# mode, which would silently disable dropout in the classifier.
vgg16.train()

for epoch in range(1, n_epochs + 1):

    # running sum of the batch losses since the last report
    train_loss = 0.0

    ###################
    # train the model #
    ###################
    for batch_i, (data, target) in enumerate(train_loader):
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = vgg16(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # accumulate as a Python float (.item()) so the graph is not kept alive
        train_loss += loss.item()

        # print the mean loss over the last `print_every` mini-batches, then reset
        if batch_i % print_every == print_every - 1:
            print('Epoch %d, Batch %d loss: %.16f' %
                  (epoch, batch_i + 1, train_loss / print_every))
            train_loss = 0.0

RuntimeError: CUDA out of memory. Tried to allocate 124.00 MiB (GPU 0; 4.00 GiB total capacity; 2.86 GiB already allocated; 44.61 MiB free; 23.51 MiB cached)