In [1]:
import torch
from torch import utils
import numpy as np
import torchvision as tv
from torchvision.models import vgg16 as VGG
from torchvision.datasets import Imagenette
import torchvision.transforms as tfs
import torch.nn as nn
from sklearn.model_selection import train_test_split 

In [2]:
transform = tfs.Compose([tfs.ToTensor(), tfs.Resize((320, 320))])
# Change download to 'True' if you don't have the dataset downloaded to your machine
data = Imagenette(root="./data", download=False, transform=transform)

In [3]:
len(data)

9469

In [5]:
# Create a 60/20/20 split for the dataset
most, test = train_test_split(data, random_state=1, test_size=0.2)
train, val = train_test_split(most, random_state=1, test_size = 0.25)

# Put the data into DataLoaders
train_loader = utils.data.DataLoader(train, batch_size=32, shuffle=True)
valid_loader = utils.data.DataLoader(val, batch_size=32, shuffle=False)
test_loader = utils.data.DataLoader(test, batch_size=32, shuffle=False)


In [6]:
class VGG16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU())
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(), 
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer5 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer6 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU())
        self.layer7 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer8 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer9 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer10 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.layer11 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer12 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU())
        self.layer13 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(7*7*512, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = self.layer13(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [7]:
num_classes = 10
num_epochs = 10
batch_size = 32
learning_rate = 0.005

model = VGG16(num_classes)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)  



In [8]:

# Train the model
total_step = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
            
    # Validation
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
    
        print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x51200 and 25088x4096)

In [None]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    print('Accuracy of the network on the {} test images: {} %'.format(10000, 100 * correct / total))

In [15]:
data[0]

(tensor([[[0.5508, 0.5232, 0.4149,  ..., 0.7609, 0.7742, 0.7208],
          [0.2876, 0.5277, 0.8152,  ..., 0.8106, 0.7899, 0.7621],
          [0.4784, 0.4672, 0.8009,  ..., 0.7503, 0.7396, 0.7332],
          ...,
          [0.4478, 0.4331, 0.4392,  ..., 0.5047, 0.4253, 0.4028],
          [0.4060, 0.3928, 0.3979,  ..., 0.4488, 0.4144, 0.4015],
          [0.3653, 0.3543, 0.3689,  ..., 0.4082, 0.3856, 0.3765]],
 
         [[0.5934, 0.5467, 0.3624,  ..., 0.7901, 0.8086, 0.7630],
          [0.3087, 0.5571, 0.8067,  ..., 0.8517, 0.8333, 0.8120],
          [0.4926, 0.4772, 0.8364,  ..., 0.7874, 0.7789, 0.7798],
          ...,
          [0.6448, 0.6195, 0.5799,  ..., 0.5250, 0.4368, 0.4220],
          [0.5772, 0.5563, 0.5346,  ..., 0.4523, 0.4264, 0.4235],
          [0.5048, 0.4985, 0.5190,  ..., 0.4189, 0.4109, 0.4063]],
 
         [[0.3293, 0.3436, 0.2679,  ..., 0.7691, 0.7763, 0.7279],
          [0.2025, 0.3024, 0.5743,  ..., 0.8211, 0.7901, 0.7660],
          [0.4485, 0.3916, 0.6467,  ...,

In [16]:
data[4]

(tensor([[[0.4678, 0.4098, 0.3862,  ..., 0.3556, 0.3994, 0.3653],
          [0.4465, 0.4428, 0.4174,  ..., 0.4494, 0.3903, 0.3513],
          [0.3951, 0.4606, 0.4425,  ..., 0.4857, 0.4000, 0.3579],
          ...,
          [0.4200, 0.4172, 0.4478,  ..., 0.2593, 0.2950, 0.3234],
          [0.5500, 0.4911, 0.4515,  ..., 0.2241, 0.2952, 0.2828],
          [0.5675, 0.4733, 0.3560,  ..., 0.2132, 0.2731, 0.3180]],
 
         [[0.6055, 0.5239, 0.5376,  ..., 0.4930, 0.5120, 0.4580],
          [0.5574, 0.5933, 0.5832,  ..., 0.4531, 0.4764, 0.4768],
          [0.4887, 0.5770, 0.5372,  ..., 0.4606, 0.4674, 0.4516],
          ...,
          [0.4546, 0.5061, 0.5220,  ..., 0.4300, 0.4852, 0.4719],
          [0.5826, 0.5373, 0.5274,  ..., 0.3699, 0.4787, 0.4450],
          [0.6369, 0.5540, 0.4451,  ..., 0.3214, 0.4294, 0.4834]],
 
         [[0.3481, 0.2883, 0.2069,  ..., 0.1620, 0.2123, 0.1894],
          [0.2328, 0.2921, 0.2165,  ..., 0.2506, 0.1935, 0.1697],
          [0.1518, 0.2612, 0.1850,  ...,

In [17]:
data[4000]

(tensor([[[2.7007e-03, 7.4773e-03, 8.4504e-03,  ..., 4.9960e-03,
           0.0000e+00, 0.0000e+00],
          [7.6000e-03, 1.2759e-02, 7.1699e-03,  ..., 4.9960e-03,
           0.0000e+00, 0.0000e+00],
          [1.5862e-02, 5.2861e-03, 1.5413e-03,  ..., 5.3995e-03,
           1.2779e-04, 0.0000e+00],
          ...,
          [0.0000e+00, 1.0717e-03, 3.7075e-03,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          [0.0000e+00, 9.1285e-05, 5.2982e-04,  ..., 1.4238e-03,
           0.0000e+00, 0.0000e+00],
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 1.6653e-03,
           0.0000e+00, 0.0000e+00]],
 
         [[1.6533e-02, 1.0714e-03, 3.3587e-03,  ..., 1.5204e-03,
           0.0000e+00, 0.0000e+00],
          [4.5164e-04, 1.3596e-03, 4.9463e-03,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          [4.2266e-03, 5.9889e-03, 1.1153e-02,  ..., 1.4179e-03,
           1.2779e-04, 0.0000e+00],
          ...,
          [0.0000e+00, 1.0717e-03, 3.7075e-03,  ..., 3.381