In [11]:
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F # for activation functions. On the other hand, for adding act. func. we can use torch.nn as well.
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision

In [12]:
# device setup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [27]:
# LOAD PRE TRAINED MODEL AND MODIFY

class Identity(nn.Module): # this class is for modifying pre-trained model.
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x

model = torchvision.models.vgg16(pretrained=True)
for param in model.parameters():
    param.requires_grad=False # cuz I do not want to train VGG model, want to train rest of the network. Faster training results.

model.avgpool = Identity() # we are discard avgpool layer on VGG16
# model.classifier = nn.Linear(512, 10) # modifying linear layer as fitting our situation
model.classifier = nn.Sequential(nn.Linear(512, 100), 
                                 nn.ReLU(),
                                 nn.Linear(100, 10))
# fine tuning area. Fine tuning is a process that takes a model that has already been trained for one given task and the tunes or tweaks the model to make it perform a second similar task.
model.to(device=device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [24]:
# hyperparameters

in_channel = 3
num_classes = 10
learning_rate = 1e-3
batch_size = 64
num_epochs = 2
load_model = True

In [25]:
train_dataset = datasets.CIFAR10(root='dataset/', train=True, transform=transforms.ToTensor(), download=True) # ToTensor() converts original data that loaded from dataset library as np.array, to tensor. 
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) # Shuffle makes us sure about do not have same images on batches on different epochs.

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Files already downloaded and verified


In [26]:
for epoch in range(num_epochs):
    losses = []

    for batch_idx, (data, targets) in enumerate(train_loader): # data = img tensor, targets = labels tensor in related batch.
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)
        losses.append(loss.item())
        
        # backward
        optimizer.zero_grad() # set gradient putput to zero for re-calculate gradient in every loop. We DO NOT want get interacted by previous gradient value.
        loss.backward() # compute gradient for every parameter

        # gradient descent or adam step 
        optimizer.step() # performs parameter update based on current computed gradient.
    mean_loss = sum(losses) / len(losses) # avarage of losses list.
    print(f"Loss at epoch {epoch} was {mean_loss:.5f}")

Loss at epoch 0 was 1.32735
Loss at epoch 1 was 1.15469
