In [1]:
import torch
import torchvision

import numpy as np

from torch.utils.data import Dataset


device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using {device} for inference')

Using cpu for inference


In [15]:
# np.random.seed(20)
# g = torch.Generator()
# g.manual_seed(0)
# labels = labels[torch.randperm(labels.size()[0], generator=g)]

In [7]:
num_epochs = 4
batch_size = 256
learning_rate = 0.001


transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((.5, .5, .5), (.5, .5, .5)) # Sprawdzic najlepsze normalizacje dla MNIST i CIFAR
])
# Osobne transformy dla treningowego i validacyjnego (bez augmentacji)
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [8]:
class ConvNet(torch.nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 6, 5)
        self.pool = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(6, 16, 5)
        self.fc1 = torch.nn.Linear(16 * 5 * 5, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        x = self.pool(torch.nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 16*5*5)
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [9]:
model = ConvNet().to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # Spróbowac ADAM

In [11]:
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    g = torch.Generator()
    # Sprawdzic czy pierwszy batch zwraca te same labelki
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels[torch.randperm(labels.size()[0], generator=g)]

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

print('Finished Training')
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)

with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0 for i in range(10)]
    n_class_samples = [0 for i in range(10)]
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        for i in range(batch_size):
            label = labels[i]
            pred = predicted[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

    for i in range(10):
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f'Accuracy of {classes[i]}: {acc} %')

Epoch [1/4], Step [100/196], Loss: 2.2996
Epoch [2/4], Step [100/196], Loss: 2.2997
Epoch [3/4], Step [100/196], Loss: 2.2998
Epoch [4/4], Step [100/196], Loss: 2.2998
Finished Training


IndexError: index 16 is out of bounds for dimension 0 with size 16

In [7]:
for para in model.parameters():
    para.requires_grad = False
    print(para)

Parameter containing:
tensor([[[[-2.8641e-02,  9.7612e-02, -7.6139e-02,  8.8273e-02,  5.7440e-02],
          [ 9.3653e-02,  4.0799e-02,  7.5752e-02,  5.3413e-02,  3.5448e-02],
          [ 1.0850e-01,  7.4022e-02, -3.5309e-02, -3.5251e-02, -3.6772e-02],
          [ 1.1086e-01, -9.0336e-02,  6.5340e-02,  9.1066e-02, -9.2654e-02],
          [ 7.4652e-02, -1.2250e-02,  8.4167e-02,  5.3923e-02, -6.1007e-02]],

         [[ 1.3482e-01, -3.3622e-02,  6.6477e-02,  8.2783e-02,  7.5980e-02],
          [-7.0011e-03,  2.2431e-02,  1.3813e-01, -3.7662e-02, -2.0957e-02],
          [ 3.3099e-02,  7.8616e-02, -5.0706e-02,  1.6401e-02,  3.2377e-02],
          [ 7.7119e-02,  3.6294e-02,  5.5913e-02,  1.0017e-01,  8.4243e-02],
          [ 3.9509e-02,  7.2642e-04, -5.9012e-02,  6.2372e-02, -4.0406e-03]],

         [[ 1.4141e-01,  1.5086e-01,  4.3361e-02,  1.4693e-01,  1.3833e-02],
          [-2.7852e-02, -4.6459e-02,  1.4107e-01,  1.6062e-01, -6.0861e-02],
          [-2.1594e-02,  1.3829e-01,  1.0787e-01, 

In [8]:
for para in model.parameters():
    para.requires_grad = True
    print(para)

Parameter containing:
tensor([[[[-2.8641e-02,  9.7612e-02, -7.6139e-02,  8.8273e-02,  5.7440e-02],
          [ 9.3653e-02,  4.0799e-02,  7.5752e-02,  5.3413e-02,  3.5448e-02],
          [ 1.0850e-01,  7.4022e-02, -3.5309e-02, -3.5251e-02, -3.6772e-02],
          [ 1.1086e-01, -9.0336e-02,  6.5340e-02,  9.1066e-02, -9.2654e-02],
          [ 7.4652e-02, -1.2250e-02,  8.4167e-02,  5.3923e-02, -6.1007e-02]],

         [[ 1.3482e-01, -3.3622e-02,  6.6477e-02,  8.2783e-02,  7.5980e-02],
          [-7.0011e-03,  2.2431e-02,  1.3813e-01, -3.7662e-02, -2.0957e-02],
          [ 3.3099e-02,  7.8616e-02, -5.0706e-02,  1.6401e-02,  3.2377e-02],
          [ 7.7119e-02,  3.6294e-02,  5.5913e-02,  1.0017e-01,  8.4243e-02],
          [ 3.9509e-02,  7.2642e-04, -5.9012e-02,  6.2372e-02, -4.0406e-03]],

         [[ 1.4141e-01,  1.5086e-01,  4.3361e-02,  1.4693e-01,  1.3833e-02],
          [-2.7852e-02, -4.6459e-02,  1.4107e-01,  1.6062e-01, -6.0861e-02],
          [-2.1594e-02,  1.3829e-01,  1.0787e-01, 

In [9]:
params = model.state_dict()
params.keys()

odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias'])

In [10]:
model.fc3.weight.requires_grad = True
for name, param in model.named_parameters():
    if param.requires_grad:print(name)

conv1.weight
conv1.bias
conv2.weight
conv2.bias
fc1.weight
fc1.bias
fc2.weight
fc2.bias
fc3.weight
fc3.bias


In [11]:
for name, param in model.named_parameters():
    if param.requires_grad and 'fc3' in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

In [12]:
for name, param in model.named_parameters():print(name, param)

conv1.weight Parameter containing:
tensor([[[[-2.8641e-02,  9.7612e-02, -7.6139e-02,  8.8273e-02,  5.7440e-02],
          [ 9.3653e-02,  4.0799e-02,  7.5752e-02,  5.3413e-02,  3.5448e-02],
          [ 1.0850e-01,  7.4022e-02, -3.5309e-02, -3.5251e-02, -3.6772e-02],
          [ 1.1086e-01, -9.0336e-02,  6.5340e-02,  9.1066e-02, -9.2654e-02],
          [ 7.4652e-02, -1.2250e-02,  8.4167e-02,  5.3923e-02, -6.1007e-02]],

         [[ 1.3482e-01, -3.3622e-02,  6.6477e-02,  8.2783e-02,  7.5980e-02],
          [-7.0011e-03,  2.2431e-02,  1.3813e-01, -3.7662e-02, -2.0957e-02],
          [ 3.3099e-02,  7.8616e-02, -5.0706e-02,  1.6401e-02,  3.2377e-02],
          [ 7.7119e-02,  3.6294e-02,  5.5913e-02,  1.0017e-01,  8.4243e-02],
          [ 3.9509e-02,  7.2642e-04, -5.9012e-02,  6.2372e-02, -4.0406e-03]],

         [[ 1.4141e-01,  1.5086e-01,  4.3361e-02,  1.4693e-01,  1.3833e-02],
          [-2.7852e-02, -4.6459e-02,  1.4107e-01,  1.6062e-01, -6.0861e-02],
          [-2.1594e-02,  1.3829e-01, 

In [13]:
for name, param in model.named_parameters():
    if param.requires_grad:print(name)

fc3.weight
fc3.bias


In [14]:
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 2000 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss: {loss.item():.4f}')

print('Finished Training')
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)

with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0 for i in range(10)]
    n_class_samples = [0 for i in range(10)]
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        for i in range(batch_size):
            label = labels[i]
            pred = predicted[i]
            if (label == pred):
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network: {acc} %')

    for i in range(10):
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f'Accuracy of {classes[i]}: {acc} %')

Epoch [1/4], Step [2000/12500], Loss: 2.1092
Epoch [1/4], Step [4000/12500], Loss: 2.1398
Epoch [1/4], Step [6000/12500], Loss: 1.7118
Epoch [1/4], Step [8000/12500], Loss: 1.8115
Epoch [1/4], Step [10000/12500], Loss: 2.2887
Epoch [1/4], Step [12000/12500], Loss: 1.7904
Epoch [2/4], Step [2000/12500], Loss: 2.1692
Epoch [2/4], Step [4000/12500], Loss: 2.0884
Epoch [2/4], Step [6000/12500], Loss: 1.6860
Epoch [2/4], Step [8000/12500], Loss: 1.6932
Epoch [2/4], Step [10000/12500], Loss: 2.3648
Epoch [2/4], Step [12000/12500], Loss: 1.7367
Epoch [3/4], Step [2000/12500], Loss: 2.2079
Epoch [3/4], Step [4000/12500], Loss: 2.0870
Epoch [3/4], Step [6000/12500], Loss: 1.6993
Epoch [3/4], Step [8000/12500], Loss: 1.6396
Epoch [3/4], Step [10000/12500], Loss: 2.3981
Epoch [3/4], Step [12000/12500], Loss: 1.7188
Epoch [4/4], Step [2000/12500], Loss: 2.2264
Epoch [4/4], Step [4000/12500], Loss: 2.0886
Epoch [4/4], Step [6000/12500], Loss: 1.7161
Epoch [4/4], Step [8000/12500], Loss: 1.6095
Epoc