In [42]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Turn on the cuDNN benchmark flag up front, before any network below is built.
torch.backends.cudnn.benchmark=True

# Net

In [60]:
class Net(nn.Module):
    """LeNet-style CNN: two 5x5 convolutions, each followed by ReLU and a 2x2
    max-pool, then three fully connected layers.

    The first linear layer takes 16*5*5 features, which is what a
    single-channel 32x32 input is reduced to by the two conv/pool stages.
    `forward` returns the raw output of the last linear layer (10 values per
    sample); no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages -- Conv2d(in_channels, out_channels, kernel_size)
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected head -- Linear(in_features, out_features)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Apply the conv/pool stages, flatten per sample, then the linear head."""
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Collapse every dimension except the first into one feature axis.
        x = x.view(-1, self.num_flat_features(x))
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of `x` except the first."""
        return x.size()[1:].numel()

if __name__ == "__main__":
    # Smoke test: one forward/backward/SGD step on random data.
    # Use the first GPU when CUDA is present, otherwise fall back to the CPU so
    # the cell also runs on machines without a GPU.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    ## DEFINE
    net = Net().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01)

    ## INITIALIZE
    x  = torch.randn(1, 1, 32, 32, device=device)     # one single-channel 32x32 input
    y_ = torch.randn(10, device=device).view(1, -1)   # random target reshaped to (1, 10)
    print ('x:', type(x))
    y  = net(x)

    ## RUN
    optimizer.zero_grad()
    print (net.conv1.bias.grad)   # gradient of conv1's bias before backward()
    loss = criterion(y, y_)
    loss.backward()               # the graph is back-propagated once, so it need not be retained
    optimizer.step()
    print (net.conv1.bias.grad)   # gradient of conv1's bias after backward()

    verbose = 0
    if verbose:
        print (net)
        print ('\n ============== \n')
        # named_parameters() yields the real parameter names next to the tensors.
        for name, param in net.named_parameters():
            print (param.size(), name)
        print ('\n ============== \n')
        y = net(x)
        print (y)


x: <class 'torch.Tensor'>
None
tensor([ 0.0019,  0.0074,  0.0174,  0.0154, -0.0056, -0.0060], device='cuda:0')


# Dataset

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms

In [3]:
# Per-image preprocessing: convert to a tensor, then normalise each of the
# three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split of CIFAR-10 under ./data, served in shuffled batches of 4.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

# Test split, same preprocessing and batch size, served in fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# Class names, positioned so that classes[label] is the name for integer label `label`.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [64]:
# Pull a single mini-batch from the training loader and inspect its types and shapes.
# The builtin next() is used because DataLoader iterators in current PyTorch
# releases no longer provide a `.next()` method.
dataiter = iter(trainloader)
images, labels = next(dataiter)

print (type(images), type(labels))
print (images.size(), labels.size())

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([4, 3, 32, 32]) torch.Size([4])


In [4]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    """CNN for 3-channel 32x32 images: two conv + ReLU + 2x2 max-pool stages
    followed by three fully connected layers producing 10 raw scores.

    NOTE: this reuses the name `Net`, so it replaces the earlier single-channel
    definition in the notebook namespace.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3,16,5)
        self.pool  = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(16,64,5)
        # 32 -> 28 -> 14 -> 10 -> 5 spatially, with 64 channels after conv2.
        self.fc1   = nn.Linear(64*5*5, 120)
        self.fc2   = nn.Linear(120,84)
        self.fc3   = nn.Linear(84,10)

    def forward(self, x):
        """Return the (batch, 10) output of the last linear layer for input `x`.

        Raises:
            RuntimeError: if the input's spatial size does not reduce to 64*5*5
                features per sample (the shape mismatch surfaces at `fc1`).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample and keep the batch axis fixed. A view(-1, 64*5*5)
        # would silently regroup elements across samples whenever the total
        # element count happens to be divisible by 1600.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

if __name__ == "__main__":
    # Train for two passes over `trainloader` with SGD + momentum and
    # cross-entropy loss, printing the mean loss every 2000 mini-batches.
    # Runs on the GPU when CUDA is available, otherwise on the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    net = Net().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(2):
        running_loss = 0.0
        for i, (X, Y) in enumerate(trainloader):
            # non_blocking lets the host-to-device copy overlap with compute
            # when the loader yields pinned memory; it is harmless otherwise.
            X = X.to(device, non_blocking=True)
            Y = Y.to(device, non_blocking=True)

            optimizer.zero_grad()

            Y_   = net(X)
            loss = criterion(Y_, Y)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

    print ('Finished training')

[1,  2000] loss: 2.104
[1,  4000] loss: 1.742
[1,  6000] loss: 1.557
[1,  8000] loss: 1.487
[1, 10000] loss: 1.396
[1, 12000] loss: 1.341
[2,  2000] loss: 1.246
[2,  4000] loss: 1.192
[2,  6000] loss: 1.164
[2,  8000] loss: 1.144
[2, 10000] loss: 1.110
[2, 12000] loss: 1.099
Finished training


In [5]:
# Classify the first test mini-batch and print the predicted class names.
# Run on whatever device the network's parameters live on.
device = next(net.parameters()).device

dataiter = iter(testloader)
# Builtin next(): DataLoader iterators in current PyTorch have no `.next()` method.
images, labels = next(dataiter)

with torch.no_grad():   # inference only, no autograd graph needed
    outputs = net(images.to(device))
_, predicted = torch.max(outputs, 1)   # index of the highest score per sample
print('Predicted: ', ' '.join('%5s' % classes[idx]
                              for idx in predicted.tolist()))

Predicted:    cat  ship  ship plane


In [7]:
# Overall accuracy: count how many test samples get the right top-1 prediction.
correct = 0
total = 0
device = next(net.parameters()).device   # evaluate where the network lives
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)   # index of the highest score per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 61 %


In [84]:
# Per-class accuracy: for every class, the share of its test samples whose
# top-1 prediction matches the label.
class_correct = [0.] * len(classes)
class_total = [0.] * len(classes)
device = next(net.parameters()).device   # evaluate where the network lives
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        hits = predicted == labels
        # Iterate over the actual batch contents instead of assuming 4 samples;
        # tolist() also turns the tensors into plain Python ints/bools in one go.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1


for i in range(len(classes)):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 73 %
Accuracy of   car : 79 %
Accuracy of  bird : 58 %
Accuracy of   cat : 48 %
Accuracy of  deer : 47 %
Accuracy of   dog : 46 %
Accuracy of  frog : 62 %
Accuracy of horse : 76 %
Accuracy of  ship : 79 %
Accuracy of truck : 69 %


# Rough

In [10]:
# Scratch check: push one random 1x1x32x32 tensor through `net` and print the result.
# NOTE(review): this uses whichever `net` is currently bound in the kernel. The
# input has a single channel and the recorded output carries no CUDA device tag,
# so it was presumably run against a CPU instance of the single-channel Net --
# confirm before relying on a top-to-bottom run.
x = torch.randn(1,1,32,32)
out = net(x)
print (out)


# Reset the parameter gradients, then back-propagate a random (1, 10) gradient through `out`.
net.zero_grad()
out.backward(torch.randn(1,10))

tensor([[-0.1152, -0.1128,  0.0088,  0.1154,  0.0666,  0.2232,  0.0561,  0.0621,
         -0.0055,  0.0377]], grad_fn=<AddmmBackward>)


In [27]:
# Scratch check: MSE between net(x) and a random target reshaped to (1, 10).
# Relies on `net` and `x` left in the kernel by earlier cells.
y      = net(x)
y_ = torch.randn(10)
y_ = y_.view(1,-1)

criterion = nn.MSELoss()
loss      = criterion(y, y_)
print (loss)

tensor(0.8655, grad_fn=<MseLossBackward>)


In [20]:
# Walk the autograd graph backwards from `loss`: its grad_fn, that node's
# inputs, and the inputs of the first of those.
print (loss.grad_fn)
print (loss.grad_fn.next_functions)
print (loss.grad_fn.next_functions[0][0].next_functions)

<MseLossBackward object at 0x7f4446aa6160>
((<AddmmBackward object at 0x7f4446acc358>, 0),)
((<AccumulateGrad object at 0x7f4446accb70>, 0), (<ReluBackward0 object at 0x7f4446accac8>, 0), (<TBackward object at 0x7f4446acc978>, 0))


In [30]:
# Show conv1's bias gradient right after resetting gradients, and again after backward().
net.zero_grad()
print (net.conv1.bias.grad)

# retain_graph=True asks autograd to keep the graph after this pass, so the same
# `loss` can be back-propagated again (e.g. when this cell is re-run).
loss.backward(retain_graph=True)
print (net.conv1.bias.grad)

tensor([0., 0., 0., 0., 0., 0.])
tensor([-0.0281,  0.0012,  0.0232, -0.0006,  0.0155, -0.0261])


In [32]:
import torch.optim as optim
# One plain SGD update (lr=0.01): reset gradients, forward pass, loss via
# `criterion`, backward pass, parameter step.
# Relies on `net`, `x`, `y_` and `criterion` left in the kernel by earlier cells.
optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
y = net(x)
loss= criterion(y, y_)
loss.backward()
optimizer.step()