In [1]:
from torchvision import transforms
import torch
import torchvision
from torch.nn import functional as F
import torchvision.transforms.functional as transFunc
import torch.nn as nn
import torch.optim as optim 

In [2]:
x = torch.tensor([[1,0,2], [2,2,3]])

In [3]:
x.shape

torch.Size([2, 3])

In [4]:
var = torch.rand(2,2)
print(var)
print(var.max())
print(var.max().item())

tensor([[0.8607, 0.1773],
        [0.8571, 0.4899]])
tensor(0.8607)
0.8607200980186462


In [5]:
torch.zeros(2,2)

tensor([[0., 0.],
        [0., 0.]])

In [6]:
torch.ones(2,2)

tensor([[1., 1.],
        [1., 1.]])

In [7]:
torch.zeros(2,2) + torch.ones(1,2) #Broadcasting

tensor([[1., 1.],
        [1., 1.]])

In [8]:
var.type()

'torch.FloatTensor'

In [9]:
cast = torch.tensor(var).to(dtype=torch.float16)

  """Entry point for launching an IPython kernel.


In [10]:
cast

tensor([[0.8608, 0.1774],
        [0.8569, 0.4900]], dtype=torch.float16)

In [11]:
#reshape
image = torch.rand(1024)
print(image.shape)
reshape_image = image.view(1, 32, 32)
print(reshape_image.shape)

torch.Size([1024])
torch.Size([1, 32, 32])


In [12]:
image.reshape(1, 32, 32).shape

torch.Size([1, 32, 32])

what the difference is between view() and reshape(). view() operates as a view on the original tensor, so if the underlying data is changed, the view will change too (and vice versa). However, view() can throw errors if the required view is not contiguous; that is, it doesn’t share the same block of memory it would occupy if a new tensor of the required shape was created from scratch. If this happens, you have to call tensor.contiguous() before you can use view(). However, reshape() does all that behind the scenes, so in general, I recommend using reshape() rather than view().

Generally image dimension are (h, w, c), but pyt accepts (c, h, w). To move dimension use permute.

In [13]:
reshape_image.shape # 1 is channel 

torch.Size([1, 32, 32])

In [14]:
reshape_image.permute(1,2,0).shape

torch.Size([32, 32, 1])

### template Dataset class

In [15]:
# class Dataset(object):
#     def __getitem__(self, index):
#         return NotImplementedError
    
#     def __len__():
#         return NotImplementedError

In [16]:
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='/home/mayur/Desktop/Pytorch/data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                          shuffle=True, num_workers=2)

Files already downloaded and verified


In [17]:
testset = torchvision.datasets.CIFAR10(root='/home/mayur/Desktop/Pytorch/data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                         shuffle=False, num_workers=2)

Files already downloaded and verified


In [18]:
#Label Mapping - classification of two class 

#CIFAR has 10 classes, we are restricting it into 2 classes
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']
cifar_2 = [(img, label_map[label1]) for img, label1 in trainset if label1 in [0,2]]
cifar2_val = [(img, label_map[label]) for img, label in testset if label in [0, 2]]

trainloader = torch.utils.data.DataLoader(cifar_2, batch_size=64,shuffle=True)
testloader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,shuffle=False)

### Simplest Neural Network

In [19]:
class simpleNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3072, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 128)
        self.fc4 = nn.Linear(128, 2)
        
    def forward(self, x):
        x = x.view(-1, 3072)
        out = F.relu(self.fc1(x))
        out = F.relu(self.fc2(out))
        out = F.relu(self.fc3(out))
        out = F.softmax(self.fc4(out))
        
        return out

### Why we down size the feature of the network ?

In general, you want the data in your layers to be compressed as it goes down the stack. If a layer is going to, say, 50 inputs to 100 outputs, then the network might learn by simply passing the 50 connections to 50 of the 100 outputs and consider its job done. By reducing the size of the output with respect to the input, we force that part of the network to learn a representation of the original input with fewer resources, which hopefully means that it extracts some features of the images that are important to the problem we’re trying to solve; for example, learning to spot a fin or a tail.

Loss function called CrossEntropyLoss, is recommended for multiclass categorization. It incorporates softmax as part of its operation.

### Optimizer

why Adam is widely used (as does RMSProp and AdaGrad) is that it uses a learning rate per parameter, and adapts that learning rate depending on the rate of change of those parameters. It keeps an exponentially decaying list of gradients and the square of those gradients and uses those to scale the global learning rate that Adam is working with. Adam has been empirically shown to outperform most other optimizers in deep learning networks, but you can swap out Adam for SGD or RMSProp or another optimizer to see if using a different technique yields faster and better training for your particular application.

In [20]:
img, label = cifar_2[0]

In [21]:
32*32*3

3072

In [22]:
"""
for epoch in range(1, 501):
    for batch in trainloader:
        optimizer.zero_grad()
        img, label = batch
        output = net(img)
        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()
        
"""

'\nfor epoch in range(1, 501):\n    for batch in trainloader:\n        optimizer.zero_grad()\n        img, label = batch\n        output = net(img)\n        loss = loss_fn(output, label)\n        loss.backward()\n        optimizer.step()\n        \n'

In [23]:
device = torch.device('cuda:0' if torch.cuda.is_available else 'cpu')

In [24]:
device

device(type='cuda', index=0)

### Combining the pieces

In [25]:
def train(model, optimizer, loss_fn, trainloader, testloader, n_epochs, device):
    for epoch in range(1, n_epochs+1):
        training_loss = 0.0
        valid_loss = 0.0
        
        model.train()
        for batch in trainloader:
            img, label = batch
            img = img.to(device)
            label = label.to(device)
            
            output = model(img)
            optimizer.zero_grad()
            loss = loss_fn(output, label)
            loss.backward()
            optimizer.step()
            training_loss += loss.data.item()
        training_loss /= len(trainset)
        
        model.eval()
        num_correct = 0
        num_examples = 0
        for batch in testloader:
            img, label = batch
            img = img.to(device)
            label = label.to(device)
            
            output = model(img)
            loss = loss_fn(output, label)
            valid_loss += loss.data.item()
            correct = torch.eq(torch.max(F.softmax(output),dim=1)[1], label).view(-1)
            num_correct += torch.sum(correct).item()
            num_examples += correct.shape[0]
            
        valid_loss/=len(testset)
        
        print(f'Epoch: {epoch}, Training Loss: {training_loss:.3f}, Validation Loss: {valid_loss:.3f}, Accuracy: {num_correct/num_examples}')
            

In [29]:
net = simpleNetwork()
optimizer = optim.Adam(net.parameters(), lr=0.001)
net.to(device)
train(net, optimizer, torch.nn.CrossEntropyLoss(), trainloader, testloader, n_epochs=20, device=device)

  


Epoch: 1, Training Loss: 0.002, Validation Loss: 0.002, Accuracy: 0.804
Epoch: 2, Training Loss: 0.002, Validation Loss: 0.002, Accuracy: 0.807
Epoch: 3, Training Loss: 0.002, Validation Loss: 0.002, Accuracy: 0.815
Epoch: 4, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.814
Epoch: 5, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.8245
Epoch: 6, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.833
Epoch: 7, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.835
Epoch: 8, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.834
Epoch: 9, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.8355
Epoch: 10, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.829
Epoch: 11, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.83
Epoch: 12, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.837
Epoch: 13, Training Loss: 0.001, Validation Loss: 0.002, Accuracy: 0.8375
Epoch: 14, Training Loss: 0.001, Validation Loss: 0.002, A

In [59]:
img, label = cifar2_val[0]
img = transFunc.to_pil_image(img)
img = transform(img)
img = img.unsqueeze(0)
net.to('cpu')
prediction = net(img)
prediction = prediction.argmax()
print(class_names[prediction])

airplane


  


Model Saving
If you’re happy with the performance of a model or need to stop for any reason, you can save the current state of a model in Python’s pickle format by using the torch.save() method. Conversely, you can load a previously saved iteration of a model by using the torch.load() method.

Saving our current parameters and model structure would therefore work like this:

In [None]:
torch.save(simplenet, "/tmp/simplenet")
#And we can reload as follows:
simplenet = torch.load("/tmp/simplenet")

This stores both the parameters and the structure of the model to a file. 
This might be a problem if you change the structure of the model at a later point. For this reason, it’s more common to save a model’s state_dict instead. This is a standard Python dict that contains the maps of each layer’s parameters in the model. Saving the state_dict looks like this:

In [None]:
torch.save(model.state_dict(), PATH)

#To restore, create an instance of the model first and then use load_state_dict. For SimpleNet:

simplenet = net()
simplenet_state_dict = torch.load("/tmp/simplenet")
simplenet.load_state_dict(simplenet_state_dict)

The benefit here is that if you extend the model in some fashion, you can supply a strict=False parameter to load_state_dict that assigns parameters to layers in the model that do exist in the state_dict, but does not fail if the loaded state_dict has layers missing or added from the model’s current structure. Because it’s just a normal Python dict, you can change the key names to fit your model, which can be handy if you are pulling in parameters from a completely different model altogether.

In [28]:
numel_list = [p.numel()
for p in net.parameters()
if p.requires_grad == True]
sum(numel_list), numel_list

#Total Parameters: 1901954

(1901954, [1572864, 512, 262144, 512, 65536, 128, 256, 2])