# General practices in pytorch

-----

Importing some pytorch modules 

In [None]:
import matplotlib.pyplot as plt
import torch
import torchvision

-----

Converting an array or list to tensor

In [None]:
#Tensors are similar to NumPy’s ndarrays, except that tensors can run on GPUs or other specialized hardware 
#to accelerate computing by computing in multiple parallel cores. 
#Tensors
data = [[1,2],[3,4],[5,6]]
x_data = torch.tensor(data)
#type(data)
#type(x_data)
x_data.shape

torch.Size([3, 2])

----

Fixed seeds for reproducible results

In [None]:
# Seed the global random number generator so the draw below is repeatable.
torch.manual_seed(0)
# 3x3 tensor of uniform samples from [0, 1).
X = torch.rand(3, 3, dtype=torch.float32)
print(X)

tensor([[0.4963, 0.7682, 0.0885],
        [0.1320, 0.3074, 0.6341],
        [0.4901, 0.8964, 0.4556]])


In [None]:
# A different seed yields a different (but equally repeatable) sequence of samples.
torch.manual_seed(1)
Y = torch.rand(size=(3, 3))
print(Y)

tensor([[0.7576, 0.2793, 0.4031],
        [0.7347, 0.0293, 0.7999],
        [0.3971, 0.7544, 0.5695]])


In [None]:
# Re-using seed 0 resets the generator, so this draw repeats the values produced for seed 0 earlier.
torch.manual_seed(0)
Z = torch.rand(3, 3, dtype=torch.float32)
print(Z)

tensor([[0.4963, 0.7682, 0.0885],
        [0.1320, 0.3074, 0.6341],
        [0.4901, 0.8964, 0.4556]])


----
# Load and transform Standard Dataset


In [None]:
from torchvision import transforms

In [None]:
# Per-image preprocessing pipeline:
#   1. ToTensor  - converts the image to a float tensor scaled to [0, 1]
#   2. Normalize - computes (x - 0.5) / 0.5, mapping [0, 1] onto [-1, 1]
#   3. Lambda    - flattens the image tensor into a 1-D vector
# Keeping inputs in a small, centred range reduces skew and helps the optimizer
# learn the weights and biases faster and more reliably.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
    # torch.flatten is passed directly (no lambda wrapper) so the transform can be
    # pickled if the DataLoader is later given worker processes.
    transforms.Lambda(torch.flatten),
])

# Dataset reference:
# https://pytorch.org/vision/stable/generated/torchvision.datasets.FashionMNIST.html
# https://github.com/zalandoresearch/fashion-mnist
# Training data = 60000
# Testing data = 10000

# Both splits are downloaded into ./data on first use and share the same transform.
training_data = torchvision.datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transform
)

test_data = torchvision.datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transform
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=26421880.0), HTML(value='')))


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=29515.0), HTML(value='')))


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4422102.0), HTML(value='')))


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=5148.0), HTML(value='')))


Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
# Wrap each split in a DataLoader that yields shuffled mini-batches of 64 samples.
train_dataloader = torch.utils.data.DataLoader(
    training_data,
    batch_size=64,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    test_data,
    batch_size=64,
    shuffle=True,
)

# Custom dataset 
https://pytorch.org/tutorials/recipes/recipes/custom_dataset_transforms_loader.html#write-a-simple-helper-function-to-show-an-image
----

---
# Creating basic neural network
https://pytorch.org/docs/stable/nn.html

linear neural network

In [None]:
# nn module depends on autograd module of pytorch to define models and differentiate them. 
# An nn.Module contains layers, and a method forward(input) that returns the output.

class NN(torch.nn.Module):
  """Fully connected classifier: input_size -> 50 -> 50 -> num_classes.

  Args:
    input_size: number of features in each (flattened) input sample.
    num_classes: number of output logits, one per class.
  """

  # layers of the neural network
  def __init__(self,input_size,num_classes):
    super(NN,self).__init__()
    self.fc1 = torch.nn.Linear(input_size,50)
    self.fc2 = torch.nn.Linear(50,50)
    self.fc3 = torch.nn.Linear(50,num_classes)

  # feed-forward pass with the activation functions
  def forward(self,x):
    """Return raw class logits for a batch `x` whose last dimension is `input_size`."""
    x = torch.nn.functional.relu(self.fc1(x))
    x = torch.nn.functional.relu(self.fc2(x))
    # The output layer is fc3 (not fc2 again) so the result has num_classes entries.
    # No activation here: the logits are meant to be fed to a loss such as CrossEntropyLoss.
    x = self.fc3(x)
    return x

Summary of the network

In [None]:
# Print a layer-by-layer summary (output shapes and parameter counts) of the network designed above.
from torchsummary import summary

model = NN(input_size=784, num_classes=10)
summary(model, (1, 784))
# Total params = trainable params + non-trainable params (e.g. layers kept frozen for transfer learning).

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 50]          39,250
            Linear-2                [-1, 1, 50]           2,550
            Linear-3                [-1, 1, 50]           2,550
Total params: 44,350
Trainable params: 44,350
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.17
Estimated Total Size (MB): 0.17
----------------------------------------------------------------


Convolutional neural networks

In [None]:
# Convolutional neural network
class CNN(torch.nn.Module):
    """Small CNN: two (3x3 conv -> ReLU -> 2x2 max-pool) stages, then one linear layer.

    The padded 3x3 convolutions keep the spatial size and each pooling step halves it,
    so the 16 * 7 * 7 input width of the linear layer corresponds to 28x28 input images.

    Args:
        in_channels: number of channels in the input images.
        num_classes: number of output logits.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        # Settings shared by both convolutions ("same"-size 3x3 kernels).
        conv_kwargs = dict(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=6, **conv_kwargs)
        self.pool = torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = torch.nn.Conv2d(in_channels=6, out_channels=16, **conv_kwargs)
        self.fc1 = torch.nn.Linear(16 * 7 * 7, num_classes)
        self.initialize_weights()

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for an image batch `x`."""
        relu = torch.nn.functional.relu
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        # Collapse channel and spatial dimensions into one feature vector per sample.
        features = x.reshape(x.size(0), -1)
        return self.fc1(features)

    def initialize_weights(self):
        """Re-initialise every submodule: Kaiming-uniform weights and zero biases
        for conv/linear layers, and weight 1 / bias 0 for any BatchNorm2d layers."""
        init = torch.nn.init
        for layer in self.modules():
            if isinstance(layer, torch.nn.Conv2d):
                init.kaiming_uniform_(layer.weight)
                # A convolution may have been created without a bias term.
                if layer.bias is not None:
                    init.constant_(layer.bias, 0)
                continue

            if isinstance(layer, torch.nn.BatchNorm2d):
                init.constant_(layer.weight, 1)
                init.constant_(layer.bias, 0)
                continue

            if isinstance(layer, torch.nn.Linear):
                init.kaiming_uniform_(layer.weight)
                init.constant_(layer.bias, 0)

In [None]:
# Instantiate the CNN for 3-channel input and 10 classes, then summarise it for 28x28 images.
model = CNN(in_channels=3, num_classes=10)
summary(model, input_size=(3, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             168
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 14, 14]             880
         MaxPool2d-4             [-1, 16, 7, 7]               0
            Linear-5                   [-1, 10]           7,850
Total params: 8,898
Trainable params: 8,898
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.07
Params size (MB): 0.03
Estimated Total Size (MB): 0.12
----------------------------------------------------------------


In [None]:
from torchvision import models
# Build torchvision's VGG-16 architecture (called with no arguments here) and print
# its layer structure. NOTE: this rebinds `model`, replacing the CNN created above.
model = models.vgg16()
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

-----
Hyperparameters

In [None]:
# Fun fact: GPT-3 has 175B trainable parameters (weights). Those are learned during training,
# unlike the hyperparameters below, which are chosen by hand before training starts.
input_size = 784
num_classes = 10
learning_rate = 0.001
# NOTE(review): the DataLoaders created earlier hard-code batch_size=64 instead of reading this value.
batch_size = 64
# The number of epochs is a hyperparameter that defines the number of times that
# the learning algorithm will work through the entire training dataset.
num_epochs = 5

Creating a model variable and setting the device

Initialize network

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the fully connected network and move its parameters to the chosen device.
model = NN(input_size=input_size, num_classes=num_classes).to(device)

In [None]:
# NOTE: this repeats the construction from the previous cell, so `model` is rebound to a
# freshly initialised network; re-run it to restart training from scratch.
model = NN(input_size=input_size, num_classes=num_classes).to(device)

Loss and optimizer


In [None]:
# Loss function: cross-entropy computed between the model's output scores and the integer class targets
criterion = torch.nn.CrossEntropyLoss()

# Adam optimization algorithm (an adaptive-learning-rate variant of stochastic gradient descent)
# Arguments: the parameters that need to be optimized, and the learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train Network: one pass over every mini-batch per epoch
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_dataloader):
        # Move the batch to the training device and flatten each sample to a vector
        data = data.to(device=device)
        data = data.reshape(data.shape[0], -1)
        targets = targets.to(device=device)

        # Clear gradients left over from the previous step before computing new ones
        optimizer.zero_grad()

        # Forward pass and loss
        # (to plot a loss curve, collect loss.item() into a list here)
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass, then one Adam update of the parameters
        loss.backward()
        optimizer.step()


# Check accuracy on training & test to see how good our model is
def check_accuracy(loader, model):
    """Return the fraction of samples in `loader` that `model` classifies correctly.

    Args:
        loader: iterable of (inputs, targets) batches; each input batch is flattened
            to (batch, -1) before being passed to the model.
        model: torch.nn.Module producing one score per class along dimension 1.

    Returns:
        A 0-dim float tensor in [0, 1]; 0.0 when the loader yields no samples.

    The model is evaluated in eval mode with gradients disabled, and is returned to
    whichever mode (train or eval) it was in when the function was called.
    """
    # Run on the device that holds the model's weights instead of relying on a global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = torch.zeros((), dtype=torch.long, device=target_device)
    num_samples = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)
                x = x.reshape(x.shape[0], -1)

                # Predicted class = index of the highest score for each sample.
                predictions = model(x).argmax(dim=1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode even if evaluation raised.
        model.train(was_training)

    if num_samples == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return torch.tensor(0.0, device=target_device)
    return num_correct / num_samples

#import matplotlib.pyplot as plt
#plt.plot(loss_values)
# Evaluate on each split in turn and report the accuracy as a percentage.
train_accuracy = check_accuracy(train_dataloader, model)
print(f"Accuracy on training set: {train_accuracy*100:.2f}")
test_accuracy = check_accuracy(test_dataloader, model)
print(f"Accuracy on test set: {test_accuracy*100:.2f}")

Accuracy on training set: 90.80
Accuracy on test set: 87.44


References
----

This example draws on several YouTube tutorials.

Some of the ideas come from the PyTorch documentation; the others come from Aladdin Persson's YouTube playlist on PyTorch:
https://youtu.be/Jy4wM2X21u0