# Torch Basics

In [2]:
import torch
import numpy as np

In [3]:
#convert numpy to tensor or vice versa
X_original=np.array([[1,2,3,4],[5,6,7,8]])
print('Original Data')
print(X_original)
X_torch = torch.from_numpy(X_original)
print('Converted to torch object', X_torch.type())
print(X_torch)
X_numpy = X_torch.numpy()
print('Converted to numpy', X_numpy.dtype)
print(X_numpy)

Original Data
[[1 2 3 4]
 [5 6 7 8]]
Converted to torch object torch.LongTensor
tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])
Converted to numpy int64
[[1 2 3 4]
 [5 6 7 8]]


In [4]:
#Math with PyTorch objects
print('Convert torch object to a float tensor object')
X_torch = X_torch.float()
print(X_torch.type())
print(X_torch)
print('sin')
print(torch.sin(X_torch))
print('exp')
print(X_torch.exp())
print('abs')
print(torch.sin(X_torch).abs())
print('sigmoid')
print(X_torch.sigmoid())
print('mean')
print(torch.mean(X_torch))

Convert torch object to a float tensor object
torch.FloatTensor
tensor([[1., 2., 3., 4.],
        [5., 6., 7., 8.]])
sin
tensor([[ 0.8415,  0.9093,  0.1411, -0.7568],
        [-0.9589, -0.2794,  0.6570,  0.9894]])
exp
tensor([[2.7183e+00, 7.3891e+00, 2.0086e+01, 5.4598e+01],
        [1.4841e+02, 4.0343e+02, 1.0966e+03, 2.9810e+03]])
abs
tensor([[0.8415, 0.9093, 0.1411, 0.7568],
        [0.9589, 0.2794, 0.6570, 0.9894]])
sigmoid
tensor([[0.7311, 0.8808, 0.9526, 0.9820],
        [0.9933, 0.9975, 0.9991, 0.9997]])
mean
tensor(4.5000)


In [5]:
#In contrast, math with numpy
print('Convert numpy array to a float numpy array')
X_numpy = X_numpy.astype(float)
print(X_numpy.dtype)
print('sin')
print(np.sin(X_numpy))
print('exp')
print(np.exp(X_numpy))
print('abs')
print(np.abs(np.sin(X_numpy)))
print('mean')
print(np.mean(X_numpy))

Convert numpy array to a float numpy array
float64
sin
[[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]
 [-0.95892427 -0.2794155   0.6569866   0.98935825]]
exp
[[2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01]
 [1.48413159e+02 4.03428793e+02 1.09663316e+03 2.98095799e+03]]
abs
[[0.84147098 0.90929743 0.14112001 0.7568025 ]
 [0.95892427 0.2794155  0.6569866  0.98935825]]
mean
4.5


In [6]:
#making torch tensors
#empty tensor
A = torch.empty(5,3)
print(A) 
#filled with random numbers
A = torch.rand(5,3)
print(A)
#filled with zeros
A=torch.zeros(5,3, dtype=torch.long)
print(A)
#filled with ones
A=torch.ones(5,3, dtype=torch.long)
print(A)
#making a tensor from data
A = torch.tensor([[2.3,6.4,8.5],[1.1,4.4,3.]])
print(A)

tensor([[ 0.0000e+00, -2.5244e-29, -1.5042e+31],
        [-2.0005e+00,  5.6052e-45,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00, -2.5244e-29, -1.5019e+31]])
tensor([[0.1408, 0.6017, 0.9808],
        [0.9822, 0.1319, 0.0996],
        [0.0726, 0.7683, 0.2835],
        [0.7106, 0.3597, 0.7049],
        [0.1850, 0.7154, 0.4848]])
tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
tensor([[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]])
tensor([[2.3000, 6.4000, 8.5000],
        [1.1000, 4.4000, 3.0000]])


In [7]:
#operations with torch tensors
#get size, it'll be a torch size object
print('size of A:', A.size())
#resize tensors
print('resize tensors')
A=A.view(3,2)
print(A)
print(A.size())

size of A: torch.Size([2, 3])
resize tensors
tensor([[2.3000, 6.4000],
        [8.5000, 1.1000],
        [4.4000, 3.0000]])
torch.Size([3, 2])


In [8]:
#add tensors
print('Add tensors')
B=torch.rand(A.size())
print(A+B)
# or use torch operations
print(torch.add(A,B))

Add tensors
tensor([[2.6275, 7.3160],
        [8.5987, 1.1222],
        [4.8443, 3.5318]])
tensor([[2.6275, 7.3160],
        [8.5987, 1.1222],
        [4.8443, 3.5318]])


In [9]:
#multiply tensors
print('Multiply tensors')
data = [[1,2], [3,4]]
tensor = torch.FloatTensor(data)  # 32-bit floating point
print('numpy:', np.matmul(data, data))
print('torch:', torch.mm(tensor, tensor))

Multiply tensors
numpy: [[ 7 10]
 [15 22]]
torch: tensor([[ 7., 10.],
        [15., 22.]])


In [10]:
#get value of 1-element tensor
print(A[0,0].item())

2.299999952316284


In [11]:
#numpy bridge, torch tensor and numpy array share same memory location!
C = torch.ones(3)
print(C)
D = C.numpy()
print('D:', D, 'C:', C)

tensor([1., 1., 1.])
D: [1. 1. 1.] C: tensor([1., 1., 1.])


In [12]:
D[2]=4
print('D:', D, 'C:', C)

D: [1. 1. 4.] C: tensor([1., 1., 4.])


In [13]:
C[1]=2
print('D:', D, 'C:', C)

D: [1. 2. 4.] C: tensor([1., 2., 4.])


In [14]:
# can also concatenate two tensors together on various dimensions
x = torch.randn(3, 3)
y = torch.randn(3, 3)
z = torch.cat((x, y), 1)
print("z:", z)
print("z.size()", z.size())

z: tensor([[-0.7789, -0.3291, -0.0742, -0.2087,  0.2622, -1.2285],
        [-0.5661, -1.1376, -0.0303, -0.6101,  0.6286, -0.1224],
        [ 0.7302,  0.9954, -0.4946,  0.3608,  0.1927,  0.1650]])
z.size() torch.Size([3, 6])


# Sample Neural Network on MNIST dataset

In [15]:
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from torchvision.datasets.mnist import MNIST
import torch.optim as optim
import os 
import time
from tqdm import tqdm  # this is a nice library for making nice looking progress bars

In [16]:
linear_layer1 = nn.Linear(in_features=3, out_features=6)

In [17]:
x = torch.randn(3, 3)
x2 = linear_layer1(x)
print(x2)

tensor([[ 0.1303, -0.3429,  0.1492,  0.4129,  1.0282,  0.5080],
        [-0.2372,  0.2043, -0.2989,  0.4768,  0.8333,  0.1873],
        [ 0.5576,  0.2275, -1.2040,  0.7007, -0.1425,  0.6203]],
       grad_fn=<AddmmBackward>)


In [18]:
linear_layer1.weight

Parameter containing:
tensor([[ 0.5552,  0.5200, -0.0170],
        [-0.1057, -0.2718,  0.5311],
        [ 0.0287, -0.4084, -0.0815],
        [-0.0317,  0.1445, -0.0781],
        [-0.4319, -0.3663, -0.4280],
        [ 0.5097,  0.2445,  0.1543]], requires_grad=True)

In [19]:
linear_layer1.bias

Parameter containing:
tensor([ 0.1464,  0.4599, -0.5521,  0.4773,  0.2089,  0.5598],
       requires_grad=True)

In [20]:
mnist = MNIST(root=".", download=True, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ]))

In [21]:
# check out the size of the first sample in mnist
mnist[0][0].size()

torch.Size([1, 28, 28])

## Neural Network model as an object

In [22]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        """
        define model attributes
        """
        self.layer1 = nn.Linear(28*28, 512)
        self.layer2 = nn.Linear(512, 512)
        self.layer3 = nn.Linear(512, 10)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        """
        define model
        """
        x = x.view(-1, 28*28)
        h1 = self.relu(self.layer1(x))
        h2 = self.relu(self.layer2(h1))
        logits = self.relu(self.layer3(h2))
        
        return logits

## Class to train model with mnist dataset (plain python code)

In [23]:
class Trainer(object):
    def __init__(self, model, save_dir):
        self.model = model
        self.save_dir = save_dir

    def train(self, train, val, epochs, batch_size, log_per_batches, learning_rate, device):

        run_id = time.clock()  # give this training run a unique id, we'll use this later when we save our model

        # batch data and pass to iterator
        trainloader = DataLoader(train, batch_size=batch_size, shuffle=True)  # pass training data to pytorch's data loader (an iterator)

        if val is not None:
            valloader = DataLoader(val, batch_size=batch_size, shuffle=True)  # pass validation data to pytorch's dataloader (an iterator)

        # stuff for you to play with!
        loss_fn = nn.CrossEntropyLoss()  # you will need to pick an appropriate loss function for the model you're building
        optimizer = optim.Adam(self.model.parameters(), learning_rate)  # adam is typically the standard choice but feel free to play around w this
        
        # keep a running loss that we'll average over - this gives us a smoother estimate on the loss
        running_train_loss = 0
        running_val_loss = 0

        # we'll also keep track of our previous loss on the validation set and use this figure out when to stop training 
        prev_val_loss = 0
        
        # begin train loop
        for epoch in range(epochs):
            print("Epoch %s" % epoch)

            train_bar = tqdm(enumerate(trainloader, 1), total=len(trainloader))  # wrap with tqdm to make a pretty progress bar

            # iterate over training data
            for i, data in train_bar:
                x, targets = data

                optimizer.zero_grad()  # call to zero gradients

                predictions = self.model(x)  # pass x to model and return predictions
                loss = loss_fn(predictions, targets)  # calculate loss on this batch of predictions
                running_train_loss += loss  # add to our running total
                loss.backward()  # backpropogate the loss and update model parameters
                optimizer.step()  # update optimizer paraeters

                if i % log_per_batches == 0:  # print every `log_per_batches` batches
                    avg_train_loss = running_train_loss / log_per_batches  # average the loss 
                    train_bar.set_description("Training Loss: %.3f" % avg_train_loss)  # update the description of the tqdm progress bar 
                    running_train_loss = 0  # reset running loss to zero

            # iterate over validation data
            if val is not None:
                val_bar = tqdm(enumerate(valloader, 1), total=len(valloader))  # wrap with tqdm to make a pretty progress bar
                with torch.no_grad():  # use `torch.no_grad()` so information about our val set does not get backpropgated
                    for i, data in val_bar:
                        x, targets = data

                        # everything below is the same as the training loop, but notice that we don't need to backprop loss/update optimizer
                        predictions = self.model(x)
                        loss = loss_fn(predictions, targets)
                        running_val_loss += loss
                    # average over the entire validation sett
                    avg_val_loss = running_val_loss / len(valloader)
                    val_bar.set_description("Validation Loss: %.3f" % avg_val_loss)

                #set an early stopping condition
                # there's a variety of other conditions we can use, but this is nice and simple
                if avg_val_loss > prev_val_loss:  # model is still improving!
                    state = {"state_dict": self.model.state_dict()}
                    if self.save_dir is not None:
                        try:
                            os.makedirs(save_dir)  # make save_dir if save_dir does not already exist
                        except FileExistsError:  # save_dir already exists
                            pass

                        # save path will be `save_dir/run_id/epoch_{epoch_number}`
                        save_path = os.path.join(self.save_dir(os.path.join(run_id, "epoch_"+str(epoch))))  
                        torch.save(state, save_path) # save model

                else:  # model is beginning to overfit
                    return  # so we stop training

## Train model!

In [24]:
# create model and trainer
model = Model()
trainer = Trainer(model=model, save_dir="./models")

In [25]:
trainer.train(train=mnist, val=None, 
              epochs=10, batch_size=32, 
              log_per_batches=10, learning_rate=0.001, device="cpu")

  
  0%|          | 2/1875 [00:00<01:36, 19.38it/s]

Epoch 0


Training Loss: 0.423: 100%|██████████| 1875/1875 [00:42<00:00, 38.91it/s]
  0%|          | 4/1875 [00:00<00:48, 38.63it/s]

Epoch 1


Training Loss: 0.532: 100%|██████████| 1875/1875 [00:51<00:00, 36.22it/s]
  0%|          | 4/1875 [00:00<00:52, 35.86it/s]

Epoch 2


Training Loss: 0.632: 100%|██████████| 1875/1875 [00:54<00:00, 34.52it/s]
  0%|          | 4/1875 [00:00<00:52, 35.32it/s]

Epoch 3


Training Loss: 0.512: 100%|██████████| 1875/1875 [00:33<00:00, 55.24it/s]  
  0%|          | 4/1875 [00:00<00:55, 33.55it/s]

Epoch 4


Training Loss: 0.500: 100%|██████████| 1875/1875 [01:00<00:00, 29.06it/s]
  0%|          | 3/1875 [00:00<01:03, 29.46it/s]

Epoch 5


Training Loss: 0.458: 100%|██████████| 1875/1875 [01:05<00:00, 28.48it/s]
  0%|          | 4/1875 [00:00<01:01, 30.65it/s]

Epoch 6


Training Loss: 0.391: 100%|██████████| 1875/1875 [01:05<00:00, 28.73it/s]
  0%|          | 3/1875 [00:00<01:17, 24.24it/s]

Epoch 7


Training Loss: 0.417: 100%|██████████| 1875/1875 [01:07<00:00, 27.96it/s]
  0%|          | 3/1875 [00:00<01:15, 24.88it/s]

Epoch 8


Training Loss: 0.579: 100%|██████████| 1875/1875 [01:08<00:00, 27.28it/s]
  0%|          | 3/1875 [00:00<01:09, 26.75it/s]

Epoch 9


Training Loss: 0.501: 100%|██████████| 1875/1875 [01:10<00:00, 26.57it/s]


## References

#documentation
https://pytorch.org/docs/stable/nn.html
#tutorials
https://pytorch.org/tutorials/
https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
#forums run by the people that crafted PyTorch
https://discuss.pytorch.org