# Initialization of tensors

## Creating a tensor from data
A tensor can be created directly from data or from an array

In [1]:
# Importing numpy and torch
import torch
import numpy as np
# Raw data: a nested Python list (2 rows, 2 columns)
data = [[1, 2], [3, 4]]
# The same values as a NumPy array
np_array = np.array(data)
# Tensor built directly from the Python list
x_data = torch.tensor(data)
# Tensor built from the NumPy array
x_np = torch.tensor(np_array)

A tensor can also be created using the properties of some data or an array using "like" commands

In [2]:
# Random-valued tensor with the shape of x_data; the dtype is explicitly
# overridden to float instead of being copied (x_data was built from integers)
x_rand = torch.rand_like(
    x_data,
    dtype=torch.float,
)
# All-ones tensor that copies both shape and dtype from x_data
x_ones = torch.ones_like(x_data)

## With random or constant values
A tensor can also be created using random values or constant values

In [3]:
# Target shape: 2 rows, 3 columns
shape = (2, 3)
# Constant-valued tensors of that shape
zeros_tensor = torch.zeros(shape)
ones_tensor = torch.ones(shape)
# Tensor of that shape filled with random values
rand_tensor = torch.rand(shape)

# Tensor properties
Besides its values, a tensor has three properties: its shape, its data type, and the device it is stored on (CPU or GPU).

In [4]:
# Sample 3x4 tensor whose metadata is inspected below
tensor = torch.rand(3, 4)

# Dimensions of the tensor
print(f"Shape of tensor: {tensor.shape}")
# Element type
print(f"Datatype of tensor: {tensor.dtype}")
# Where the tensor's data lives
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


# Tensor operations
Many tensor operations exist (over 100, see https://pytorch.org/docs/stable/torch.html).
Most PyTorch operations work just like their NumPy counterparts.
To speed up computation, tensors can be transferred to a GPU if one is available.

## Transferring to GPU

In [5]:
# Transfer the tensor to the GPU when CUDA is available;
# otherwise `tensor` is left unchanged
if torch.cuda.is_available():
    tensor = tensor.to("cuda")

## Some simple operations

In [6]:
# Fresh 4x4 tensor of ones (this rebinds the name `tensor`)
tensor = torch.ones((4, 4))

In [7]:
# Indexing and assignment
# Select column 1 of every row and overwrite it with 0;
# this modifies `tensor` itself rather than creating a new tensor
tensor[:,1] = 0
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [8]:
# Concatenation: join three copies of the tensor along dim=1 (the columns)
t1 = torch.cat([tensor] * 3, dim=1)
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


In [9]:
# Elementwise multiplication
# Multiplying by a scalar scales every element; the result is a new tensor
tensor2 = tensor.mul(2)
# The printed label names the statement that was actually executed
print("tensor2 = tensor.mul(2)")
print(tensor2)

# Multiplying two tensors of the same shape multiplies matching elements
tensor3 = tensor2.mul(tensor2)
print("tensor3 = tensor2.mul(tensor2)")
print(tensor3)

tensor = tensor.mul(2)
tensor([[2., 0., 2., 2.],
        [2., 0., 2., 2.],
        [2., 0., 2., 2.],
        [2., 0., 2., 2.]])
tensor3 = tensor2.mul(tensor2)
tensor([[4., 0., 4., 4.],
        [4., 0., 4., 4.],
        [4., 0., 4., 4.],
        [4., 0., 4., 4.]])


In [10]:
# Matrix multiplication
# `tensor.T` is the transpose, so this is the matrix product of tensor
# with its own transpose (not an elementwise product)
tensor4 = tensor.matmul(tensor.T)
# The printed label names the statement that was actually executed
print("tensor4 = tensor.matmul(tensor.T)")
print(tensor4)

tensor = tensor.mul(tensor)
tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])


# Tensors and Numpy
Tensors on the CPU and NumPy arrays can share the same memory location, so changing one will change the other.

## Tensor to NumPy array

In [17]:
# Tensor of five ones
t = torch.ones(5)
# Converting the tensor to a NumPy array
n = t.numpy()
# Show both views of the data
print(f"t: {t}")
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


In [18]:
# Adding 1 to the tensor also affects the NumPy array
# add_ updates `t` in place; `n` was obtained from t.numpy(), and the
# prints below show the same updated values in both.
# Note: re-running this cell adds 1 again.
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.])
n: [2. 2. 2. 2. 2.]


## NumPy array to Tensor

In [19]:
# Create a NumPy array of five ones
n = np.ones(5)
# Create a tensor from the array; the following cell shows that
# changes made to `n` are visible through `t`
t = torch.from_numpy(n)

In [20]:
# Adding 1 to the NumPy array also affects the tensor
# out=n writes the result back into the existing array `n`
# instead of returning a separate new array
np.add(n, 1, out=n)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
n: [2. 2. 2. 2. 2.]


# Training a simple neural network
'torch.autograd' is PyTorch’s automatic differentiation engine that powers neural network training.

For this example, we load a pretrained resnet18 model from torchvision. We create a random data tensor to represent a single image with 3 channels, and height & width of 64, and its corresponding label initialized to some random values.

Training a NN happens in two steps:

* Forward Propagation: In forward prop, the NN makes its best guess about the correct output. It runs the input data through each of its functions to make this guess.

* Backward Propagation: In backprop, the NN adjusts its parameters proportionate to the error in its guess. It does this by traversing backwards from the output, collecting the derivatives of the error with respect to the parameters of the functions (gradients), and optimizing the parameters using gradient descent. 


In [13]:
# PyTorch and torchvision
import torch
import torchvision

# ResNet-18 loaded with pretrained weights
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version before changing it
model = torchvision.models.resnet18(pretrained=True)
# Random input standing in for one image: batch of 1, 3 channels, 64x64
data = torch.rand(1, 3, 64, 64)
# Random labels of shape (1, 1000)
labels = torch.rand(1, 1000)

We run the input data through the model through each of its layers to make a prediction. This is the forward pass.

In [14]:
# Calling the model on the input tensor runs the data through the network
# and returns its prediction
prediction = model(data) # forward pass

We use the model’s prediction and the corresponding label to calculate the error (loss). 

The next step is to backpropagate this error through the network. Backward propagation is kicked off when we call .backward() on the error tensor. 
Autograd then calculates and stores the gradients for each model parameter in the parameter’s .grad attribute.

In [15]:
# Loss (prediction error): the summed difference between prediction and labels
loss = torch.sum(prediction - labels)
# Backward pass: backpropagate the loss through the network
loss.backward()

Now we optimize over the model variables using gradient descent to minimise the prediction error.

In this example we use Stochastic Gradient Descent (SGD) with momentum.
To do this we need to set two "hyperparameters", namely the learning rate (step size) and the momentum.

The learning rate determines how quickly the model adapts to the problem. A small learning rate means a smaller change in the network weights at each training iteration, whereas a bigger learning rate means bigger changes in the network weights. Choosing the learning rate carefully is important: if it is too big the model may converge to a suboptimal solution, and if it is too small training may get stuck. Here we choose 0.01.

The momentum is a hyperparameter which determines the number of points that are used for exponential weighting. Specifically, we average over the last 1/(1 - momentum) points. A good value is often 0.9, i.e. averaging over the last 10 points.





In [16]:
# SGD optimizer over all model parameters:
# learning rate (step size) 0.01, momentum 0.9
optim = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
)

We call `.step()` on the optimizer to perform one step of gradient descent. The optimizer adjusts each parameter using its gradient stored in `.grad`.

In [17]:
# Perform one optimizer update on the model parameters, using the gradients
# that loss.backward() computed in the earlier cell
optim.step() #gradient descent