# Tensors

In [1]:
import torch
import numpy as np

In [3]:
torch.tensor([1, 2, 3]) # Creates a tensor from a list or numpy array

tensor([1, 2, 3])

In [None]:
torch.Tensor(2, 3) # Allocates an UNINITIALIZED 2x3 float tensor: the values are whatever was in memory and are not guaranteed to be zero (use torch.zeros for zeros)

In [None]:
torch.zeros(2, 3) # Creates a tensor with all elements set to zero

In [None]:
torch.randint(10, size=(2, 5)) # Creates a tensor filled with random integers

In [None]:
torch.rand((3, 4)) # Creates a tensor filled with random float

In [None]:
torch.numel(torch.rand((3, 4))) # Returns the total number of elements in the tensor

In [None]:
torch.rand((2, 4)) @ torch.rand((4, 6)) # Matrix product of tensors

In [None]:
torch.zeros((3, 4)).shape # Returns the shape of the tensor alias for tensor.size()

In [None]:
a = torch.randint(10, (5, 4))

In [None]:
torch.rot90(a) # Rotate a tensor by 90 degrees

In [None]:
a.rot90(2) # Rotate a n-D tensor by 180 degrees

In [None]:
a.sum() # Returns the sum of all elements

In [None]:
a.max() # Returns the maximum value of all elements

In [None]:
a.argmax() # Returns a single index of the maximum value, counted over the flattened tensor (no dim given)

In [None]:
a

In [None]:
torch.max(a, 0) # Reduces over dim 0 and returns a (values, indices) pair: the maximum of every column and the row index where it occurs

In [None]:
np.unravel_index(a.argmax(), a.shape) # Converts the flat argmax index into per-dimension coordinates; NumPy is used because older PyTorch releases have no torch.unravel_index (NOTE(review): recent releases appear to provide one - confirm for your version)

In [None]:
a

In [None]:
a.view((2, -1)) # Returns a new tensor with the same data as the self tensor but of a different shape

In [None]:
a = torch.randint(10, (5, 5))
a

In [None]:
b = a.clone() # copy of the tensor
b

In [None]:
b += 10

In [None]:
a

In [None]:
b

In [None]:
b.dtype # data type

In [None]:
numpy_array = np.random.randint(10, size=(2, 3))
numpy_array

In [None]:
tensor = torch.from_numpy(numpy_array) # turn numpy array into torch tensor
tensor

In [None]:
another_np_array = tensor.numpy() # turn torch tensor into numpy array
another_np_array

In [None]:
a = torch.zeros((5))
a

In [None]:
# Converting a horizontal tensor to a vertical one.
# unsqueeze() is out-of-place: it returns a new view and leaves `a` untouched,
# so the result has to be kept (and displayed) explicitly.
a_column = a.unsqueeze(1)
a_column

# CUDA

In [None]:
import torch

In [None]:
x = torch.rand((3, 9))
x

In [None]:
torch.cuda.is_available() # Returns a bool indicating if CUDA is currently available

In [None]:
torch.device('cuda:0') # This is an object representing the device on which a tensor is or will be allocated (graphics card)

In [None]:
torch.device('cpu') # for cpu

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

In [None]:
x_on_cuda = x.to(device) # Transfer of the tensor to another device

In [None]:
big_tensor = torch.rand(2000, 30000)

In [None]:
big_tensor_on_cuda = big_tensor.to(device)

In [None]:
%%time
y = big_tensor + big_tensor**2 * big_tensor / 2

In [None]:
%%time
y_cuda = big_tensor_on_cuda + big_tensor_on_cuda**2 * big_tensor_on_cuda / 2

# Backpropagation (autograd) 

In [6]:
import torch
import numpy as np
import matplotlib.pyplot as plt

In [7]:
x = torch.tensor([[1., 2., 3.], [4., 5., 6.]], requires_grad=True)

# Use the GPU when one is present and fall back to the CPU otherwise, so the
# cell also runs on machines without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
x = x.to(device)

# After a real device transfer `x` is the result of an operation (a non-leaf
# tensor), and autograd only keeps .grad for non-leaf tensors when asked to.
# On a leaf tensor (the CPU fallback) retain_grad() is a harmless no-op.
x.retain_grad()
# Alternative: drop requires_grad=True from the first line and call
# x.requires_grad_() here instead, which makes the moved tensor itself a leaf.

In [8]:
function = 10 * (x**2).sum()
function.backward()
x.grad

tensor([[ 20.,  40.,  60.],
        [ 80., 100., 120.]], device='cuda:0')

In [11]:
# history
print(function.grad_fn, ' - multiplication')
print(function.grad_fn.next_functions[0][0], ' - sum')
print(function.grad_fn.next_functions[0][0].next_functions[0][0], ' - exponentiation')

<MulBackward0 object at 0x0000023557553BC8>  - multiplication
<SumBackward0 object at 0x0000023557553C48>  - sum
<PowBackward0 object at 0x0000023557553BC8>  - exponentiation


In [10]:
# One manual gradient-descent step with learning rate 0.001.
# torch.no_grad() keeps the in-place update out of the autograd graph; it is the
# supported replacement for mutating the legacy `.data` attribute.
with torch.no_grad():
    x -= 0.001 * x.grad

In [None]:
x.grad

In [None]:
x.grad.zero_() 

In [None]:
x = torch.tensor([8., 8.], requires_grad=True)
var_history = []  # position of x before every step
fn_history = []   # objective value at that position

optimizer = torch.optim.SGD([x], lr=0.001)

def function_parabola(variable):
    """Return 10 * sum(variable ** 2), a paraboloid with its minimum at the origin.

    Works for torch tensors and for NumPy arrays alike (only `**` and `.sum()`
    are used), which is what allows the same function to be used for plotting.
    """
    return 10 * (variable ** 2).sum()

def make_gradient_step(function, variable):
    """Perform one optimizer step on `variable` to minimise `function`.

    Uses the module-level `optimizer`, so this cell must be re-run before
    calling the function again once `optimizer` has been rebound elsewhere.

    Returns:
        The objective value at the position *before* the step, detached from
        the autograd graph, so callers need not evaluate the function again.
    """
    function_result = function(variable)
    function_result.backward()
    optimizer.step()
    optimizer.zero_grad()  # gradients accumulate otherwise
    return function_result.detach()

for i in range(500):
    # detach() gives a graph-free view; copy() decouples the stored array from
    # the tensor memory that the optimizer keeps updating in place.
    var_history.append(x.detach().cpu().numpy().copy())
    fn_history.append(make_gradient_step(function_parabola, x).cpu().numpy().copy())

In [None]:
def show_contours(objective,
                  x_lims=(-10.0, 10.0),
                  y_lims=(-10.0, 10.0),
                  x_ticks=100,
                  y_ticks=100):
    """Draw a contour plot of a two-variable objective on a regular grid.

    Args:
        objective: callable that receives a (2, 1) NumPy column vector
            [[x_1], [x_2]] and returns the value to plot at that point.
        x_lims: (low, high) range of the first coordinate; `high` is excluded
            by the np.mgrid slicing used below.
        y_lims: (low, high) range of the second coordinate.
        x_ticks: number of grid steps along the first coordinate.
        y_ticks: number of grid steps along the second coordinate.

    The figure is created with pyplot and left open so that callers can draw
    on top of it (for example an optimisation trajectory).
    """
    x_step = (x_lims[1] - x_lims[0]) / x_ticks
    y_step = (y_lims[1] - y_lims[0]) / y_ticks
    X, Y = np.mgrid[x_lims[0]:x_lims[1]:x_step, y_lims[0]:y_lims[1]:y_step]
    # Evaluate the objective point by point; it is not assumed to be vectorised.
    res = np.array([
        [objective(np.array([[x_val, y_val]]).T) for x_val, y_val in zip(x_row, y_row)]
        for x_row, y_row in zip(X, Y)
    ])
    plt.figure(figsize=(7, 7))
    plt.contour(X, Y, res, 100)  # 100 contour levels
    plt.xlabel('$x_1$')
    plt.ylabel('$x_2$')
In [None]:
show_contours(function_parabola)
plt.scatter(np.array(var_history)[:,0], np.array(var_history)[:,1], s=10, c='r');

# Neural Networks

## Regression problem (Prediction of a sinusoidal function)

In [5]:
import torch
import matplotlib.pyplot as plt

### Data preparation

In [None]:
# Training Dataset: 100 inputs drawn uniformly from [-10, 10) with sine targets
# perturbed by Gaussian noise (standard normal samples divided by 5).
x_train = torch.rand(100) * 20.0 - 10.0
y_train = torch.sin(x_train) + torch.randn(x_train.shape) / 5
# Add a trailing dimension so both tensors become columns of shape (100, 1).
x_train = x_train.unsqueeze(1)
y_train = y_train.unsqueeze(1)
plt.plot(x_train.numpy(), y_train.numpy(), 'o')
plt.title('noisy sin(x)')
plt.xlabel('x_train')
plt.ylabel('y_train');

In [None]:
# Validation Dataset: 100 evenly spaced inputs on [-10, 10] with noise-free
# sine targets, both stored as columns of shape (100, 1).
x_validation = torch.linspace(-10, 10, 100).unsqueeze(1)
y_validation = torch.sin(x_validation)
plt.plot(x_validation.numpy(), y_validation.numpy(), 'o')
plt.title('sin(x)')
plt.xlabel('x_validation')
plt.ylabel('y_validation');

### Network architecture

In [None]:
class SineNet(torch.nn.Module):
    """One-hidden-layer regressor: Linear(1 -> n) -> Sigmoid -> Linear(n -> 1)."""

    def __init__(self, n_hidden_neurons):
        """Create the layers; `n_hidden_neurons` is the width of the hidden layer."""
        super().__init__()
        self.fc1 = torch.nn.Linear(1, n_hidden_neurons)  # input -> hidden (fully connected)
        self.act1 = torch.nn.Sigmoid()                   # hidden non-linearity
        self.fc2 = torch.nn.Linear(n_hidden_neurons, 1)  # hidden -> output

    def forward(self, x):
        """Forward propagation: apply fc1, the sigmoid and fc2 in that order."""
        hidden = self.act1(self.fc1(x))
        return self.fc2(hidden)

sine_net = SineNet(50) # You can change number of neurons

In [None]:
def predict(net, x, y):
    """Plot the targets `y` and the predictions of `net` over the inputs `x`.

    Args:
        net: module called as net(x) to obtain predictions.
        x: input tensor; converted with .numpy() for plotting, so it must
            live on the CPU.
        y: ground-truth tensor plotted against `x`.
    """
    # Inference only: skip building an autograd graph. Calling the module
    # (rather than .forward) also runs any registered hooks.
    with torch.no_grad():
        y_pred = net(x)

    plt.plot(x.numpy(), y.numpy(), 'o', label='Ground truth')
    plt.plot(x.numpy(), y_pred.numpy(), 'o', c='r', label='Prediction')
    plt.legend(loc='upper left')
    plt.xlabel('$x$')
    plt.ylabel('$y$')

predict(sine_net, x_validation, y_validation)

### Learning algorithm (optimizer)

In [None]:
optimizer = torch.optim.Adam(sine_net.parameters(), lr=0.01)

### Learning process

In [None]:
def loss(pred, target):
    """Mean squared error: the average of the element-wise squared differences."""
    return ((pred - target) ** 2).mean()

In [None]:
# Full-batch training: every iteration uses the whole training set.
for epoch_index in range(2000):
    optimizer.zero_grad()  # gradients accumulate across backward() calls otherwise

    # Call the module instead of .forward() so registered hooks are honoured.
    y_pred = sine_net(x_train)
    loss_val = loss(y_pred, y_train)

    loss_val.backward()

    optimizer.step()

predict(sine_net, x_validation, y_validation)

## The classification problem (Recognition of digits)

In [None]:
import torch
import random
import numpy as np
import matplotlib.pyplot as plt

### Data preparation

In [None]:
import torchvision.datasets

MNIST_train = torchvision.datasets.MNIST('./', download=True, train=True)
MNIST_test = torchvision.datasets.MNIST('./', download=True, train=False)

In [None]:
# torchvision exposes the images as `.data` and the labels as `.targets`;
# train_data / train_labels / test_data / test_labels are deprecated aliases
# that emit warnings.
X_train = MNIST_train.data
y_train = MNIST_train.targets
X_test = MNIST_test.data
y_test = MNIST_test.targets

# Insert a channel dimension at position 1 (Conv2d expects N x C x H x W) and
# convert the integer pixel values to float.
X_train = X_train.unsqueeze(1).float()
X_test = X_test.unsqueeze(1).float()

In [None]:
plt.imshow(X_train[0, 0])
plt.show()
print(y_train[0])

### Network architecture

In [None]:
class LeNet5(torch.nn.Module):
    """LeNet-5 style classifier: two conv/tanh/avg-pool stages, then three linear layers.

    fc1 expects 5 * 5 * 16 features, i.e. 16 feature maps of 5x5 after the
    second pooling stage; the network produces 10 raw scores (logits) per sample.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 6 channels; padding=2 keeps the spatial size, pooling halves it.
        self.conv1 = torch.nn.Conv2d(
            in_channels=1, out_channels=6, kernel_size=5, padding=2)
        self.act1 = torch.nn.Tanh()
        self.pool1 = torch.nn.AvgPool2d(kernel_size=2, stride=2)

        # Stage 2: 6 -> 16 channels; no padding, then pooling halves the size again.
        self.conv2 = torch.nn.Conv2d(
            in_channels=6, out_channels=16, kernel_size=5, padding=0)
        self.act2 = torch.nn.Tanh()
        self.pool2 = torch.nn.AvgPool2d(kernel_size=2, stride=2)

        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.fc1 = torch.nn.Linear(5 * 5 * 16, 120)
        self.act3 = torch.nn.Tanh()

        self.fc2 = torch.nn.Linear(120, 84)
        self.act4 = torch.nn.Tanh()

        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        """Map a batch of single-channel images to a (batch, 10) tensor of logits."""
        x = self.pool1(self.act1(self.conv1(x)))
        x = self.pool2(self.act2(self.conv2(x)))

        # Flatten channels and spatial dimensions into one feature vector per sample.
        x = x.view(x.size(0), -1)

        x = self.act3(self.fc1(x))
        x = self.act4(self.fc2(x))
        return self.fc3(x)

lenet5 = LeNet5()

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
lenet5 = lenet5.to(device)

### Learning algorithm (optimizer)

In [None]:
# optimizer = torch.optim.SGD(lenet5.parameters(), lr=1.0e-3)
optimizer = torch.optim.Adam(lenet5.parameters(), lr=1.0e-3)

### Learning process

In [None]:
loss = torch.nn.CrossEntropyLoss()

In [None]:
batch_size = 100

test_accuracy_history = []  # test accuracy after every epoch
test_loss_history = []      # test loss after every epoch

# The test set is evaluated as a whole each epoch, so move it to the device once.
X_test = X_test.to(device)
y_test = y_test.to(device)

for epoch in range(10):
    # Visit the training samples in a fresh random order every epoch.
    order = np.random.permutation(len(X_train))
    for start_index in range(0, len(X_train), batch_size):
        optimizer.zero_grad()

        batch_indexes = order[start_index:start_index+batch_size]

        # Only the current mini-batch is transferred to the device.
        X_batch = X_train[batch_indexes].to(device)
        y_batch = y_train[batch_indexes].to(device)

        # Call the module instead of .forward() so registered hooks are honoured.
        preds = lenet5(X_batch)

        loss_value = loss(preds, y_batch)
        loss_value.backward()

        optimizer.step()

    # Evaluation needs no gradients: without no_grad() autograd would record a
    # graph for the entire test set and keep its activations in memory.
    with torch.no_grad():
        test_preds = lenet5(X_test)
        test_loss_history.append(loss(test_preds, y_test).cpu())
        accuracy = (test_preds.argmax(dim=1) == y_test).float().mean().cpu()
    test_accuracy_history.append(accuracy)

    print(accuracy)

In [None]:
plt.plot(test_loss_history);

In [None]:
plt.plot(test_accuracy_history);