In [3]:
from __future__ import print_function
import torch
import numpy as np

# Introduction
## Tensors
Tensors are multidimentional arrays. PyTorch tensors are similar to NumPy's n-dimentional arrays.But, PyTorch tensors can be used on a GPU to accelarate computing.
PyTorch supports multiple types of tensors, including:
* FloatTensor: 32-bit float
* DoubleTensor: 64-bit float
* HalfTensor: 16-bit float
* IntTensor: 32-bit int
* LongTensor: 64-bit int

In [14]:
# initializea numpy array
a = np.array(1)
# Initialize a tensor
b = torch.tensor(1)

print(a)
print(b)

1
tensor(1)


## Mathematical operations


With numpy array

In [16]:
# intilaizing 2 arrays
a = np.array(2)
b = np.array(1)
print(a,b)
# addition
print(a+b)
# substraction
print(a-b)
# multiplication
print(a*b)

2 1
3
1
2


With PyTorch

In [18]:
# Initializing 2 tensors
a = torch.tensor(2)
b = torch.tensor(1)
print(a,b)
# addition
print(a+b)
# substraction
print(a-b)
# multiplication
print(a*b)

tensor(2) tensor(1)
tensor(3)
tensor(1)
tensor(2)


## Matrix Initialization

Let's construct an uninitialized matrix of 5x3.

In [19]:
x = torch.empty(5,3)
print(x)
print(x.shape)

tensor([[1.1112e-38, 9.5511e-39, 1.0102e-38],
        [1.0286e-38, 1.0194e-38, 9.6429e-39],
        [9.2755e-39, 9.1837e-39, 9.3674e-39],
        [1.0745e-38, 1.0653e-38, 9.5510e-39],
        [1.0561e-38, 1.0194e-38, 1.1112e-38]])
torch.Size([5, 3])


Let's construct a randomly initialized matrix

In [20]:
x = torch.rand(5,3)
print(x)

tensor([[0.8532, 0.5282, 0.5074],
        [0.0581, 0.5695, 0.7304],
        [0.8216, 0.5241, 0.7374],
        [0.3378, 0.5755, 0.5188],
        [0.8280, 0.5067, 0.7876]])


Let's construct a matrix filled with zeros and of dtyp long

In [9]:
x = torch.zeros(5,3, dtype=torch.long) 
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


## Matrix operations

In [21]:
# setting the random seed for pytorch and initializing two tensors
torch.manual_seed(42)
a = torch.randn(3,3)
b = torch.randn(3,3)

In [22]:
# matrix addition
print(torch.add(a,b), '\n')

# matrix subtraction
print(torch.sub(a,b), '\n')

# matrix multiplication
print(torch.mm(a,b), '\n')

# matrix division
print(torch.div(a,b))

tensor([[ 0.6040,  0.6637,  1.0438],
        [ 1.3406, -2.8127, -1.1753],
        [ 3.1662,  0.6841,  1.2788]]) 

tensor([[ 0.0693, -0.4061, -0.5749],
        [-0.8800,  0.5669,  0.8026],
        [ 1.2502, -1.9601, -0.3555]]) 

tensor([[ 0.4576,  0.2724,  0.3367],
        [-1.3636,  1.7743,  1.1446],
        [ 0.3243,  2.8696,  2.7954]]) 

tensor([[ 1.2594,  0.2408,  0.2897],
        [ 0.2075,  0.6645,  0.1884],
        [ 2.3051, -0.4826,  0.5649]])


let's apply a matrix transposition

In [23]:
# original matrix
print(a, '\n')

# matrix transpose
torch.t(a)

tensor([[ 0.3367,  0.1288,  0.2345],
        [ 0.2303, -1.1229, -0.1863],
        [ 2.2082, -0.6380,  0.4617]]) 



tensor([[ 0.3367,  0.2303,  2.2082],
        [ 0.1288, -1.1229, -0.6380],
        [ 0.2345, -0.1863,  0.4617]])

## Concatenating tensors

In [24]:
# Initializing 2 tensors
a = torch.tensor([[1,2],[3,4]])
b = torch.tensor([[5,6],[7,8]])
print(a, '\n')
print(b)

tensor([[1, 2],
        [3, 4]]) 

tensor([[5, 6],
        [7, 8]])


In [25]:
# concatenating vertically
torch.cat((a,b))

tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])

In [26]:
# concatenating horizontally
torch.cat((a,b),dim=1)

tensor([[1, 2, 5, 6],
        [3, 4, 7, 8]])

## Reshaping tensors

In [27]:
# setting the random seed for pytorch
torch.manual_seed(42)
# initializing tensor
a = torch.randn(2,4)
print(a)
a.shape

tensor([[ 0.3367,  0.1288,  0.2345,  0.2303],
        [-1.1229, -0.1863,  2.2082, -0.6380]])


torch.Size([2, 4])

We can use the .reshape() function and pass the required shape as a parameter. Let’s try to convert the above tensor of shape (2,4) to a tensor of shape (1,8)

In [28]:
# reshaping tensor
b = a.reshape(1,8)
print(b)
b.shape

tensor([[ 0.3367,  0.1288,  0.2345,  0.2303, -1.1229, -0.1863,  2.2082, -0.6380]])


torch.Size([1, 8])

We can convert NumPy arays to tensors

In [29]:
# initializing a numpy array
a = np.array([[1,2],[3,4]])
print(a, '\n')

# converting the numpy array to tensor
tensor = torch.from_numpy(a)
print(tensor)

[[1 2]
 [3 4]] 

tensor([[1, 2],
        [3, 4]], dtype=torch.int32)


# Common PyTorch modules
## Autograd module

In [30]:
# initializing a tensor
a = torch.ones((2,2), requires_grad=True)
a

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [31]:
# performing operations on the tensor
b = a + 5
c = b.mean()
print(b,c)

tensor([[6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>) tensor(6., grad_fn=<MeanBackward0>)


Now, the derivative of c w.r.t. a will be ¼ and hence the gradient matrix will be 0.25. Let’s verify this using PyTorch:

In [34]:
# back propagating
c.backward()

# computing gradients
print(a.grad)

tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])


## Optim module
The Optim module in PyTorch has pre-written codes for most of the optimizers that are used while building a neural network.

In [22]:
# importing the optim module
from torch import optim

# adam
## adam = optim.Adam(model.parameters(), lr=learning_rate)

## nn Module
The autograd module in PyTorch helps us define computation graphs as we proceed in the model. But, just using the autograd module can be low-level when we are dealing with a complex neural network.

In those cases, we can make use of the nn module. This defines a set of functions, similar to the layers of a neural network, which takes the input from the previous state and produces an output.

# Building a Neural Network from Scratch 
## Building Neural Network with PyTorch v1


We initialize the inupt and output

In [4]:
#Input tensor
X = torch.Tensor([[1,0,1,0],[1,0,1,1],[0,1,0,1]])

#Output
y = torch.Tensor([[1],[1],[0]])

print(X, '\n')
print(y)

tensor([[1., 0., 1., 0.],
        [1., 0., 1., 1.],
        [0., 1., 0., 1.]]) 

tensor([[1.],
        [1.],
        [0.]])


Now, we define the sigmoid function that will act as activation function and the derivative of the sigmoid function which will help for the backpropagation step

In [5]:
#Sigmoid Function
def sigmoid (x):
    return 1/(1 + torch.exp(-x))

#Derivative of Sigmoid Function/
def derivatives_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

Then we initialize the parameters of our model: the number of epochs, learning rate, weights, biases...

In [6]:
# variable initialization
epoch = 1000 # setting training iterations
lr = 0.1 # setting learning rate
inputlayer_neurons = X.shape[1] #number of features in data set
hiddenlayer_neurons = 3 # number of hidden layers
output_neurons = 1 # number of neurons in output layer

#weight and bias initialization
wh=torch.randn(inputlayer_neurons, hiddenlayer_neurons).type(torch.FloatTensor)
bh=torch.randn(1, hiddenlayer_neurons).type(torch.FloatTensor)
wout=torch.randn(hiddenlayer_neurons, output_neurons)
bout=torch.randn(1, output_neurons)

In [11]:
for  i in range(epoch):
    # Forward propagation
    hidden_layer_input1 = torch.mm(X, wh)
    hidden_layer_input = hidden_layer_input1 + bh
    hidden_layer_activations = sigmoid(hidden_layer_input)
    
    output_layer_input1 = torch.mm(hidden_layer_activations, wout)
    output_layer_input = output_layer_input1 + bout
    output = sigmoid(output_layer_input)
    
    # Backpropagation
    E = y-output
    slope_output_layer = derivatives_sigmoid(output)
    slope_hidden_layer = derivatives_sigmoid(hidden_layer_activations)
    d_output = E * slope_output_layer
    Error_at_hidden_layer = torch.mm(d_output, wout.t())
    d_hiddenlayer = Error_at_hidden_layer * slope_hidden_layer
    wout += torch.mm(hidden_layer_activations.t(), d_output) *lr
    bout += d_output.sum() *lr
    wh += torch.mm(X.t(), d_hiddenlayer) *lr
    bh += d_output.sum() *lr

In [12]:
print('actual :\n', y, '\n')
print('predicted :\n', output)

actual :
 tensor([[1.],
        [1.],
        [0.]]) 

predicted :
 tensor([[0.9943],
        [0.9362],
        [0.0915]])


## Building Neural Network with Numpy

In [14]:
# N = batch size ; D_in is input dimension ; H = hidden dimension; D_out = output dimension
N, D_in, H, D_out = 64, 1000, 100, 10

# create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly intialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6

for t in range(500):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h,0)
    y_pred = h_relu.dot(w2)
    
    # compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)
    
    # Backprop to compute gradients of w1 and w2 with repect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h<0] = 0
    grad_w1 = x.T.dot(grad_h)
    
    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 28310293.388650276
1 24046455.947607122
2 22485748.33202292
3 20449816.26516888
4 17098542.74024948
5 12708479.319280751
6 8609402.027216952
7 5462124.682564227
8 3430326.340406754
9 2204568.6675023
10 1493119.928472691
11 1072276.3243737863
12 813773.3048788816
13 645344.4338662495
14 528451.0129421465
15 442429.13691785757
16 376134.80888993223
17 323226.900301171
18 279952.71777665603
19 243916.1059332097
20 213506.65311874566
21 187649.65123356023
22 165513.35636200514
23 146454.19939037465
24 129970.45396461268
25 115661.10143721373
26 103173.31128772677
27 92254.4870180487
28 82675.08478930833
29 74246.21033616638
30 66812.73378047648
31 60238.35651494601
32 54405.91963871397
33 49221.13063303138
34 44604.13049982973
35 40483.84899006938
36 36798.75356089586
37 33496.57185901111
38 30532.39838244312
39 27868.582626639665
40 25470.145375360466
41 23307.17061950656
42 21353.7582883859
43 19586.237084515713
44 17984.715498949383
45 16532.385672097145
46 15212.808881699213
47 14013

431 0.0002752255035583401
432 0.0002648689137513053
433 0.0002549053318723759
434 0.0002453202650721338
435 0.00023609805085858423
436 0.000227226615846206
437 0.00021869067259245586
438 0.0002104786807829538
439 0.00020257831854698642
440 0.00019497581673589534
441 0.00018766134824538733
442 0.00018062336627349407
443 0.00017385153431205287
444 0.0001673357476930672
445 0.00016106604967005287
446 0.00015503335266641978
447 0.00014922813060062033
448 0.00014364231137341298
449 0.00013826730148892643
450 0.00013309478783933551
451 0.00012811731329369504
452 0.00012332742537102994
453 0.00011871813161587876
454 0.00011428248621431217
455 0.00011001388007456255
456 0.00010590620062950182
457 0.0001019525373885689
458 9.814756970620537e-05
459 9.448593634173123e-05
460 9.096207196079588e-05
461 8.757045166464037e-05
462 8.430595253143273e-05
463 8.116410971504952e-05
464 7.814024572888228e-05
465 7.523002312246211e-05
466 7.242887822334919e-05
467 6.973274341267073e-05
468 6.71378393801687

## Building Neural Network with PyTorch v2

In [17]:
dtype = torch.float
device = "cpu"

# N = batch size ; D_in is input dimension ; H = hidden dimension; D_out = output dimension
N, D_in, H, D_out = 64, 1000, 100, 10

In [19]:
# Create random Tensors to hold input and outputs.
# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Tensors during the backward pass.
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

In [20]:
# Create random Tensors for weights.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
w1 = torch.randn(D_in, H, device=device, dtype=dtype, requires_grad=True)
w2 = torch.randn(H, D_out, device=device, dtype=dtype, requires_grad=True)

In [21]:
learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y using operations on Tensors; these
    # are exactly the same operations we used to compute the forward pass using
    # Tensors, but we do not need to keep references to intermediate values since
    # we are not implementing the backward pass by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print loss using operations on Tensors.
    # Now loss is a Tensor of shape (1,)
    # loss.item() gets the scalar value held in the loss.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call w1.grad and w2.grad will be Tensors holding the gradient
    # of the loss with respect to w1 and w2 respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    # An alternative way is to operate on weight.data and weight.grad.data.
    # Recall that tensor.data gives a tensor that shares the storage with
    # tensor, but doesn't track history.
    # You can also use torch.optim.SGD to achieve this.
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights
        w1.grad.zero_()
        w2.grad.zero_()

99 648.1469116210938
199 3.491764545440674
299 0.03566964715719223
399 0.0007340770680457354
499 8.137375698424876e-05


# nn module
In pytorch, the `nn` package enables the definition of neural network layers, called **modules**. The `nn` package fot `pytorch` is like `keras` for `Tensorflow`. A module receives input Tensors and computes output Tensors. The `nn` package also provides some commonly used loss functions.
Here is an example if using the `nn` package to implement a two-layer network.

In [23]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold inputs and outputs
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# use the nn package to define our model as a sequence of layers.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
for t in range(500):
    # Forward pass: compute predicted y by passing x to the model. Module objects
    # override the __call__ operator so you can call them like functions. When
    # doing so you pass a Tensor of input data to the Module and it produces
    # a Tensor of output data.
    y_pred = model(x)

    # Compute and print loss. We pass Tensors containing the predicted and true
    # values of y, and the loss function returns a Tensor containing the
    # loss.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero the gradients before running the backward pass.
    model.zero_grad()

    # Backward pass: compute gradient of the loss with respect to all the learnable
    # parameters of the model. Internally, the parameters of each Module are stored
    # in Tensors with requires_grad=True, so this call will compute gradients for
    # all learnable parameters in the model.
    loss.backward()

    # Update the weights using gradient descent. Each parameter is a Tensor, so
    # we can access its gradients like we did before.
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

99 2.33319354057312
199 0.07260344177484512
299 0.007553726434707642
399 0.001089669531211257
499 0.000166371013619937


# optim module