In [1]:
import numpy as np
import torch
# if numpy and torch are not already install then install them using command in cmd; 
# pip install numpy
# for pytorch get the command from https://pytorch.org/ according to the preferences needed

In [2]:
# see the pytorch version
torch.__version__

'1.6.0'

In [3]:
a = np.array([3,5,7])
print(a)

[3 5 7]


In [4]:
# converting numpy array to tensor
# The returned tensor and numpy array share the same memory
t = torch.from_numpy(a)
print(t)

tensor([3, 5, 7], dtype=torch.int32)


In [5]:
print(f"Before update {t}")
t[2] = 11
print(f"After update {a}")
# "a" is also updated as both shares same memory location
print(a)

Before update tensor([3, 5, 7], dtype=torch.int32)
After update [ 3  5 11]
[ 3  5 11]


In [6]:
# appending to array
a = np.append(a, 9)
print(a)
# "t" tensor won't be updated, to update we again have to convert it into tensor 
print(t)

[ 3  5 11  9]
tensor([ 3,  5, 11], dtype=torch.int32)


In [7]:
t = torch.tensor(a)
print(t)

tensor([ 3,  5, 11,  9], dtype=torch.int32)


In [8]:
# making array with zero values and describing the datatype of elements
b = torch.zeros(3,3,dtype=torch.float64)
print(b)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float64)


In [9]:
# building manual dimension array with 3 rows and 5 columns
x = torch.zeros([3,5])
print(x)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


In [10]:
# multiplication of two array
x = torch.tensor([3,5])
y = torch.tensor([7,9])
print(f'x = {x}')
print(f'y = {y}')
print(f'x*y = {x*y}')
print(f'x.mul(y) = {x.mul(y)}')
print(f'x.mul(y).sum() = {x.mul(y).sum()}')
# matmul and @ multiply and sum both arrays
print(f'x.matmul(y) = {x.matmul(y)}')
print(f'x@y = {x@y}')

x = tensor([3, 5])
y = tensor([7, 9])
x*y = tensor([21, 45])
x.mul(y) = tensor([21, 45])
x.mul(y).sum() = 66
x.matmul(y) = 66
x@y = 66


In [11]:
# shape of array
x.shape

torch.Size([2])

In [12]:
# random values of array with 2x5 dimension
y = torch.rand(2,5)
print(y)

tensor([[0.2218, 0.0072, 0.0434, 0.2280, 0.5328],
        [0.9001, 0.2510, 0.7513, 0.1944, 0.8215]])


In [13]:
# reshaping the array
y.view([5,2])

tensor([[0.2218, 0.0072],
        [0.0434, 0.2280],
        [0.5328, 0.9001],
        [0.2510, 0.7513],
        [0.1944, 0.8215]])

Check the documentation **[here](https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html)** know more about torch.tensor **[here](https://pytorch.org/docs/stable/tensors.html)**

In [None]:
#Returns the index of a currently selected device.
torch.cuda.current_device()

In [None]:
# Context-manager that changes the selected device.
torch.cuda.device(device)
# device (torch.device or int) – device index to select. It’s a no-op if this argument is a negative integer or None.

In [None]:
# Returns the number of GPUs available.
torch.cuda.device_count()

In [None]:
# Gets the name of a device.
torch.cuda.get_device_name(0)

In [14]:
# Returns a bool indicating if CUDA is currently available
torch.cuda.is_available()

False

In [None]:
# Can specify if wants to run on CPU or GPU

# For CPU
device = torch.device("cpu")
# For GPU
device = torch.device("cuda:0")

x = torch.randn(N, D_in, device=device, dtype=dtype)

w1 = torch.randn(D_in, H, device=device, dtype=dtype)

torch.cuda **[documentation](https://pytorch.org/docs/stable/cuda.html)**

**Note:** If perfroming complex operations should use GPU power, or if operations are not too heavy we have to adjust learning rate and epochs

## Prediction - Using numpy

Numpy is a generic framework for scientific computing; it does not know anything about computation graphs, or deep learning, or gradients. However we can easily use numpy to fit a two-layer network to random data by manually implementing the forward and backward passes through the network using numpy operations

In [None]:
import numpy as np

#### Defining training data and weight

In [15]:
# in_train and out_train hold the datasets 
# in_train contains the input data and out_train contains the output data
# output data is 3 times of input data
in_train = np.array([2,3,4,5], dtype=np.float32)
out_train = np.array([6,9,12,15], dtype=np.float32)

# intializing weight
w = 0.0

#### Model Prediction

In [16]:
# forward method
# weights will be multiplied by the input data(in_train)
def forward(x):
    return w * x

#### Calculating Loss

In [17]:
# In this case loss calculation is done with MSELoss formula i.e. 1/N(y_pred - y_train)**2

def loss(y_train, y_pred):
    return ((y_pred - y_train)**2).mean()

#### Gradient 

In [18]:
# Using MSE (Mean Square Error) which is 

def gradient(x, y_pred, y_train):
    # return np.dot(2*x, (y_pred - y_train)).mean()    # varient of below line
    return ( 2*x *(y_pred - y_train)).mean()

In [19]:
print(f"Prediction before training: f(11): {forward(11):.3f}")

Prediction before training: f(11): 0.000


#### Training with data

In [20]:
learning_rate = 0.01
epoch = 40

for i in range(epoch):
    # prediction = forward pass
    y_pred_data = forward(in_train)
    
    # loss
    ls = loss(out_train, y_pred_data)
    
    # gradients
    dw = gradient(in_train, y_pred_data, out_train)
    
    # update weights
    w = w - (learning_rate * dw)
    
    # printing the data as it goes
    if (i+1) % 8 == 0:
        print(f"epoch {i}: w = {w:.5f}, loss = {ls:.10f}")
    

epoch 7: w = 2.75806, loss = 1.4828460217
epoch 15: w = 2.98049, loss = 0.0096441433
epoch 23: w = 2.99843, loss = 0.0000627254
epoch 31: w = 2.99987, loss = 0.0000004078
epoch 39: w = 2.99999, loss = 0.0000000027


In [21]:
print(f"Prediction after training: f(11): {forward(11):.3f}")

Prediction after training: f(11): 33.000


## Prediction - Using Tensor and Autograd

Numpy is a great framework, but it cannot utilize GPUs to accelerate its numerical computations. For modern deep neural networks, GPUs often provide speedups of 50x or greater, so unfortunately numpy won’t be enough for modern deep learning.

#### tensor:
PyTorch Tensors can utilize GPUs to accelerate their numeric computations. To run a PyTorch Tensor on GPU, you simply need to cast it to a new datatype.A PyTorch Tensor is conceptually identical to a numpy array: a Tensor is an n-dimensional array, and PyTorch provides many functions for operating on these Tensors.
If x is a Tensor that has x.requires_grad=True then x.grad is another Tensor holding the gradient of x with respect to some scalar value.

#### autograd:
In the above examples, we had to manually implement both the forward and backward passes of our neural network. Manually implementing the backward pass is not a big deal for a small two-layer network, but can quickly get very hairy for large complex networks.
Automatic differentiation to automate the computation of backward passes in neural networks. The autograd package in PyTorch provides exactly this functionality. When using autograd, the forward pass of your network will define a computational graph; nodes in the graph will be Tensors, and edges will be functions that produce output Tensors from input Tensors. Backpropagating through this graph then allows you to easily compute gradients.

In [22]:
import torch

#### Defining training data and weight

In [23]:
input_data = torch.tensor([8,9,10,11,12], dtype=torch.float32)
output_data = torch.tensor([32,36,40,44,48], dtype=torch.float32)

# Have to use requires_grad=True when need to calculate derivative/gradient with respect to varible
weight = torch.tensor(0, dtype=torch.float32, requires_grad=True)

#### Model Prediction

In [24]:
def forward(x):
    return x * weight

#### Calculating Loss

In [25]:
def loss(y_pred, y):
    return ((y_pred - y)**2).mean()

In [26]:
print(f"Prediction before training: f(13): {forward(13):.3f}")

Prediction before training: f(13): 0.000


#### Training with data

In [27]:
learning_rate = 1e-3
epoch = 50

for i in range(epoch):
    output_pred = forward(input_data)
    ls = loss(output_pred, output_data)
    
    # This is Autograd; calculates the gradient/derivative automatically
    ls.backward()
    if i % 5 == 0:
        print(f"epoch: {i+1}, weight = {weight:.5f}, loss = {ls:.5f}")
    # As the derivative has been calculted with autograd method we can get the value with weight.grad
    # Manually update weights using gradient descent. Wrap in torch.no_grad() because weights have requires_grad=True, 
    # but we don't need to track this in autograd.
    # If don't want to wrap the then simply use: weight.data = weight.data - learning_rate * weight.grad
    with torch.no_grad():
        weight -= (learning_rate * weight.grad) # or use weight.copy_(weight - (learning_rate * weight.grad))
        # Manually zero the gradients after updating weights
        weight.grad.zero_()

epoch: 1, weight = 0.00000, loss = 1632.00000
epoch: 6, weight = 2.72172, loss = 166.66740
epoch: 11, weight = 3.59150, loss = 17.02085
epoch: 16, weight = 3.86946, loss = 1.73825
epoch: 21, weight = 3.95828, loss = 0.17752
epoch: 26, weight = 3.98667, loss = 0.01813
epoch: 31, weight = 3.99574, loss = 0.00185
epoch: 36, weight = 3.99864, loss = 0.00019
epoch: 41, weight = 3.99956, loss = 0.00002
epoch: 46, weight = 3.99986, loss = 0.00000


In [28]:
print(f"Prediction before training: f(13): {forward(13):.3f}")

Prediction before training: f(13): 51.999


We can define our own **Custom autograd function** by defining class having constructor and forward method, for details see **[here](https://pytorch.org/tutorials/beginner/pytorch_with_examples.html#pytorch-defining-new-autograd-functions)**

## Using nn and optim module

#### nn module: 
The nn package defines a set of Modules, which are roughly equivalent to neural network layers. A Module receives input Tensors and computes output Tensors, but may also hold internal state such as Tensors containing learnable parameters. The nn package also defines a set of useful loss functions that are commonly used when training neural networks.

#### optim module: 
The optim package in PyTorch abstracts the idea of an optimization algorithm and provides implementations of commonly used optimization algorithms. Up to this point we have updated the weights of our models by manually mutating the Tensors holding learnable parameters (with torch.no_grad() or .data to avoid tracking history in autograd). This is not a huge burden for simple optimization algorithms like stochastic gradient descent, but in practice we often train neural networks using more sophisticated optimizers like AdaGrad, RMSProp, Adam, etc.

In [29]:
import torch
import torch.nn as nn

#### Defining training data and test tensor

In [31]:
# Here data is taken in 2D shape as later we are modeling data with Linear transformation 
# where only the feature numbers (column numbers of matrix) will be passed
input_data = torch.tensor([[3],[4],[5],[6],[7]], dtype=torch.float)
output_data = torch.tensor([[6],[8],[10],[12],[14]], dtype=torch.float)
# This is the tensor which we going to predict
test = torch.tensor([15], dtype=torch.float)

#### Modeling Data using nn module

In [32]:
# Applies a linear transformation to the incoming data: y = x*W^T + b

in_feature = input_data.shape[1] # returns column numbers or use num_rows,num_cols=input_data.shape
out_feature = output_data.shape[1]

# Replacing the manual forward method, weights and bais
model = nn.Linear(in_feature, out_feature)

To better understand nn.Linear go to stackflow or **[click here](https://stackoverflow.com/questions/54916135/what-is-the-class-definition-of-nn-linear-in-pytorch)**

In [33]:
print(f"Prediction before training: f(15) = {model(test).item():.5f}")

Prediction before training: f(15) = -12.29544


#### Training with data

In [36]:
learning_rate = 0.01
epoch = 500
# Replacing manual loss function with nn module
# The nn package also contains definitions of popular loss functions
# Loss function is callable; recieves similar arguements as before i.e. (y_pred, y)
loss = nn.MSELoss()
# Use the optim package to define an Optimizer that will update the weights of the model for us.
# Here we will use SGD(stochastic gradient descent); the optim package contains many other optimization algorithms. 
# The first argument to the SGD constructor tells the optimizer which Tensors it should update.
# torch.optim: Contains optimizers such as SGD, which update the weights of Parameter during the backward step
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for i in range(epoch):
    # Calling forward function since the froward operation is now happeing with Linear transformation
    output_pred = model(input_data)
    # Compute and print loss
    ls = loss(output_pred, output_data)
    if i%50==49:
        # unpack the weight and bais
        [w,b] = model.parameters()
        # this will be lists of list so using w[0][0]
        print(f"epoch = {i+1}, weight = {w[0][0].item()}, loss = {ls:.15f}")
    # Before the backward pass, use the optimizer object to zero all of the gradients for the variables 
    # it will update (which are the learnable weights of the model). This is because by default, gradients are
    # accumulated in buffers( i.e, not overwritten) whenever .backward() is called.
    optimizer.zero_grad()
    # Backward pass: compute gradient of the loss with respect to model parameters
    ls.backward()
    # Calling the step function on an Optimizer makes an update to its parameters(weights)
    optimizer.step()

epoch = 50, weight = 1.9289791584014893, loss = 0.010838186368346
epoch = 100, weight = 1.9338905811309814, loss = 0.009390989318490
epoch = 150, weight = 1.938462495803833, loss = 0.008137014694512
epoch = 200, weight = 1.9427181482315063, loss = 0.007050529122353
epoch = 250, weight = 1.9466794729232788, loss = 0.006109020207077
epoch = 300, weight = 1.9503668546676636, loss = 0.005293296184391
epoch = 350, weight = 1.9537992477416992, loss = 0.004586515948176
epoch = 400, weight = 1.9569942951202393, loss = 0.003974071703851
epoch = 450, weight = 1.9599684476852417, loss = 0.003443434368819
epoch = 500, weight = 1.9627368450164795, loss = 0.002983632031828


In [37]:
print(f"Prediction after training: f(15) = {model(test).item():5f}")

Prediction after training: f(15) = 29.642254


We can create our **Custom nn Module** by defining class having constructor and forward method, for details see **[here](https://pytorch.org/tutorials/beginner/pytorch_with_examples.html#pytorch-custom-nn-modules)** Below is the example

In [None]:
# model = nn.Linear(in_feature, out_feature)

# Module: creates a callable which behaves like a function, but can also contain state(such as neural net layer weights). 
# It knows what Parameter (s) it contains and can zero all their gradients, loop through them for weight updates, etc.
class custom_linear(nn.Module):
    
    def __init__(self, in_dim, out_dim):
        super(custom_linear,self).__init__()
        # In the constructor we instantiate two nn.Linear modules and assign them as member variables.
        # define layers
        self.lin1 = nn.Linear(in_dim, out_dim)
    
    def forward(self, x):
        # In the forward function we accept a Tensor of input data and we must return a Tensor of output data. 
        # We can use Modules defined in the constructor as well as arbitrary operators on Tensors.
        return self.lin1(x)

model = custom_linear(in_feature,out_feature)