In [1]:
import torch

In [2]:
x=torch.rand(3,2, 2)
print(x)

print (torch.__version__)

tensor([[[0.3512, 0.6443],
         [0.0743, 0.1211]],

        [[0.2680, 0.8293],
         [0.7656, 0.8478]],

        [[0.0930, 0.2398],
         [0.3536, 0.6490]]])
1.10.0


In [3]:
# create a tensor from the existing tensor x
ts = torch.Tensor(x)

In [4]:
x.type(), ts.shape


('torch.FloatTensor', torch.Size([3, 2, 2]))

In [5]:
# channel first
ts

tensor([[[0.3512, 0.6443],
         [0.0743, 0.1211]],

        [[0.2680, 0.8293],
         [0.7656, 0.8478]],

        [[0.0930, 0.2398],
         [0.3536, 0.6490]]])

In [15]:
# to know which device
ts.device

device(type='cpu')

In [17]:
# to move it to another device (.to() returns the tensor on that device, so keep the result)
# ts = ts.to(device)  # device is the target device name, e.g. "cuda" or "cpu"

In [90]:
ts.shape # => it is channel first

torch.Size([3, 2, 2])

In [6]:
# create tensors like 
torch.zeros_like(ts)

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

In [12]:
# create tensors like 
torch.zeros_like(ts).shape

torch.Size([3, 2, 2])

In [14]:
torch.ones_like(ts)

tensor([[[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]]])

In [16]:
torch.randn_like(ts)

tensor([[[-0.1450, -0.3536],
         [ 0.1732, -0.5340]],

        [[-1.4056,  0.0913],
         [-1.2498, -0.7992]],

        [[-0.6458, -0.5966],
         [ 0.3463, -0.1280]]])

In [19]:
torch.rand_like(ts) # unlike randn_like (normal distribution), values are drawn uniformly from [0, 1), so none are negative

tensor([[[0.3942, 0.9899],
         [0.6222, 0.9364]],

        [[0.5337, 0.5158],
         [0.0728, 0.3121]],

        [[0.7280, 0.9533],
         [0.6479, 0.3007]]])

In [20]:
# program machine to learn implicitly by itself 
##=> artificial intelligence

In [9]:
import torch.nn as nn

In [10]:
linear=nn.Linear(10, 2)

In [11]:
type(linear)

torch.nn.modules.linear.Linear

In [12]:
inp=torch.randn(3, 10)

In [13]:
inp

tensor([[ 0.0744, -0.5053, -0.6831,  1.5158,  0.2813, -0.0693, -1.6677,  1.9224,
         -0.3988,  0.4374],
        [ 0.7357,  0.4680, -1.1798,  0.5303,  1.0260,  1.4791, -1.1641, -2.5352,
         -2.2819, -0.8712],
        [ 0.0910,  1.0212,  0.7148,  0.3808,  0.4776, -0.0509, -1.4244,  0.8427,
         -0.0464,  1.5076]])

In [14]:
# a quick dense layer
output=linear(inp)

In [15]:
output

tensor([[-0.5811, -0.5773],
        [ 0.1391,  0.3573],
        [-0.6355, -1.1354]], grad_fn=<AddmmBackward0>)

In [16]:
output.shape

torch.Size([3, 2])

In [17]:
# let's pass it to the relu
relu=nn.ReLU()

In [18]:
relu_output=relu(output)

In [19]:
relu_output

tensor([[0.0000, 0.0000],
        [0.1391, 0.3573],
        [0.0000, 0.0000]], grad_fn=<ReluBackward0>)

In [20]:
# Optimization
import torch.optim as optim

mlp_layer = nn.Sequential(nn.Linear(5, 2), nn.BatchNorm1d(2), nn.ReLU())
mlp_layer

Sequential(
  (0): Linear(in_features=5, out_features=2, bias=True)
  (1): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)

In [21]:
inp=torch.randn(5, 5) + 1
mlp_layer(inp)

tensor([[0.0000, 1.3022],
        [0.0000, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 1.0771],
        [1.9866, 0.0000]], grad_fn=<ReluBackward0>)

In [22]:
adam_opt= optim.Adam(mlp_layer.parameters(),lr=1e-1 )

In [23]:
adam_opt

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.1
    weight_decay: 0
)

In [28]:
# training examples: 100 random samples with 5 features each, shifted by +1
train_ex = torch.randn(100, 5) + 1

# zero out gradients left over from any previous backward pass
adam_opt.zero_grad()

# compute the loss value: mean absolute difference between the layer outputs and 1
curr_loss = torch.abs(1- mlp_layer(train_ex)).mean()

# backward propagation: computes the gradients of the loss
curr_loss.backward()


In [25]:
curr_loss

tensor(0.7858, grad_fn=<MeanBackward0>)

In [32]:
adam_opt.step()

In [33]:
curr_loss

tensor(0.7285, grad_fn=<MeanBackward0>)

In [None]:
# loop over many steps (zero_grad -> loss -> backward -> step) to perform the training

Tensors are a specialized data structure that is very similar to arrays and matrices. In PyTorch, we use tensors to encode the inputs and outputs of a model, as well as the model’s parameters.

Tensors are similar to `NumPy’s ndarrays`, except that `tensors can run on GPUs or other hardware accelerators`. In fact, tensors and NumPy arrays can often share the same underlying memory, eliminating the need to copy data. 

Tensors are also optimized for automatic differentiation ( `Autograd` ). If you’re familiar with ndarrays, you’ll be right at home with the Tensor API. If not, follow along!

In [34]:
import numpy as np
data = [[1, 2],[3, 4]]

np_array = np.array(data)

x_data = torch.from_numpy(np_array)

x_data

tensor([[1, 2],
        [3, 4]])

From another tensor:

The new tensor retains the properties (shape, datatype) of the argument tensor, unless explicitly overridden.



In [125]:
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.9851, 0.0418],
        [0.3785, 0.1223]]) 



In [127]:
#x_data

In [71]:
#With random or constant values:

#shape is a tuple of tensor dimensions. In the functions below, it determines the dimensionality of 
# the output tensor.

shape = (2,3,)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.6987, 0.6062, 0.9995],
        [0.6105, 0.5326, 0.2657]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


#### Attributes of a Tensor
Tensor attributes describe their shape, datatype, and the device on which they are stored.

In [128]:
tensor = torch.rand(3,4)

print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### Operations on Tensors
PyTorch provides over 100 tensor operations, including arithmetic, linear algebra, matrix manipulation (transposing, indexing, slicing), sampling, and more.

In [73]:
# We move our tensor to the GPU if available
if torch.cuda.is_available():
    tensor = tensor.to("cuda")

In [129]:
# slicing like numpy
tensor = torch.ones(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Last column: {tensor[..., -1]}")
tensor[:,1] = 0
print(tensor)

First row: tensor([1., 1., 1., 1.])
First column: tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


**Joining tensors** You can use torch.cat to concatenate a sequence of tensors along a given dimension.

In [130]:
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


#### Arithmetic operations

In [131]:
# This computes the matrix multiplication between two tensors. y1, y2, y3 will have the same value
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)

# rand_like only provides a result tensor of the right shape; out= overwrites its contents
y3 = torch.rand_like(y1)
torch.matmul(tensor, tensor.T, out=y3)


# This computes the element-wise product. z1, z2, z3 will have the same value
z1 = tensor * tensor
z2 = tensor.mul(tensor)

# same pattern: z3 is a preallocated tensor that is filled through out=
z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

**Single-element tensors** If you have a one-element tensor, for example by aggregating all values of a tensor into one value, you can convert it to a Python numerical value using item():

In [132]:
agg = tensor.sum()
agg_item = agg.item()
print(agg_item, type(agg_item))

12.0 <class 'float'>


**In-place operations**: Operations that store the result into the operand are called in-place. They are denoted by a _ suffix. For example: x.copy_(y), x.t_(), will change x.

In [80]:
print(f"{tensor} \n")
tensor.add_(5)
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])


**In-place operations save some memory, but can be problematic when computing derivatives because of an immediate loss of history. Hence, their use is discouraged.**

### Bridge with NumPy
Tensors on the CPU and NumPy arrays can share their underlying memory locations, and changing one will change the other.



In [83]:
# tensor to numpy
t = torch.ones(5)
print(f"t: {t}\n")
n = t.numpy()
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])

n: [1. 1. 1. 1. 1.]


In [84]:
# A change in the tensor reflects in the NumPy array.
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")


t: tensor([2., 2., 2., 2., 2.])
n: [2. 2. 2. 2. 2.]


In [87]:
# Numpy array to Tensor
n = np.ones(5)
t = torch.from_numpy(n)

print(f"{n}\n")
print(f"{t}\n")

[1. 1. 1. 1. 1.]

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)



In [88]:
# Changes in the NumPy array are reflected in the tensor.
np.add(n, 1, out=n)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
n: [2. 2. 2. 2. 2.]


## QUICKSTART

### Working with data


PyTorch has two primitives to work with data: `torch.utils.data.DataLoader` and `torch.utils.data.Dataset`.

- **Dataset** stores the samples and their corresponding labels, 
- and **DataLoader** wraps an iterable around the Dataset.

In [48]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

PyTorch offers domain-specific libraries such as `TorchText, TorchVision, and TorchAudio`, all of which include datasets. For this tutorial, we will be using a **TorchVision** dataset.


Every `TorchVision` Dataset includes two arguments: `transform` and `target_transform` to modify the samples and labels respectively.

In [133]:
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

We pass the `Dataset` as an argument to `DataLoader`. This wraps an iterable over our dataset, and supports automatic `batching, sampling, shuffling and multiprocess` data loading. 

Here we define a batch size of 64, i.e. each element in the dataloader iterable will return a batch of 64 features and labels

In [51]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


### Creating Models

To define a neural network in PyTorch, we create a class that inherits from `nn.Module`. We define the layers of the network in the `__init__` function and specify how data will pass through the network in the `forward` function. To accelerate operations in the neural network, we move it to the GPU if available.

In [52]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flattens its input and maps 784 features to 10 logits."""

    def __init__(self):
        super().__init__()
        # Collapses every dimension after the first one into a single axis.
        self.flatten = nn.Flatten()
        # Two hidden layers of width 512 followed by the 10-way output layer.
        layers = [
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten `x` and return the raw (unnormalised) class scores."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


### Optimizing the Model Parameters
To train a model, we need a `loss function and an optimizer`.

In [53]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In a single training loop, the model makes predictions on the training dataset (fed to it in batches), and backpropagates the prediction error to adjust the model’s parameters.

In [54]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over `dataloader`, updating `model` after every batch.

    Args:
        dataloader: iterable yielding `(X, y)` batches; it must expose
            `.dataset` so the total sample count can be reported.
        model: the `nn.Module` being optimised.
        loss_fn: callable mapping `(pred, y)` to a scalar loss tensor.
        optimizer: optimizer holding the parameters of `model`.

    Returns:
        None. The loss is printed every 100 batches.
    """
    size = len(dataloader.dataset)
    # Batches follow the model's own device instead of a notebook-level global,
    # so the function also works for a model that lives on a different device.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None
    model.train()  # switches the model to training mode; nothing is fitted by this call
    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # `current` counts the samples consumed before this batch.
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")
            
            

In [55]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

The training process is conducted over several iterations (epochs). During each epoch, the model learns parameters to make better predictions. We print the model’s accuracy and loss at each epoch; we’d like to see the accuracy increase and the loss decrease with every epoch.

In [56]:
epochs = 5
# The loop variable is `epoch` rather than `t` so that it does not overwrite
# the tensor `t` created in the NumPy-bridge cells.
for epoch in range(epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.308942  [    0/60000]
loss: 2.289799  [ 6400/60000]
loss: 2.270063  [12800/60000]
loss: 2.258061  [19200/60000]
loss: 2.247024  [25600/60000]
loss: 2.209643  [32000/60000]
loss: 2.217358  [38400/60000]
loss: 2.185217  [44800/60000]
loss: 2.189134  [51200/60000]
loss: 2.147979  [57600/60000]
Test Error: 
 Accuracy: 47.9%, Avg loss: 2.140819 

Epoch 2
-------------------------------
loss: 2.159720  [    0/60000]
loss: 2.141727  [ 6400/60000]
loss: 2.081944  [12800/60000]
loss: 2.094977  [19200/60000]
loss: 2.044767  [25600/60000]
loss: 1.981107  [32000/60000]
loss: 2.006166  [38400/60000]
loss: 1.928531  [44800/60000]
loss: 1.942596  [51200/60000]
loss: 1.857719  [57600/60000]
Test Error: 
 Accuracy: 58.8%, Avg loss: 1.854838 

Epoch 3
-------------------------------
loss: 1.898327  [    0/60000]
loss: 1.859793  [ 6400/60000]
loss: 1.737964  [12800/60000]
loss: 1.776949  [19200/60000]
loss: 1.667548  [25600/60000]
loss: 1.623609  [32000/600

### Saving Models
A common way to save a model is to serialize the internal state dictionary (containing the model parameters).

In [57]:
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


### Loading Models
The process for loading a model includes re-creating the model structure and loading the state dictionary into it.

In [59]:
# Re-create the architecture, then copy the saved parameters into it.
model = NeuralNetwork()
# map_location="cpu" lets a checkpoint written during a GPU run load on a
# CPU-only machine; the freshly built model lives on the CPU anyway.
model.load_state_dict(torch.load("model.pth", map_location="cpu"))

<All keys matched successfully>

In [60]:
# now, let's make a prediction
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
# Fetch the first test sample once, under names that do not overwrite the
# tensor `x` created at the top of the notebook.
sample_image, sample_label = test_data[0]
with torch.no_grad():
    # A single sample has no batch axis; nn.Flatten keeps dim 0, so the model
    # returns one row of logits and pred[0] selects it.
    pred = model(sample_image)
    predicted, actual = classes[pred[0].argmax(0)], classes[sample_label]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')


Predicted: "Ankle boot", Actual: "Ankle boot"


In [61]:
# ref : https://pytorch.org/tutorials/beginner/basics/quickstart_tutorial.html