In [1]:
import torch
import numpy as np

# Initializing a Tensor

## Directly from data

In [3]:
# A nested Python list is enough to build a tensor; torch infers the dtype
# from the values (integers here).
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)
# Bare last expression so the notebook renders the tensor.
x_data

tensor([[1, 2],
        [3, 4]])

## From a NumPy array

In [5]:
# Build an ndarray from the nested list `data` defined in an earlier cell,
# then wrap that ndarray as a tensor.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
# Bare last expression so the notebook renders the tensor.
x_np

tensor([[1, 2],
        [3, 4]])

## From another tensor:

In [6]:
# Both tensors are derived from x_data; only the second one changes the dtype.
x_ones = torch.ones_like(x_data)                     # retains the properties of x_data
x_rand = torch.rand_like(x_data, dtype=torch.float)  # overrides the datatype of x_data

print(f"Ones Tensor: \n {x_ones} \n")
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.7979, 0.9246],
        [0.6536, 0.0757]]) 



## With random or constant values:

In [7]:
# `shape` is a tuple of dimension sizes shared by all three factory calls.
shape = (2, 3)
rand_tensor, ones_tensor, zeros_tensor = (
    torch.rand(shape),   # random values
    torch.ones(shape),   # all ones
    torch.zeros(shape),  # all zeros
)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.7160, 0.4428, 0.1366],
        [0.6557, 0.6136, 0.1643]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


# Attributes of a Tensor

In [10]:
# A 3x4 tensor of random values used to inspect the three basic attributes.
tensor = torch.rand(3,4)

# shape: size of each dimension; dtype: element type; device: where the data lives.
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


# Operations on Tensors

[Operations](https://pytorch.org/docs/stable/torch.html) on tensors closely mirror NumPy's API, so if you're familiar with the NumPy library you'll find them easy to pick up. They can also be run on the GPU (typically at higher speeds than on a CPU).

By default, tensors are created on the CPU. We need to explicitly move tensors to the GPU using the `.to` method (after checking for GPU availability). Keep in mind that copying large tensors across devices can be expensive in terms of time and memory!

In [11]:
# We move our tensor to the GPU if available; on a CPU-only machine this cell
# leaves `tensor` untouched.
# NOTE(review): the next code cell rebinds `tensor` to a new torch.ones(4, 4),
# so the moved tensor is not what later cells operate on.
if torch.cuda.is_available():
    tensor = tensor.to("cuda")

## Standard numpy-like indexing and slicing:

In [12]:
# Start from a fresh 4x4 tensor of ones (this replaces the earlier `tensor`).
tensor = torch.ones(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
# `...` stands for "all leading dimensions", so this selects the last column.
print(f"Last column: {tensor[..., -1]}")
# Slice assignment writes in place: every entry of column 1 becomes 0.
tensor[:,1] = 0
print(tensor)

First row: tensor([1., 1., 1., 1.])
First column: tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


## Joining tensors

In [24]:
# torch.cat() concatenates a sequence of tensors along an existing dimension.
# Joining three copies along dim=1 triples the number of columns.
t1 = torch.cat([tensor] * 3, dim=1)
print(t1.shape)
print(t1)

torch.Size([4, 12])
tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


In [23]:
# torch.stack() joins tensors along a *new* dimension (inserted at index 1 here),
# so every input tensor must have exactly the same size.
t2 = torch.stack((tensor,) * 3, dim=1)
print(t2.shape)
# Bare last expression so the notebook renders the stacked tensor.
t2

torch.Size([4, 3, 4])


tensor([[[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]],

        [[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]],

        [[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]],

        [[1., 0., 1., 1.],
         [1., 0., 1., 1.],
         [1., 0., 1., 1.]]])

## Arithmetic operations

In [25]:
# This computes the matrix multiplication between two tensors. y1, y2, y3 will have the same value
# ``tensor.T`` returns the transpose of a tensor
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)

# y3 is only a pre-allocated buffer of the right shape; its random contents
# are overwritten by the call below.
y3 = torch.rand_like(y1)
# out=y3 stores the output to y3
torch.matmul(tensor, tensor.T, out=y3)


# This computes the element-wise product. z1, z2, z3 will have the same value
z1 = tensor * tensor
z2 = tensor.mul(tensor)

# Same pre-allocate-then-fill pattern as y3. This call is the cell's last
# expression, so its return value is what the notebook displays.
z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])

## Single-element tensors 

In [31]:
# Summing every element yields a tensor that holds a single value.
agg = tensor.sum()
print(agg, type(agg))

# We can convert a single-element tensor to a Python numerical value with item()
agg_item = agg.item()
print(agg_item, type(agg_item))

tensor(12.) <class 'torch.Tensor'>
12.0 <class 'float'>


## In-place operations

In-place operations store their result in the operand itself. They are denoted by a `_` suffix, for example `tensor.add_(5)`.

In [32]:
# Print the tensor before and after to make the in-place change visible.
print(f"{tensor} \n")
# add_ mutates `tensor` itself, so re-running this cell adds another 5 each time.
tensor.add_(5)
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])


# Bridge with NumPy

## Tensor to NumPy array

In [33]:
# A 1-D tensor of five ones and its NumPy view.
t = torch.ones(5)
print(f"t: {t}")
# The next cell changes `t` in place and shows the same change appearing in `n`.
n = t.numpy()
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


- A change in the tensor reflects in the NumPy array.

In [34]:
# Only `t` is modified here (in place); `n` is printed to show that it changed too.
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.])
n: [2. 2. 2. 2. 2.]


---

# Build the Neural Network

Neural networks are composed of layers/modules that perform operations on data. The `torch.nn` namespace provides all the building blocks you need to build your own neural network. Every module in PyTorch subclasses `nn.Module`. A neural network is itself a module that consists of other modules (layers). This nested structure allows for building and managing complex architectures easily.

In [36]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
# from torchvision import datasets, transforms

## Get Device for Training

In [37]:
# Select "cuda" when a CUDA device is available, otherwise fall back to "cpu".
# NOTE(review): none of the cells visible here move a model or tensor to
# `device` yet — confirm it is used further down.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


## Define the Class

We define our neural network by subclassing `nn.Module`, and initialize the neural network layers in `__init__`. Every `nn.Module` subclass implements the operations on input data in the `forward` method.

In [38]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then three linear layers with ReLU in between.

    The stack maps 28*28 flattened input features to 512, to 512 again, and
    finally to 10 output values. No activation follows the last linear layer,
    so ``forward`` returns raw scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are built in execution order; positions inside the Sequential
        # (0, 2, 4 for the linear layers) determine the state_dict key names.
        stack_layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*stack_layers)

    def forward(self, x):
        """Flatten ``x`` and return the output of the linear/ReLU stack."""
        return self.linear_relu_stack(self.flatten(x))

In [39]:
# Redundant re-import: torch is already imported in earlier cells.
import torch
# Record the installed PyTorch version alongside the notebook's outputs.
print(torch.__version__)

1.13.1+cu117


In [42]:
import torch.nn as nn

class MyPyTorchModel(nn.Module):
    """Sequential container whose layers are appended one at a time with ``add``.

    Layers run in insertion order and nothing is applied implicitly between
    them: activations (``nn.ReLU()``, ``nn.Softmax(dim=1)``, ...) must be added
    as layers, exactly like the linear layers.

    ``train`` keeps the notebook's original call style
    ``model.train(train_loader, cost_fn, optimizer_fn, num_epochs)`` while
    remaining compatible with ``nn.Module.train(mode)``. That compatibility
    matters because ``nn.Module.eval()`` is implemented as ``self.train(False)``.
    """

    def __init__(self):
        super().__init__()
        # ModuleList (not a plain list) so each added layer's parameters are
        # registered and show up in ``self.parameters()`` for the optimizer.
        self.layers = nn.ModuleList()

    def add(self, layer):
        """Append ``layer`` (an ``nn.Module``) to the end of the stack."""
        self.layers.append(layer)

    def forward(self, x):
        """Pass ``x`` through every added layer in insertion order.

        No activation is inserted automatically. Adding one here as well would
        apply it twice after an explicit ``nn.ReLU()`` layer and would clip the
        logits feeding a final ``nn.Softmax``. With no layers added, ``x`` is
        returned unchanged.
        """
        for layer in self.layers:
            x = layer(x)
        return x

    def train(self, train_loader=True, cost_fn=None, optimizer_fn=None,
              num_epochs=10, *, mode=None):
        """Run the training loop, or switch train/eval mode like ``nn.Module.train``.

        This method shadows ``nn.Module.train``, so it must accept both call
        styles:

        * ``model.train()``, ``model.train(False)``, ``model.train(mode=False)``
          set the training flag on this module and its children and return
          ``self`` (standard ``nn.Module`` behaviour, also used by ``eval()``).
        * ``model.train(train_loader, cost_fn, optimizer_fn, num_epochs=10)``
          runs the optimisation loop implemented in ``fit`` and returns ``None``.

        Args:
            train_loader: iterable of ``(inputs, labels)`` batches, or a bool
                when the call is a mode switch.
            cost_fn: callable ``cost_fn(outputs, labels)`` returning a scalar loss tensor.
            optimizer_fn: optimizer exposing ``zero_grad()`` and ``step()``.
            num_epochs: number of passes over ``train_loader``.
            mode: keyword-only alias for the ``nn.Module.train(mode=...)`` form.

        Raises:
            TypeError: a loader was given without ``cost_fn`` or ``optimizer_fn``.
            ValueError: ``train_loader`` yields no batches.
        """
        if mode is not None:
            return super().train(mode)
        if isinstance(train_loader, bool):
            return super().train(train_loader)
        if cost_fn is None or optimizer_fn is None:
            raise TypeError(
                "train(train_loader, cost_fn, optimizer_fn) requires both "
                "cost_fn and optimizer_fn"
            )
        return self.fit(train_loader, cost_fn, optimizer_fn, num_epochs)

    def fit(self, train_loader, cost_fn, optimizer_fn, num_epochs=10):
        """Optimise the model on ``train_loader`` for ``num_epochs`` epochs.

        Prints the mean batch loss after each epoch and a final completion
        message. Arguments are the same as for the training form of ``train``.
        """
        # Set training mode through the base class; calling self.train() with
        # no arguments here would re-enter the override above.
        super().train(True)

        for epoch in range(num_epochs):
            running_loss = 0.0
            num_batches = 0

            for inputs, labels in train_loader:
                # zero the parameter gradients left over from the previous step
                optimizer_fn.zero_grad()

                # forward + backward + optimize
                outputs = self(inputs)
                loss = cost_fn(outputs, labels)
                loss.backward()
                optimizer_fn.step()

                running_loss += loss.item()
                num_batches += 1

            # Count batches while iterating so loaders without __len__ work,
            # and an empty loader gives a clear error instead of a division by zero.
            if num_batches == 0:
                raise ValueError("train_loader yielded no batches; nothing to train on")

            print(f"Epoch {epoch+1}, loss: {running_loss / num_batches}")

        print('Finished Training')

    

model = MyPyTorchModel()

# add layers to the model, in order: 784 -> 128 linear, ReLU, 128 -> 10 linear,
# then a softmax over dim=1.
# NOTE(review): the final Softmax makes the model emit probabilities. If it is
# trained with nn.CrossEntropyLoss, which expects raw logits, the Softmax layer
# should be dropped — confirm against the training call.
for layer in (
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 10),
    nn.Softmax(dim=1),
):
    model.add(layer)
