# Lab 7 - Introduction to PyTorch

Tensors are data structures similar to arrays and matrices. They closely resemble NumPy arrays, but they can also run on GPUs or other specialized hardware to accelerate computing. PyTorch is a machine learning framework that allows us to create, train, and test models. In PyTorch, we use tensors to encode the inputs and outputs of a model, as well as the model’s parameters.

In [None]:
import torch
import numpy as np

## Tensors
### Creating a tensor

In [None]:
# Build a 2x2 tensor directly from a nested Python list.
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)

print(x_data)

tensor([[1, 2],
        [3, 4]])


In [None]:
# Convert the nested list to a NumPy array, then turn that array into a tensor.
np_array = np.array(data)
x_np_tensor = torch.from_numpy(np_array)

# Print the tensor together with its Python type.
print(x_np_tensor, type(x_np_tensor))

tensor([[1, 2],
        [3, 4]]) <class 'torch.Tensor'>


In [None]:
# Build three tensors that share one shape but are filled differently.

shape = (2, 3)

rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

# Same three messages as before, printed in the same order.
for label, value in (
    ("Random", rand_tensor),
    ("Ones", ones_tensor),
    ("Zero", zeros_tensor),
):
    print(f"{label} Tensor of shape {shape}: \n{value}\n")


Random Tensor of shape (2, 3): 
tensor([[0.6786, 0.0137, 0.8147],
        [0.5991, 0.9891, 0.7848]])

Ones Tensor of shape (2, 3): 
tensor([[1., 1., 1.],
        [1., 1., 1.]])

Zero Tensor of shape (2, 3): 
tensor([[0., 0., 0.],
        [0., 0., 0.]])



In [None]:
# Tensor shaped like x_np_tensor, with the dtype overridden to float.
# NOTE(review): the name `x_ones` suggests a tensor of ones, but rand_like
# fills it with random values (see the printed output) - confirm whether
# torch.ones_like or a different variable name was intended.
x_ones = torch.rand_like(x_np_tensor, dtype=torch.float)
print(x_ones)

tensor([[0.1960, 0.4169],
        [0.3002, 0.6913]])


### Tensor Attributes and Operations

In [None]:
# Inspect the tensor's metadata: .shape and .size() report the same
# dimensions, .dtype reports the element type.
print(x_np_tensor.shape)
print(x_np_tensor.size())
print(x_np_tensor.dtype)
print(x_np_tensor)

torch.Size([2, 2])
torch.Size([2, 2])
torch.int64
tensor([[1, 2],
        [3, 4]])


In [None]:
# Show which device currently holds the tensor.
print(x_np_tensor.device)

cpu


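- To run on a GPU, enable one in the notebook settings (this menu path is the one used by e.g. Google Colab): Edit -> Notebook Settings -> Hardware accelerator -> GPU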

In [None]:
# Move the tensor to the GPU, but only when CUDA is actually available;
# otherwise it stays where it is.
if torch.cuda.is_available():
    x_np_tensor = x_np_tensor.to('cuda')

In [None]:
# Check the device again after the conditional move to CUDA.
print(x_np_tensor.device)

cuda:0


In [None]:
# Reinterpret the 2x2 tensor as a single row of 4 elements.
print(x_np_tensor.view(1,4))

tensor([[1, 2, 3, 4]], device='cuda:0')


In [None]:
# Start from a 4x4 tensor of ones, then zero out the column at index 2.
tensor = torch.ones(4, 4)
tensor[:, 2] = 0

In [None]:
# Show the 4x4 tensor whose column at index 2 was set to zero.
print(tensor)

tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]])


In [None]:
# tensor cat and stack operations
# torch.cat joins tensors along an existing dimension: two 4x4 inputs
# concatenated on dim=1 give a 4x8 result.

cat_tensor = torch.cat([tensor, tensor], dim=1)
print(cat_tensor, cat_tensor.shape)

tensor([[1., 1., 0., 1., 1., 1., 0., 1.],
        [1., 1., 0., 1., 1., 1., 0., 1.],
        [1., 1., 0., 1., 1., 1., 0., 1.],
        [1., 1., 0., 1., 1., 1., 0., 1.]]) torch.Size([4, 8])


In [None]:
# torch.stack inserts a new dimension instead: two 4x4 inputs stacked on
# dim=1 give a 4x2x4 result.
stack_tensor = torch.stack([tensor, tensor], dim=1)
print(stack_tensor, stack_tensor.shape)

tensor([[[1., 1., 0., 1.],
         [1., 1., 0., 1.]],

        [[1., 1., 0., 1.],
         [1., 1., 0., 1.]],

        [[1., 1., 0., 1.],
         [1., 1., 0., 1.]],

        [[1., 1., 0., 1.],
         [1., 1., 0., 1.]]]) torch.Size([4, 2, 4])


In [None]:
# Element-wise product, written two equivalent ways.
print(tensor)

# Method form.
mult_tensor = tensor.mul(tensor)
print(mult_tensor)

# Operator form; prints the same values as the method form.
mult_tensor = tensor*tensor
print(mult_tensor)

tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]])
tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]])
tensor([[1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.],
        [1., 1., 0., 1.]])


In [None]:
# Matrix product of the tensor with itself, written two equivalent ways.
# Method form.
mat_mul_tensor = tensor.matmul(tensor)
print(mat_mul_tensor)

# Operator form; prints the same values as the method form.
mat_mul_tensor = tensor @ tensor
print(mat_mul_tensor)

tensor([[3., 3., 0., 3.],
        [3., 3., 0., 3.],
        [3., 3., 0., 3.],
        [3., 3., 0., 3.]])
tensor([[3., 3., 0., 3.],
        [3., 3., 0., 3.],
        [3., 3., 0., 3.],
        [3., 3., 0., 3.]])


In [None]:
# Matrix product of the tensor with its transpose; .T and .t() are used
# interchangeably here and print the same result.
mat_mul_tensor = tensor.matmul(tensor.T)
print(mat_mul_tensor)

mat_mul_tensor = tensor @ tensor.t()
print(mat_mul_tensor)

tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])
tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])


In [None]:
# add_ modifies `tensor` itself (no reassignment is needed): every element
# is increased by 5, as the print below shows.
tensor.add_(5)
print(tensor)

tensor([[6., 6., 5., 6.],
        [6., 6., 5., 6.],
        [6., 6., 5., 6.],
        [6., 6., 5., 6.]])


### Autograd for tensors

Autograd is the core torch package for automatic differentiation. It uses a tape-based system: if any input tensor of an operation has requires_grad=True, the computation is tracked. In the forward phase, the autograd tape records all the operations it executes, and in the backward phase, it replays those operations to compute gradients.

In [None]:
# A 2x2 tensor of ones that autograd is asked to track.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [None]:
# x was created directly rather than computed from other tensors, so it has
# no grad_fn (this prints None).
print(x.grad_fn)

None


In [None]:
# y is computed from a tracked tensor, so it records the operation that
# produced it (shown as grad_fn=<PowBackward0> in the output).
y = x ** 2
print(y)

tensor([[1., 1.],
        [1., 1.]], grad_fn=<PowBackward0>)


In [None]:
# Inside torch.no_grad() operations are not tracked: the printed result
# carries no grad_fn. Note that `y` is rebound here.
with torch.no_grad():
    y = x * 8
    print(y)

tensor([[8., 8.],
        [8., 8.]])


### Writing a Simple Neural Network with PyTorch

In [None]:
import os
import torch
from torch import nn

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected network with two hidden layers and one output unit.

    Layer sizes are ``n_features -> 256 -> 128 -> 1`` with a ReLU after each
    hidden layer. The output layer has no activation, so ``forward`` returns
    raw, unbounded values.

    Args:
        n_features: Size of the last dimension of the input. Defaults to 512,
            the value that used to be hard-coded, so ``NeuralNetwork()``
            builds exactly the same architecture as before.
    """

    def __init__(self, n_features=512):
        super().__init__()

        # The attribute name and layer order are unchanged, so state_dict
        # keys ("linear_relu_stack.0.weight", ...) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        """Map ``x`` (last dimension ``n_features``) to one value per row."""
        logits = self.linear_relu_stack(x)
        return logits
# This part of the notebook runs on the CPU.
device = "cpu"
print(f"Using {device} device")

# Instantiate the network, place it on the chosen device and show its layers.
model = NeuralNetwork().to(device)
print(model)


Using cpu device
NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=1, bias=True)
  )
)


In [None]:
# Input width; must match the first Linear layer of the model (512).
n_features = 512

# A batch of 10 random input rows created on the same device as the model.
X = torch.rand(10, n_features, device=device)

# Calling the model runs its forward pass.
# NOTE(review): the label below says "class", but the network ends in a
# single linear unit and prints raw values, not class labels.
predictions = model(X)
print(f"Predicted class: {predictions}")

Predicted class: tensor([[-0.0261],
        [-0.0120],
        [ 0.0037],
        [-0.0177],
        [-0.0342],
        [-0.0096],
        [ 0.0257],
        [-0.0285],
        [-0.0097],
        [ 0.0088]], grad_fn=<AddmmBackward0>)


In [None]:
# Walk through a single linear layer by hand: 5 rows of n_features inputs.
input_tensor = torch.rand(5, n_features, device=device)
print(input_tensor.size())

# A linear layer mapping n_features inputs to 256 outputs, moved to `device`.
layer1 = nn.Linear(in_features=n_features, out_features=256)
layer1 = layer1.to(device)

# The batch dimension (5) is kept; the feature dimension becomes 256.
hidden1 = layer1(input_tensor)
print(hidden1.size())

torch.Size([5, 512])
torch.Size([5, 256])


#### Activation function

Non-linear activations are what create the complex mappings between the model’s inputs and outputs. They are applied after linear transformations to introduce nonlinearity, helping neural networks learn a wide variety of phenomena.


We use the ReLU activation function here.


In [None]:
# Apply ReLU to the layer output; in the printed result the negative entries
# have become 0 while the positive ones are unchanged.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.1003,  0.3482,  0.0528,  ..., -0.1302, -0.0600, -0.2550],
        [-0.2661,  0.4191,  0.1047,  ..., -0.1795, -0.1268, -0.3053],
        [-0.0054,  0.5258,  0.1014,  ..., -0.0630, -0.0058, -0.5310],
        [ 0.1295,  0.3272, -0.0006,  ..., -0.2915, -0.0753, -0.2488],
        [ 0.1131,  0.2473,  0.1776,  ..., -0.1378, -0.1342, -0.4045]],
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.1003, 0.3482, 0.0528,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.4191, 0.1047,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.5258, 0.1014,  ..., 0.0000, 0.0000, 0.0000],
        [0.1295, 0.3272, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.1131, 0.2473, 0.1776,  ..., 0.0000, 0.0000, 0.0000]],
       grad_fn=<ReluBackward0>)


In [None]:
# Chain modules so that data flows through them in the listed order.
# `layer1` is the same module object created earlier, so its weights are
# reused here rather than re-initialised.
seq_modules = nn.Sequential(
    layer1,
    nn.ReLU(),
    nn.Linear(256, 1)
)
seq_modules = seq_modules.to(device)

# A fresh batch of 10 random input rows.
input_tensor = torch.rand(10,n_features, device=device)

# One output value per input row.
logits = seq_modules(input_tensor)
print(logits)


tensor([[-0.2164],
        [-0.1073],
        [-0.1362],
        [-0.2694],
        [-0.2072],
        [-0.3088],
        [-0.1843],
        [-0.2047],
        [-0.2641],
        [-0.2594]], grad_fn=<AddmmBackward0>)


#### Last layer for prediction

Since this is a regression task, no activation function is required for the last layer.

In a classification task, the last linear layer of the neural network returns logits - raw values in (-∞, ∞). They are passed to the nn.Softmax module, which scales the logits to values in [0, 1] representing the model’s predicted probabilities for each class. The dim parameter indicates the dimension along which the values must sum to 1.


#### Neural Network hyperparameters

Hyperparameters are adjustable parameters that let you control the model optimization process. Different hyperparameter values can impact model training and convergence rates.

We define the following hyperparameters for training:
- Number of Epochs - the number of times to iterate over the dataset
- Batch Size - the number of data samples propagated through the network before the parameters are updated
- Learning Rate - how much to update the model's parameters at each batch/epoch. Smaller values yield slow learning speed, while large values may result in unpredictable behavior during training.


In [None]:
# Training hyperparameters.
# NOTE(review): only learning_rate is consumed later (by the optimizer).
# train_loop, as called in this notebook, runs with a batch size of 32 and
# 50 epochs, so batch_size and epochs below do not affect the training run.
learning_rate = 1e-3
batch_size = 64
epochs = 5

#### Loss function

When presented with some training data, our untrained network is likely not to give the correct answer. The loss function measures how dissimilar the obtained result is from the target value, and it is the loss function that we want to minimize during training. To calculate the loss, we make a prediction using the inputs of a given data sample and compare it against the true label value.

Common loss functions include nn.MSELoss (Mean Square Error) for regression tasks, and nn.NLLLoss (Negative Log Likelihood) for classification. nn.CrossEntropyLoss combines nn.LogSoftmax and nn.NLLLoss.



In [None]:
# Initialize the loss function
# Mean squared error, the regression loss named in the text above.
loss_fn = nn.MSELoss()

#### Optimizer

Optimization is the process of adjusting model parameters to reduce model error in each training step. Optimization algorithms define how this process is performed (in this example we use Stochastic Gradient Descent). All optimization logic is encapsulated in the optimizer object. Here, we use the SGD optimizer; additionally, there are many different optimizers available in PyTorch such as ADAM and RMSProp, that work better for different kinds of models and data.

We initialize the optimizer by registering the model’s parameters that need to be trained, and passing in the learning rate hyperparameter.




In [None]:
# SGD over the parameters of the current `model`, with the step size taken
# from learning_rate. The optimizer is tied to this particular model object.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

Inside the training loop, optimization happens in three steps:

- Call optimizer.zero_grad() to reset the gradients of model parameters. Gradients by default add up; to prevent double-counting, we explicitly zero them at each iteration.

- Backpropagate the prediction loss with a call to loss.backward(). PyTorch deposits the gradients of the loss w.r.t. each parameter.

- Once we have our gradients, we call optimizer.step() to adjust the parameters by the gradients collected in the backward pass.


In [None]:
def train_loop(X_train, Y_train, loss_fn, optimizer, device,
               model=None, epochs=50, batch_size=32):
    """Train a model with mini-batch gradient descent, printing the loss per epoch.

    Args:
        X_train: Array-like of input rows; it is sliced along its first axis.
        Y_train: Array-like of targets aligned row-for-row with ``X_train``.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: Optimizer holding the parameters of the model being trained.
        device: Device on which each batch tensor is created.
        model: Network to train. ``None`` (the default) uses the module-level
            ``model``, which is what this function always trained before.
        epochs: Number of passes over the data. Defaults to 50, as before.
        batch_size: Rows per batch. Defaults to 32, as before.

    Returns:
        None. The mean of the batch losses is printed once per epoch.
    """
    if model is None:
        # Backward compatible: fall back to the notebook-level `model`.
        model = globals()["model"]

    # An earlier evaluation may have left the model in eval mode.
    model.train()

    size = len(X_train)
    for epoch_id in range(epochs):
        running_loss = []
        # Stepping by batch_size (instead of counting whole batches) means the
        # final, shorter batch is trained on too rather than silently dropped.
        for start in range(0, size, batch_size):
            stop = start + batch_size
            # torch.as_tensor honours `device`; the legacy torch.FloatTensor
            # constructor used previously only works for CPU.
            X_batch = torch.as_tensor(
                X_train[start:stop], dtype=torch.float32, device=device
            )
            Y_batch = torch.as_tensor(
                Y_train[start:stop], dtype=torch.float32, device=device
            )

            # Compute prediction and loss; squeeze the trailing size-1 output
            # dimension so predictions line up with the 1-D targets.
            pred = model(X_batch)
            loss = loss_fn(pred.squeeze(-1), Y_batch)
            running_loss.append(loss.item())

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f" Epoch: {epoch_id} loss: {np.mean(running_loss):>7f}")


### **RDKit**

In [None]:
!wget -c https://repo.continuum.io/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh
!chmod +x Miniconda3-py37_4.8.3-Linux-x86_64.sh
!time bash ./Miniconda3-py37_4.8.3-Linux-x86_64.sh -b -f -p /usr/local
!time conda install -q -y -c rdkit rdkit

import sys
sys.path.append('/usr/local/lib/python3.7/site-packages/')

--2022-04-30 12:58:00--  https://repo.continuum.io/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh
Resolving repo.continuum.io (repo.continuum.io)... 104.18.201.79, 104.18.200.79, 2606:4700::6812:c84f, ...
Connecting to repo.continuum.io (repo.continuum.io)|104.18.201.79|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh [following]
--2022-04-30 12:58:00--  https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh
Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.131.3, 104.16.130.3, 2606:4700::6810:8203, ...
Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.131.3|:443... connected.
HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable

    The file is already fully retrieved; nothing to do.

PREFIX=/usr/local
Unpacking payload ...
Collecting package metadata (current_repodata.json): - \ | done
Solving environment

In [None]:
import pandas as pd
import numpy as np
import rdkit
from tqdm.auto import tqdm

from rdkit import Chem
from rdkit.Chem import AllChem


In [None]:
from sklearn.model_selection import train_test_split

# Column names of the ESOL csv and the fingerprint width. N_BITS is the
# number of input features per molecule, so it must equal the input width
# of the network that consumes X.
SOLUBILITY_COL = 'measured log solubility in mols per litre'
SMILES_COL = 'smiles'
N_BITS = 512

df = pd.read_csv('./esol.csv')
df = df[[SOLUBILITY_COL, SMILES_COL]]
X = []
Y = []
n_skipped = 0

# Iterate the two columns directly (cheaper than iterrows) and pass `total`
# so tqdm can show a real progress bar instead of a bare counter.
for smile, sol in tqdm(zip(df[SMILES_COL], df[SOLUBILITY_COL]), total=len(df)):
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        # MolFromSmiles returns None for a SMILES string it cannot parse;
        # skip that row instead of crashing in the fingerprint call.
        n_skipped += 1
        continue

    # Morgan fingerprint (radius 2) as an N_BITS-long bit vector, converted
    # to a NumPy array so it can be stacked into the feature matrix.
    fingerprint = np.asarray(AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=N_BITS))

    X.append(fingerprint)
    Y.append(sol)

if n_skipped:
    print(f"Skipped {n_skipped} molecules whose SMILES could not be parsed")

# One row per molecule in X; Y is the matching 1-D vector of targets.
X = np.vstack(X)
Y = np.asarray(Y)

# 80% train; the remaining 20% is split in half into validation and test.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_valid, X_test, Y_valid, Y_test = train_test_split(X_test, Y_test, test_size=0.5, random_state=42)
print(X_train.shape, X_valid.shape, X_test.shape)
print(Y_train.shape, Y_valid.shape, Y_test.shape)

0it [00:00, ?it/s]

(902, 512) (113, 512) (113, 512)
(902,) (113,) (113,)


In [None]:
# Train on the training split; the loss is printed once per epoch.
train_loop(X_train, Y_train, loss_fn, optimizer, device)

 Epoch: 0 loss: 1.880917
 Epoch: 1 loss: 1.855083
 Epoch: 2 loss: 1.830060
 Epoch: 3 loss: 1.805774
 Epoch: 4 loss: 1.782171
 Epoch: 5 loss: 1.759254
 Epoch: 6 loss: 1.736969
 Epoch: 7 loss: 1.715303
 Epoch: 8 loss: 1.694268
 Epoch: 9 loss: 1.673843
 Epoch: 10 loss: 1.653983
 Epoch: 11 loss: 1.634666
 Epoch: 12 loss: 1.615820
 Epoch: 13 loss: 1.597451
 Epoch: 14 loss: 1.579554
 Epoch: 15 loss: 1.562102
 Epoch: 16 loss: 1.545095
 Epoch: 17 loss: 1.528490
 Epoch: 18 loss: 1.512261
 Epoch: 19 loss: 1.496395
 Epoch: 20 loss: 1.480907
 Epoch: 21 loss: 1.465788
 Epoch: 22 loss: 1.451034
 Epoch: 23 loss: 1.436632
 Epoch: 24 loss: 1.422513
 Epoch: 25 loss: 1.408702
 Epoch: 26 loss: 1.395215
 Epoch: 27 loss: 1.382033
 Epoch: 28 loss: 1.369112
 Epoch: 29 loss: 1.356473
 Epoch: 30 loss: 1.344093
 Epoch: 31 loss: 1.331942
 Epoch: 32 loss: 1.320039
 Epoch: 33 loss: 1.308328
 Epoch: 34 loss: 1.296865
 Epoch: 35 loss: 1.285594
 Epoch: 36 loss: 1.274545
 Epoch: 37 loss: 1.263656
 Epoch: 38 loss: 1.252

In [None]:
def test_loop(X_test, Y_test, loss_fn, model=None, device=None, batch_size=32):
    """Evaluate a model on held-out data and print the mean loss.

    Args:
        X_test: Array-like of input rows; it is sliced along its first axis.
        Y_test: Array-like of targets aligned row-for-row with ``X_test``.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
            The per-sample average below assumes it returns the mean over the
            batch (the default reduction of ``nn.MSELoss``).
        model: Network to evaluate. ``None`` (the default) uses the
            module-level ``model``, as this function always did before.
        device: Device on which batches are created. ``None`` (the default)
            uses the device of the model's first parameter.
        batch_size: Rows per batch. Defaults to 32, as before.

    Returns:
        None. The result is printed with the label ``MSE``.
    """
    if model is None:
        # Backward compatible: fall back to the notebook-level `model`.
        model = globals()["model"]
    if device is None:
        # Follow the model so inputs and weights end up on the same device.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else "cpu"

    model.eval()
    size = len(X_test)
    total_loss = 0.0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        # Stepping by batch_size also scores the final, shorter batch, which
        # used to be dropped.
        for start in range(0, size, batch_size):
            stop = start + batch_size
            X_batch = torch.as_tensor(
                X_test[start:stop], dtype=torch.float32, device=device
            )
            Y_batch = torch.as_tensor(
                Y_test[start:stop], dtype=torch.float32, device=device
            )

            # Compute prediction and loss
            pred = model(X_batch)
            loss = loss_fn(pred.squeeze(-1), Y_batch)

            # Weight each batch by its row count so a short last batch does
            # not distort the overall per-sample mean.
            total_loss += loss.item() * len(X_batch)

    mean_loss = total_loss / size if size else float("nan")
    print(f" MSE: {mean_loss:>7f}")


In [None]:
# Evaluate the trained model on the held-out test split.
test_loop(X_test, Y_test, loss_fn)

 MSE: 2.027718


## Assignment: Toxicity classification


You are given a dataset of SMILES strings, each labelled as toxic or non-toxic.
The task is to create a neural network model that predicts whether a given molecule is toxic.



In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected network with two hidden layers and one output unit.

    Layer sizes are ``n_features -> 256 -> 128 -> 1`` with a ReLU after each
    hidden layer. The output layer has no activation, so ``forward`` returns
    one raw logit per input row.

    Args:
        n_features: Size of the last dimension of the input. Defaults to 512,
            the value that used to be hard-coded, so ``NeuralNetwork()``
            builds exactly the same architecture as before.
    """

    def __init__(self, n_features=512):
        super().__init__()
        # The attribute name and layer order are unchanged, so state_dict
        # keys ("linear_relu_stack.0.weight", ...) stay the same.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        """Map ``x`` (last dimension ``n_features``) to one logit per row."""
        logits = self.linear_relu_stack(x)
        return logits

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
model = NeuralNetwork().to(device)
print(model)

# Binary cross-entropy computed on raw logits, so the network itself does
# not need a final sigmoid layer.
loss_fn = nn.BCEWithLogitsLoss()

# `model` has just been rebound to a new network, but the optimizer built
# earlier in the notebook still holds the previous network's parameters.
# Recreate it so that optimizer.step() updates this model.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

Using cuda device
NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=1, bias=True)
  )
)
