In [None]:
import torch
import numpy as np
import torch.nn as nn

# Torch Tensors

In [None]:
# making an empty tensor (the memory is allocated but not initialized, so the printed values are arbitrary)
x = torch.empty(2, 3, 4)
print(x)


# making a tensor of zeros
x = torch.zeros(2, 3, 4)
print(x)


# making a tensor of ones
x = torch.ones(2, 3, 4, dtype=torch.int) # dtype changes the type of the values in the tensor
print(x)


# making a tensor with random values
x = torch.rand(2, 2) # uniform distribution on [0, 1)
x = torch.randn(2, 2) # standard normal distribution (this overwrites the x from the line above)


# operations with tensors
x = torch.rand(2, 2)
y = torch.rand(2, 2)
print(x, y)
z = x + y
z = torch.add(x, y) # this is the same as z = x+y
y.add_(x) # in-place version of y = y + x (a trailing underscore marks an in-place operation)
z.add_(1) # 1 is added to every element of z
# the same pattern works for -, * and / (sub, mul, div)


# slicing tensors
x = torch.rand(5, 3)
print(x[:, 0]) # 0-th column (all rows)
print(x[0, 0].item()) # Python number stored at position [0, 0]; .item() only works on one-element tensors


# reshaping tensors
x = torch.rand(4, 4)
print(x.view(16)) # "flattening" the tensor
print(x.view(-1, 8)) # -1 lets torch infer that dimension: 16 elements / 8 columns ==> 2 rows


# numpy <--- torch
a = torch.ones(5)
b = a.numpy() # b shares memory with the (CPU) tensor a, so an in-place change to one is visible in the other
print(type(b))


# torch <--- numpy
a = np.ones(5)
b = torch.from_numpy(a) # again the memory is shared, no copy is made


# Tensors on the GPU
if(torch.cuda.is_available()):
    device = torch.device("cuda")
    x = torch.ones(5, device=device) # created directly on the GPU
    y = torch.ones(5) # created on the CPU ...
    y = y.to(device) # ... and then moved to the GPU
    # Note - can't convert between numpy and torch with tensors on GPU
    y = y.to("cpu") # moving the tensor back to the CPU




# Torch Autograd

In [None]:
x = torch.rand(3, requires_grad=True) # a computational graph will be created so that gradients can later be computed for variables dependent on x
print(x)

y = x * 2 + 2
print(y)
z = y * y
z = z.mean() # calculating the mean of the elements in the tensor
print(z)

z.backward() # calculates the gradient dz/dx and stores it in x.grad
    # NOTE - .backward() can be called without arguments only if z is a scalar
print(x.grad)


# IMPORTANT - when updating the parameters of a NN, we don't want the gradient to be calculated
# There are three solutions:
x.requires_grad_(False) # 1) in place: x itself stops tracking gradients
x.detach() # 2) returns a NEW tensor without gradient tracking (the result is discarded here; normally: y = x.detach())
with torch.no_grad(): # 3) operations inside this block are not recorded
    y = x * 2 + 1
    print(y)


In [None]:
# Dummy "training" loop: .backward() ADDS to .grad instead of overwriting it,
# so the gradient has to be cleared at the end of every epoch.
weights = torch.randn(4, requires_grad=True)

for epoch in range(3):
    output = torch.sum(weights * 3)  # stand-in for a model output / loss
    output.backward()                # accumulates d(output)/d(weights) into weights.grad
    print(weights.grad)
    weights.grad.zero_()             # in-place reset so the epochs do not add up

In [None]:
# USING OPTIMIZERS

# An optimizer takes an ITERABLE of parameters (or of parameter-group dicts);
# passing a bare tensor raises a TypeError, hence the list around `weights`.
optimizer = torch.optim.SGD([weights], lr=0.01)

# Run a fresh forward pass: the graph behind the `output` of the previous cell
# was already consumed by its .backward() call and should not be reused.
output = (weights*3).sum()
output.backward()

optimizer.step()       # weights <- weights - lr * weights.grad
optimizer.zero_grad()  # clear the gradients before the next iteration

# Backpropagation

In [None]:
# A single scalar training example (x, y) and one trainable weight w.
x, y = torch.tensor(1.0), torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

# forward pass: prediction, then the squared error as the loss
y_hat = w * x
loss = (y_hat - y).pow(2)
print(loss)

# backward pass: d(loss)/dw = 2 * (w*x - y) * x ends up in w.grad
loss.backward()
print(w.grad)

# update the weights
# ...

# Gradient descent

Version #1

*   prediction - manually
*   gradient computation - manually
*   loss computation - manually
*   parameter updates - manually



In [None]:
# only using numpy

# training data for the target function f(x) = 2 * x
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0

# model prediction
def forward(x):
    """Linear model without bias, f(x) = w * x, using the module-level weight w."""
    return w * x

# loss
def loss(y, y_predicted):
    """Mean squared error: 1/N * sum((y_predicted - y)**2)."""
    return ((y_predicted-y)**2).mean()

# gradient
def gradient(x, y, y_predicted):
    """Gradient of the MSE loss with respect to w.

    MSE = 1/N * sum((w*x - y)**2)   ==>   dMSE/dw = 1/N * sum(2*x * (w*x - y))
    """
    # Average the element-wise products. np.dot() would already reduce them to
    # their SUM, and .mean() of that scalar is a no-op, which would make the
    # gradient N times too large.
    return (2 * x * (y_predicted - y)).mean()


print(f'Prediction before training f(5): {forward(5):.3f}')



# Training (same schedule as the autograd version, since both now use the same gradient)
lr = 0.01
n_iters = 100

for epoch in range(n_iters):
    # forward pass
    y_pred = forward(X)

    l = loss(Y, y_pred)

    # manually derived gradient
    dw = gradient(X, Y, y_pred)

    # gradient-descent step
    w -= lr * dw
    if epoch % 10 == 0:
        print(f'epoch: {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training f(5): {forward(5):.3f}')


Version #2

*   prediction - manually
*   gradient computation - **Autograd**
*   loss computation - manually
*   parameter updates - manually

In [None]:
# torch tensors + autograd: the gradient is no longer derived by hand

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

# trainable weight; requires_grad=True makes autograd record the operations on it
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


def forward(x):
    """Model prediction f(x) = w * x, using the module-level weight w."""
    return w * x


def loss(y, y_predicted):
    """Mean squared error between the targets y and the predictions."""
    squared_error = (y_predicted - y) ** 2
    return squared_error.mean()


print(f'Prediction before training f(5): {forward(5):.3f}')

# Training
lr = 0.01
n_iters = 100

for epoch in range(n_iters):
    y_pred = forward(X)  # forward pass
    l = loss(Y, y_pred)
    l.backward()         # autograd stores dl/dw in w.grad

    # the update itself must not become part of the computational graph
    with torch.no_grad():
        w.sub_(lr * w.grad)

    w.grad.zero_()       # gradients accumulate, so clear them after each step

    if epoch % 10 == 0:
        print(f'epoch: {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training f(5): {forward(5):.3f}')

Version #3

*   prediction - manually
*   gradient computation - **Autograd**
*   loss computation - **PyTorch Loss**
*   parameter updates - **PyTorch Optimizer**

In [None]:
# torch tensors + autograd, with a built-in loss and a built-in optimizer

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model prediction
def forward(x):
    """Linear model without bias, f(x) = w * x, using the module-level weight w."""
    return w * x

print(f'Prediction before training f(5): {forward(5):.3f}')


# Training
lr = 0.01
n_iters = 100

loss = nn.MSELoss()
# the optimizer takes an iterable of parameters, hence the list around w
optimizer = torch.optim.SGD([w], lr=lr)

for epoch in range(n_iters):
    y_pred = forward(X)

    # loss modules are called as loss(input, target): prediction first, ground truth second
    l = loss(y_pred, Y)

    # autograd computes dl/dw
    l.backward()

    # w <- w - lr * w.grad
    optimizer.step()

    # clear the gradient so the next backward() starts from zero
    optimizer.zero_grad()

    if epoch % 10 == 0:
        print(f'epoch: {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training f(5): {forward(5):.3f}')

Version #4

*   prediction - **PyTorch Model**
*   gradient computation - **Autograd**
*   loss computation - **PyTorch Loss**
*   parameter updates - **PyTorch Optimizer**

In [None]:
# PyTorch model, loss and optimizer: nothing is implemented manually any more

# nn.Linear works on 2-D input of shape (n_samples, n_features), hence one row per sample
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)
n_samples, n_features = X.shape
# size of one target row; taken from Y instead of assuming it equals n_features
n_outputs = Y.shape[1]


# model = nn.Linear(n_features, n_outputs)

class LinearRegression(nn.Module):
    """Linear regression as a one-layer module: y = x @ W.T + b."""

    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to x."""
        return self.lin(x)

model = LinearRegression(n_features, n_outputs)


print(f'Prediction before training f(5): {model(X_test).item():.3f}')


# Training
lr = 0.05
n_iters = 500

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

for epoch in range(n_iters):
    y_pred = model(X)

    # loss modules are called as loss(input, target): prediction first, ground truth second
    l = loss(y_pred, Y)

    l.backward()

    optimizer.step()

    optimizer.zero_grad()

    if epoch % 50 == 0:
        # parameters() yields the weight matrix first, then the bias of the single linear layer
        [w, b] = model.parameters()
        print(f'epoch: {epoch + 1}: w = {w[0, 0].item() :.3f}, loss = {l:.8f}')

print(f'Prediction after training f(5): {model(X_test).item():.3f}')

# Logistic Regression

Importing the datasets

In [None]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

Step 0 - Preparing the data:

In [None]:
# Load the features and the 0/1 targets of the breast-cancer dataset.
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
n_samples, n_features = X.shape

# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# scaling the features: the scaler is fitted on the training split only and
# the same statistics are then applied to the test split
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# numpy arrays -> float32 torch tensors
X_train, X_test, y_train, y_test = (
    torch.from_numpy(arr.astype(np.float32))
    for arr in (X_train, X_test, y_train, y_test)
)

# turn the label vectors into columns of shape (n, 1) so they line up with the model output
y_train = y_train.view(len(y_train), 1)
y_test = y_test.view(len(y_test), 1)

Step 1 - Model

In [None]:
class LogisticRegression(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid."""

    def __init__(self, n_input_features):
        super().__init__()
        # maps n_input_features values to one output per sample
        self.lin = nn.Linear(n_input_features, 1)

    def forward(self, x):
        """Return sigmoid(lin(x)), i.e. one value in (0, 1) per sample."""
        logits = self.lin(x)
        return torch.sigmoid(logits)

# one input per feature column; n_features comes from X.shape in the data-preparation cell (Step 0)
model = LogisticRegression(n_features)

*Digression*

Step 1.5 - Model with Activation Functions

- Some of the available activation functions: ReLU, Sigmoid, Softmax, Tanh, LeakyReLU (available as modules under `nn.*`, e.g. `nn.ReLU()`, and as functions under `torch.*` / `torch.nn.functional.*`, e.g. `torch.sigmoid`)

In [None]:
class LogisticRegression2(nn.Module):
    """Binary classifier with one hidden layer: Linear -> ReLU -> Linear -> Sigmoid."""

    def __init__(self, n_input_features):
        # Zero-argument super() always resolves to the enclosing class; naming a
        # different class here makes the constructor fail with a TypeError.
        super().__init__()
        self.lin1 = nn.Linear(n_input_features, 5)  # input -> 5 hidden units
        self.relu = nn.ReLU()
        self.lin2 = nn.Linear(5, 1)                 # hidden units -> one output

    def forward(self, x):
        """Return one value in (0, 1) per input row."""
        x = self.lin1(x)
        x = self.relu(x)
        x = self.lin2(x)  # feed the activations forward (not the activation module)
        x = torch.sigmoid(x)
        return x

# NOTE(review): this instantiates the single-layer LogisticRegression, not the LogisticRegression2 defined in this cell - confirm which model is intended
model = LogisticRegression(n_features)

Step 2 - Loss and Optimizer

In [None]:
# binary cross-entropy; it expects probabilities, which the model's final sigmoid provides
criterion = nn.BCELoss()
# plain stochastic gradient descent over all parameters of the model
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

Step 3 - Training Loop

In [None]:
num_epochs = 10000

for epoch in range(num_epochs):
    # forward pass on the whole training set, then the loss against the labels
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # backward pass: gradients of the loss with respect to the model parameters
    loss.backward()

    # parameter update using those gradients
    optimizer.step()

    # clear the gradients; otherwise the next backward() would add to them
    optimizer.zero_grad()

    # report the loss every 1000 epochs
    if (epoch+1) % 1000 == 0:
        print(f'epoch: {epoch+1}, loss={loss.item():.4f}')

Evaluation:

In [None]:
# Evaluation only, so nothing here needs to be tracked by autograd.
with torch.no_grad():
    y_pred = model(X_test)
    # round the predicted probabilities to hard 0/1 class labels
    y_pred_cls = torch.round(y_pred)
    # accuracy = share of test samples whose rounded prediction equals the label
    n_correct = (y_pred_cls == y_test).sum()
    acc = n_correct / float(y_test.shape[0])
    print(f'acc={acc:.4f}')

# Datasets and DataLoaders

In [None]:
import torchvision
from torch.utils.data import Dataset, DataLoader
import math
import pandas as pd

In [None]:
# uploading the file to google colab
# (this cell only works inside Google Colab; upload wine.csv here, the dataset cells below read it by that name)

from google.colab import files
uploaded = files.upload()

In [None]:
class WineDataset(Dataset):
    """Dataset backed by a CSV file: column 0 is the label, the other columns are the features."""

    def __init__(self, path='wine.csv'):
        """Read the CSV at `path` (first line = column names) into float32 tensors."""
        # data loading
        # header=0: the first line holds the column names. Combining skiprows=1
        # with pandas' default header inference would instead turn the first
        # DATA row into the header and silently drop one sample.
        df = pd.read_csv(path, delimiter=",", header=0, dtype=np.float32)
        xy = df.to_numpy()
        self.x = torch.from_numpy(xy[:, 1:])   # features, shape (n_samples, n_columns - 1)
        self.y = torch.from_numpy(xy[:, [0]])  # labels kept 2-D, shape (n_samples, 1)
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the (features, label) pair stored at `index`."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

dataset = WineDataset()
# batches of 4 samples, reshuffled every epoch, loaded by 2 worker subprocesses
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)

In [None]:
num_epochs = 2
total_samples = len(dataset)
# batches per epoch, derived from the loader's own batch size instead of repeating the literal 4
n_iterations = math.ceil(total_samples / dataloader.batch_size)


for epoch in range(num_epochs):
    # every iteration yields one batch of (inputs, labels)
    for i, (inputs, labels) in enumerate(dataloader):
        # forward
        # backward
        # update
        if (i+1)%5==0:
            print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_iterations}, inputs {inputs.shape}')



# Dataset Transforms

  See the list of built-in transforms: https://pytorch.org/vision/0.9/transforms.html

Making a dataset for testing purposes:

In [None]:
class WineDatasetWithTransform(Dataset):
    """CSV-backed dataset (column 0 = label, rest = features) with an optional per-sample transform."""

    def __init__(self, transform=None, path='wine.csv'):
        """Read the CSV at `path` (first line = column names) into float32 numpy arrays.

        transform: optional callable that receives and returns an (inputs, target) tuple.
        """
        # data loading
        # header=0: the first line holds the column names. Combining skiprows=1
        # with pandas' default header inference would instead turn the first
        # DATA row into the header and silently drop one sample.
        df = pd.read_csv(path, delimiter=",", header=0, dtype=np.float32)
        xy = df.to_numpy()
        self.x = xy[:, 1:]   # features stay numpy arrays; converting them is left to the transform
        self.y = xy[:, [0]]  # labels kept 2-D, shape (n_samples, 1)
        self.n_samples = xy.shape[0]

        self.transform = transform

    def __getitem__(self, index):
        """Return the (inputs, target) sample at `index`, passed through the transform if one is set."""
        sample = self.x[index], self.y[index]
        if self.transform:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

Making custom transforms:

In [None]:
class ToTensor:
    """Transform that turns a (inputs, targets) pair of numpy arrays into torch tensors."""

    def __call__(self, sample):
        features, labels = sample
        features_t = torch.from_numpy(features)
        labels_t = torch.from_numpy(labels)
        return features_t, labels_t

class MulTransform:
    """Transform that multiplies the inputs of a (inputs, target) sample by a constant factor."""

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        inputs, target = sample
        # Multiply out of place on purpose: `inputs` may share memory with the
        # data it was taken from (array slices and torch.from_numpy() return
        # views), so an in-place `*=` would rescale the stored data again on
        # every access.
        inputs = inputs * self.factor
        return inputs, target


# dataset = WineDatasetWithTransform(transform=ToTensor())
# first_data = dataset[0]
# features, labels = first_data
# print(type(features), type(labels))

Composing multiple transforms:

In [None]:
# Applying only one transform (for comparison):
dataset = WineDatasetWithTransform(transform=ToTensor())
first_data = dataset[0] # indexing calls __getitem__, which passes the sample through the transform
features, labels = first_data
print("Without MulTransform:")
print(features)
print(type(features), type(labels))


print("__________________________________")

# Applying a composition of more transforms:
# Compose applies the transforms in list order: ToTensor first, then MulTransform multiplies the inputs by 2
composed = torchvision.transforms.Compose([ ToTensor(), MulTransform(2) ])

dataset = WineDatasetWithTransform(transform=composed)
first_data = dataset[0]
features, labels = first_data
print("WITH MulTransform:")
print(features)
print(type(features), type(labels))

# Softmax and Cross Entropy Loss

**Softmax** formula:

$S(y_i)=\frac{e^{y_i}}{\sum_{j=0}^{n} e^{y_j}}$

In [None]:
# basic implementation of the softmax function
def softmax(x):
    """Softmax along axis 0: exp(x_i) / sum_j exp(x_j).

    x: array of raw scores; the result has the same shape and sums to 1 along axis 0.
    """
    # Subtracting the maximum does not change the result (the common factor
    # cancels in the fraction) but keeps np.exp() from overflowing to inf,
    # which would turn the output into nan for large scores.
    exps = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0)

x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print("softmax numpy: ", outputs)

# Torch has the softmax function implemented already
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)
print("softmax torch: ", outputs)

**Cross Entropy** formula:

$D(\hat{Y}, Y) = -\frac{1}{N} \cdot \sum_i Y_i \cdot \log(\hat{Y}_i)$

- $Y$ - correct (expected) output
- $\hat{Y}$ - prediction for the output

This is used to measure the performance of a classification model

Note - Y must be one-hot encoded

In [None]:
# basic implementation of the cross entropy function
def cross_entropy(y, y_hat):
    """Cross entropy of one sample: -sum_i y_i * log(y_hat_i).

    y:     one-hot encoded correct class
    y_hat: predicted class probabilities
    The result is not divided by the number of samples.
    """
    # Bound the probabilities away from 0: log(0) is -inf and 0 * -inf is nan,
    # so a single exact-zero prediction would otherwise turn the loss into nan.
    eps = 1e-15
    return -np.sum(y * np.log(np.maximum(y_hat, eps)))

Y = np.array([1, 0, 0])

Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])

l1 = cross_entropy(Y, Y_pred_good)
l2 = cross_entropy(Y, Y_pred_bad)

print(f'Loss1 numpy: {l1:.4f}')
print(f'Loss2 numpy: {l2:.4f}')

In [None]:
# using torch for the cross entropy function
# advantage to using this is that it can work with more samples

# No Softmax in the last layer ==> Y_pred has raw scores
# Y <=> class labels, not one-hot

criterion = nn.CrossEntropyLoss()

# one class index per sample (4 samples, 3 classes)
Y = torch.tensor([0, 1, 2, 0])

# dimension is n_samples x n_classes ---> 4x3
Y_pred_good = torch.tensor([
    [2.0, 1.0, 0.1],
    [0.8, 1.5, 0.2],
    [0.2, 0.7, 2.4],
    [2.0, 1.0, 0.1],
])
Y_pred_bad = torch.tensor([
    [0.5, 2.5, 0.3],
    [2.5, 0.3, 0.1],
    [0.5, 2.5, 0.3],
    [0.5, 2.5, 0.3],
])

# nn.CrossEntropyLoss takes the raw scores first and the class indices second
l1 = criterion(Y_pred_good, Y)
l2 = criterion(Y_pred_bad, Y)

print(l1.item()) # calling .item() because this is a tensor with only one element
print(l2.item())

# highest score of every row and its column index, i.e. the predicted class
max1, predictions1 = Y_pred_good.max(dim=1)
max2, predictions2 = Y_pred_bad.max(dim=1)
print(max1, predictions1)
print(max2, predictions2)

# Adjusting the learning rate

In [None]:
from torch.optim import lr_scheduler

lr = 0.1
model = nn.Linear(10, 1)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)


def lambda1(epoch):
    """Factor the initial learning rate is multiplied by in the given epoch."""
    return epoch / 10


# LambdaLR sets lr = initial_lr * lambda1(epoch)
scheduler = lr_scheduler.LambdaLR(optimizer, lambda1)

print(optimizer.state_dict())

for epoch in range(5):
    # loss.backward()
    optimizer.step()
    # validate(...)
    scheduler.step()  # advance the schedule once per epoch, after the optimizer step
    current_state = optimizer.state_dict()
    print(current_state['param_groups'][0]['lr'])

## **List of schedulers:**
- **LambdaLR**         --> initial_lr * function_result
- **MultiplicativeLR** --> prev_lr * function_result
- **StepLR**           --> prev_lr * *gamma* (every *step_size* iterations)
- **MultiStepLR**      --> prev_lr * *gamma* (at every *milestones* iteration)
- **ExponentialLR**    --> prev_lr * *gamma*
- **ReduceLROnPlateau** --> prev_lr * *factor* (once the monitored metric has stopped improving for *patience* epochs)
- CyclicLR, OneCycleLR, ...