# Lab VI

In [None]:
import numpy as np
import torch

# Tensors

*A tensor is an n-dimensional array, meaning it can have any number of dimensions. A NumPy `ndarray` cannot be loaded onto a GPU, whereas a `tensor` can.*

In [None]:
x = torch.tensor(4.)
x

In [None]:
y = torch.tensor([1, 2, 3, 4, 5.0])
y

In [None]:
z = torch.tensor([
    [1, 2, 3],
    [3, 4, 5],
    [6, 7, 8]
])
z

*We can use either the NumPy-style `shape` attribute or the PyTorch-specific `size()` method to get the size of a tensor.*

In [None]:
print(x.shape)
print(y.size())
print(z.shape)

In [None]:
print(x.dtype)
print(y.dtype)
print(z.dtype)

*PyTorch is highly compatible with NumPy. We use `from_numpy()` to create a `tensor` from an `ndarray`, and `numpy()` to convert a `tensor` into an `ndarray`. However, the conversion does not copy the data: the tensor and the array share the same underlying memory, so modifying one in place also changes the other. So be careful.*

In [None]:
n = np.arange(16).reshape(4, 4)
n

In [None]:
tn = torch.from_numpy(n)
tn

In [None]:
tn.numpy()

In [None]:
print(n.dtype)
print(tn.dtype)

*PyTorch even has NumPy like interface for creating tensors. Common utility functions for creating a `tensor` include `empty`, `ones`, `zeros`, `rand`, `randint`, etc., and their corresponding `empty_like`, `rand_like`, `randint_like`, etc.*

In [None]:
x = torch.empty(2, 2)
x

In [None]:
y = torch.zeros(3, 6)
y

In [None]:
o = torch.ones(2, 2)
o

In [None]:
z = torch.eye(4)
z

In [None]:
torch.ones_like(z, dtype=torch.float32)

In [None]:
torch.rand(2, 2)

*Slicing and indexing is similar to NumPy. However, we can use `item` to get the actual value if the tensor has only one item.*

In [None]:
x = torch.tensor(5.)
x

In [None]:
x.item()

*We can use `view` function to reshape a tensor.*

In [None]:
x = torch.randint(0, 5, (5, 4))
x

In [None]:
y = x.view(2, 10)
y

In [None]:
y.reshape(2, 10)

In [None]:
y.view(4, 5)

In [None]:
y

In [None]:
y.view(4, -1)

*`unsqueeze` can add a new dimension at the specified position.*

In [None]:
x.size()

In [None]:
torch.unsqueeze(x, 0).shape

*We can concatenate two tensors together.*

In [None]:
x = torch.ones(2, 2)
y = torch.zeros(2, 4)

In [None]:
x

In [None]:
y

In [None]:
z = torch.cat([x, y], dim=1)
z

In [None]:
torch.cat([z, torch.ones(1, 6)], dim=0)

*Finally, we can find out which device our tensors are stored on.*

In [None]:
# Every tensor exposes the device it is stored on (e.g. CPU or a CUDA GPU)
# through its `device` attribute; displaying the tensor alone does not show it
# for CPU tensors.
x.device

# Gradients

In [None]:
x = torch.tensor(5.)
w = torch.tensor(2., requires_grad=True)
b = torch.tensor(3., requires_grad=True)

In [None]:
y = w*x + b
z = 3*y
z

*We can automatically calculate the derivatives of `z` with respect to the tensors that have `requires_grad=True` set, by calling `backward()` on `z`.*

In [None]:
z.backward()

In [None]:
# `backward()` was called on `z`, so the stored gradients are dz/dw and dz/db
# (not the derivatives of the intermediate value `y`).
print('dz/dw: ', w.grad)
print('dz/db: ', b.grad)

*If you do not want to calculate gradients for some operation, you can use `with torch.no_grad()`.*

In [None]:
# Update the parameters in place inside `no_grad` so that the update itself is
# not tracked by autograd.
with torch.no_grad():
    w -= w.grad * 0.001  # move against the gradient, scaled by a step size of 0.001
    b -= b.grad * 0.001

In [None]:
print('W: ', w)
print('B: ', b)

In [None]:
# No backward pass has run since the update, so `.grad` still holds the same
# dz/dw and dz/db values as before.
print('dz/dw: ', w.grad)
print('dz/db: ', b.grad)

*We can clear the gradients using the `zero_` function. Any function with a trailing `_` in its name usually performs its operation in-place.*

In [None]:
w.grad.zero_()
b.grad.zero_()

In [None]:
# After `zero_()` the stored gradients of `z` with respect to `w` and `b`
# read as zero.
print('dz/dw: ', w.grad)
print('dz/db: ', b.grad)

# A Simple Linear Regression

*Preparing `iris` data.*

In [None]:
from sklearn.datasets import load_iris

In [None]:
iris = load_iris()

In [None]:
iris.data.shape

*We will take the first `3` columns, which are sepal length, sepal width, and petal length, as our features, and the last column, which is petal width, as the target.*

In [None]:
x = iris.data[:, :3]
y = iris.data[:, 3]

In [None]:
print(x.shape)
print(y.shape)

*Next, we create tensors from the `ndarray`s. Note that we have changed the shape of the target array. Originally, `y` was 1-dimensional, with shape `(n,)`. To make it work with matrix operations, we reshape it to `(-1, 1)`, i.e. a single column, where `-1` means "whatever number of rows is needed for the remaining elements".*

In [None]:
x = torch.from_numpy(x)
y = torch.from_numpy(y.reshape(-1, 1))

In [None]:
print(x.shape)
print(x.dtype)
print(y.shape)
print(y.dtype)

*Weight initialization. We initialize a `3x1` matrix as there are `3` features, and a `1x1` bias.*

In [None]:
w = torch.rand(3, 1, requires_grad=True, dtype=torch.float64)
b = torch.rand(1, 1, requires_grad=True, dtype=torch.float64)

In [None]:
print(w)

In [None]:
print(b)

In [None]:
print(w.shape)
print(w.dtype)
print(b.shape)
print(b.dtype)

*Creating model. Our simple linear model has equation of $w_{1}x_{1} + w_{2}x_{2} + w_{3}x_{3} + b$, which is simply achieved by computing $x\times W + b$.*

In [None]:
def model(x):
    """Apply the linear model to a batch of inputs.

    Multiplies ``x`` by the module-level weight matrix ``w`` and adds the
    module-level bias ``b``.
    """
    weighted = torch.matmul(x, w)
    return weighted + b

In [None]:
def mse(pred, label):
    """Return the mean of the squared element-wise differences.

    The squared differences between ``pred`` and ``label`` are summed and
    divided by the number of elements in the difference tensor.
    """
    residual = pred - label
    squared_total = torch.mul(residual, residual).sum()
    return squared_total / residual.numel()

In [None]:
epochs = 10
lr = 0.01

for e in range(epochs):
    # Forward pass: predictions and loss are computed with autograd tracking on.
    predictions = model(x)
    loss = mse(predictions, y)

    # `item()` extracts the plain Python number, keeping the log free of
    # tensor metadata such as `grad_fn`.
    print('Epoch ', e, ': Loss ', loss.item())

    # Backward pass: populates `w.grad` and `b.grad`. It is kept outside the
    # `no_grad` block, which is only needed for the in-place parameter update.
    loss.backward()

    # Gradient-descent step; the update itself must not be tracked by autograd.
    with torch.no_grad():
        w -= w.grad * lr
        b -= b.grad * lr

    # Gradients accumulate across `backward()` calls, so reset them before the
    # next epoch.
    w.grad.zero_()
    b.grad.zero_()

# Regression Using PyTorch

In [None]:
import torch.nn as nn
from torch.optim import SGD, Adam
from torch.nn.functional import mse_loss
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader

In [None]:
class MakeTensor:
    """Callable transform turning an ``(x, y)`` pair of NumPy arrays into float32 tensors."""

    @staticmethod
    def _as_float_tensor(array):
        # Cast to float32 first, then wrap the resulting array as a tensor.
        return torch.from_numpy(array.astype(np.float32))

    def __call__(self, sample):
        """Return ``(x_tensor, y_tensor)`` for the two-element ``sample``."""
        features, target = sample
        feature_tensor = self._as_float_tensor(features)
        target_tensor = self._as_float_tensor(target)
        return feature_tensor, target_tensor

In [None]:
class IRISDataset(Dataset):
    """Dataset over the iris measurements for a regression task.

    The first three columns of the iris data are used as features (``x``) and
    the fourth column, kept as a single-column 2-D array, as the target (``y``).
    An optional ``transforms`` callable is applied to every ``(x, y)`` sample.
    """

    def __init__(self, transforms=None):
        measurements = load_iris().data
        self.transforms = transforms
        self.n = measurements.shape[0]
        self.x = measurements[:, 0:3]
        # Indexing with a list keeps the column dimension.
        self.y = measurements[:, [3]]

    def __len__(self):
        """Return the number of rows in the dataset."""
        return self.n

    def __getitem__(self, index):
        """Return the ``(x, y)`` sample at ``index``, transformed if a transform is set."""
        pair = (self.x[index], self.y[index])
        return self.transforms(pair) if self.transforms else pair

In [None]:
dataset = IRISDataset(transforms=MakeTensor())

In [None]:
dataset[0]

In [None]:
datasampler = SubsetRandomSampler(np.arange(len(dataset)))
dataloader = DataLoader(dataset, 32, sampler=datasampler)

In [None]:
for xa, ya in dataloader:
    print(xa.shape)
    print(ya.shape)

*Remember that the entire `nn` module only supports mini-batches. So, if your image has size `3x32x32`, the input will actually have size `1x3x32x32` during training. The first dimension is the mini-batch dimension. You can convert a single 3-dimensional image into a 4-dimensional mini-batch using `torch.unsqueeze()`.*

In [None]:
model = nn.Linear(in_features=3, out_features=1, bias=True)

In [None]:
model.weight

In [None]:
model.bias

In [None]:
list(model.parameters())

In [None]:
optimizer = SGD(model.parameters(), lr=1e-3)

Suppose `epochs=10`, your training data has `10,000` observations, and the batch size is `1,000`. Then the number of weight updates that will take place is `10 * (10,000/1,000)`. That is, in each epoch there will be `10,000/1,000`, or `10`, steps. Therefore, in `10` epochs, there will be `100` steps.

In [None]:
epochs = 25

for e in range(epochs):
    for xb, yb in dataloader:
        # Forward pass on one mini-batch.
        predictions = model(xb)
        loss = mse_loss(predictions, yb)

        # Standard update order: clear stale gradients, back-propagate, then
        # let the optimizer apply the step. No `no_grad` block is wrapped
        # around these calls.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('Epoch ', e)
    # Loss of the last mini-batch of this epoch, as a plain Python number.
    print(loss.item())

# Logistic Regression

In [None]:
import matplotlib.pyplot as plt
import torchvision
from torch.nn.functional import relu, softmax, cross_entropy
from torchvision.datasets import MNIST
from torchvision.transforms import transforms
from torch.utils.data.sampler import SubsetRandomSampler

In [None]:
mnist = MNIST(root='./data/', download=True, transform=transforms.ToTensor())

In [None]:
mnist

In [None]:
def train_val_split(n, val_fraction=0.2):
    """Randomly split the indices ``0..n-1`` into training and validation sets.

    Returns:
        A ``(train_indices, val_indices)`` pair of arrays; the validation part
        holds the first ``int(n * val_fraction)`` entries of a random
        permutation and the training part holds the rest.
    """
    shuffled = np.random.permutation(n)
    val_count = int(n * val_fraction)
    val_indices = shuffled[:val_count]
    train_indices = shuffled[val_count:]
    return train_indices, val_indices

In [None]:
train_idx, val_idx = train_val_split(len(mnist))

In [None]:
print(len(train_idx))
print(len(val_idx))

In [None]:
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

train_loader = DataLoader(mnist, 32, sampler=train_sampler)
val_loader = DataLoader(mnist, 32, sampler=val_sampler)

In [None]:
class MNISTClassifier(nn.Module):
    """Single linear layer mapping flattened 28x28 inputs to 10 output scores."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=28 * 28, out_features=10)

    def forward(self, xb):
        """Flatten ``xb`` into rows of 784 values and return the linear layer's output."""
        flattened = xb.view(-1, 28 * 28)
        return self.linear(flattened)

In [None]:
model = MNISTClassifier()

In [None]:
print(model.linear.weight.shape)
print(model.linear.bias.shape)

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

In [None]:
def accuracy(pred, label):
    """Return the fraction of rows in ``pred`` whose highest-scoring column equals ``label``."""
    predicted_classes = torch.max(pred, dim=1).indices
    hits = (predicted_classes == label).sum().item()
    return hits / len(pred)

In [None]:
def train_batch(model, loss_func, bx, by, optimizer, metric=accuracy):
    """Run one optimization step on a single mini-batch.

    Args:
        model: Callable producing outputs for the batch inputs ``bx``.
        loss_func: Callable ``loss_func(output, by)`` returning a scalar loss
            tensor. It is actually used now, instead of a hard-coded
            ``cross_entropy``.
        bx: Batch inputs.
        by: Batch labels.
        optimizer: Optimizer holding the model's parameters.
        metric: Callable ``metric(output, by)`` evaluated on the batch.

    Returns:
        A ``(loss_value, batch_size, metric_result)`` tuple, where
        ``loss_value`` is a plain Python number.
    """
    output = model(bx)
    loss = loss_func(output, by)

    # Standard update order: clear stale gradients, back-propagate, apply the
    # step. None of these calls needs to be wrapped in `torch.no_grad()`.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    result = metric(output, by)
    return loss.item(), len(bx), result

In [None]:
def evaluate(model, loss_func, validation_data, optimizer, metric=accuracy):
    """Compute the sample-weighted average loss and metric over ``validation_data``.

    The model's parameters are never updated here: the forward passes run
    under ``torch.no_grad()`` and no optimizer step is taken, so the model is
    not trained on the validation data.

    Args:
        model: The ``nn.Module`` to evaluate.
        loss_func: Callable ``loss_func(output, by)`` returning a scalar loss tensor.
        validation_data: Iterable of ``(bx, by)`` mini-batches.
        optimizer: Unused; accepted only so existing callers keep working.
        metric: Callable ``metric(output, by)`` evaluated per batch.

    Returns:
        A ``(avg_loss, total_data, avg_result)`` tuple.

    Raises:
        ValueError: If ``validation_data`` yields no samples.
    """
    was_training = model.training
    model.eval()

    loss_sum = 0.0
    result_sum = 0.0
    total_data = 0
    try:
        with torch.no_grad():
            for bx, by in validation_data:
                output = model(bx)
                batch_size = len(bx)
                # Weight each batch by its size so a smaller final batch does
                # not skew the averages.
                loss_sum += loss_func(output, by).item() * batch_size
                result_sum += metric(output, by) * batch_size
                total_data += batch_size
    finally:
        # Restore whichever mode the model was in before evaluation.
        model.train(was_training)

    if total_data == 0:
        raise ValueError('validation_data yielded no samples')

    avg_loss = loss_sum / total_data
    avg_result = result_sum / total_data

    return avg_loss, total_data, avg_result

In [None]:
def fit(model, epochs, loss_func, optimizer, train, validation, metric=accuracy):
    """Train ``model`` for ``epochs`` epochs and print a summary line per epoch.

    Args:
        model: Model to train.
        epochs: Number of passes over ``train``.
        loss_func: Loss callable forwarded to ``train_batch`` and ``evaluate``.
        optimizer: Optimizer forwarded to ``train_batch`` and ``evaluate``.
        train: Iterable of ``(bx, by)`` training mini-batches.
        validation: Iterable of ``(bx, by)`` validation mini-batches.
        metric: Metric callable forwarded to ``train_batch`` and ``evaluate``.
    """
    for e in range(epochs):
        loss_sum = 0.0
        seen = 0
        for bx, by in train:
            batch_loss, batch_size, _ = train_batch(model, loss_func, bx, by, optimizer, metric)
            loss_sum += batch_loss * batch_size
            seen += batch_size

        # Report the sample-weighted mean over the whole epoch rather than the
        # loss of whichever mini-batch happened to come last; NaN when the
        # training iterable yielded nothing.
        train_loss = loss_sum / seen if seen else float('nan')

        val_loss, _, val_result = evaluate(model, loss_func, validation, optimizer, metric)
        print('Epoch {}/{}: Training Loss: {:.2f}, Validation Loss: {:.2f}, Validation Metric: {:.2f}'.format(e+1, epochs, train_loss, val_loss, val_result))

In [None]:
fit(model, 10, cross_entropy, optimizer, train_loader, val_loader)

# A Simple Neural Network

In [None]:
class MNISTNetwork(nn.Module):
    """Two-layer fully connected network: 784 -> 128 -> 10 with a ReLU in between."""

    def __init__(self):
        super().__init__()

        self.linear1 = nn.Linear(28 * 28, 128, bias=True)
        self.linear2 = nn.Linear(128, 10, bias=True)

    def forward(self, bx):
        """Flatten each sample of ``bx``, then apply linear1, ReLU and linear2."""
        flattened = bx.view(bx.shape[0], -1)
        hidden = relu(self.linear1(flattened))
        return self.linear2(hidden)

In [None]:
model = MNISTNetwork()

In [None]:
optimizer = Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))

In [None]:
fit(model, 10, cross_entropy, optimizer, train_loader, val_loader)