In [None]:
import sys
!{sys.executable} -m pip install torch
!{sys.executable} -m pip install torchvision
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install tqdm
!{sys.executable} -m pip install scikit-learn
!{sys.executable} -m pip install imageio

In [None]:
# pytorch imports
import torch
import torchvision

from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader

from torch import optim

# numpy imports
import numpy as np

# matplotlib imports
from matplotlib import pyplot as plt

# sklearn imports
from sklearn.datasets import make_regression

# Tensors

Torch uses tensors, which are nothing more than (multi-dimensional) arrays or vectors. PyTorch uses these tensors to execute computations efficiently.

We can easily go from lists to tensors:

In [None]:
# build two 1-D floating point tensors from plain Python lists
x = torch.tensor([5.0, 3.0])
y = torch.tensor([2.0, 1.0])

We can do mathematical operations with our tensors:

In [None]:
# "*" multiplies the two tensors element by element
print(x*y)

A lot of numpy like syntax can be used. For example to initialize tensors:

In [None]:
# numpy-style constructors: a 2x5 tensor of zeros, of ones, and of random values
tensor_zero = torch.zeros(2, 5)
tensor_one = torch.ones(2, 5)
tensor_random = torch.rand(2, 5)

Or to get the shape of our tensor:

In [None]:
# show the values first, then the shape on its own line
print(tensor_random, tensor_random.shape, sep="\n")

Reshape is a bit different (pytorch uses view):

In [None]:
# view() exposes the same 2*5 = 10 values under a new shape (here 1 row of 10)
print(tensor_random.shape)
print(tensor_random.view(1, 10).shape)

# Reading data

We will use the MNIST data set, which is well known in the ML/DL community. The data set contains images of handwritten digits (X) and the numerical value we want to predict (y):

![alt text](https://upload.wikimedia.org/wikipedia/commons/2/27/MnistExamples.png "Title")https://en.wikipedia.org/wiki/MNIST_database

The following function is used to get the data:

![alt text](figs/mnist.PNG "Title")https://pytorch.org/vision/0.8/datasets.html#mnist

Let's get the data (it is not in tensor form yet, so we need to transform it; more on this later):

In [None]:
# training split of MNIST; the ToTensor transform turns every image into a tensor
train = torchvision.datasets.MNIST(
    root="",
    train=True,
    download=True,
    transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]),
)

# test split (train=False), prepared with the same transform
test = torchvision.datasets.MNIST(
    root="",
    train=False,
    download=True,
    transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]),
)

In [None]:
# print the summary of the training data set object
print(train)

Prepare the MNIST object for training. We need to indicate a batch size and we can shuffle the input data:

![alt text](figs/dataloader.PNG "Title")https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader

In [None]:
# wrap both splits in loaders that hand out shuffled batches of 10 samples
trainset = DataLoader(train, batch_size=10, shuffle=True)
testset = DataLoader(test, batch_size=10, shuffle=True)

The generated train set is a DataLoader object we can iterate over:

In [None]:
# the loader object itself, followed by the list of its attributes and methods
print(trainset, dir(trainset), sep="\n")

Each iteration we can get a data batch that contains our X and y, and these objects (just like numpy) can be sliced:

In [None]:
# pull one batch from the loader; data[0] holds the images, data[1] the labels
data = next(iter(trainset))

print("Batch:")
print(data)
print("Data point:")
print(data[0][0])
print("Shape:")
print(data[0][0].shape)

Let's take a single data point to plot:

In [None]:
# first image of the batch (data[0]) together with its label (data[1])
x_i = data[0][0]
y_i = data[1][0]

In [None]:
# show the selected image tensor and its label
print(x_i,y_i)

In [None]:
# NOTE: x_i is passed as-is here, i.e. still with its leading "1" dimension
plt.imshow(x_i)

![alt text](https://i.redd.it/gwuw7zd01gd31.jpg "Title")

We need to transform so we lose the "1" dimension and we have a matrix of 28x28

In [None]:
# view the image as a plain 28x28 matrix (drops the leading "1" dimension) before plotting
print(y_i)
plt.imshow(x_i.view([28, 28]))
plt.show()

# Build a neural network

Now that we have the data, let's build our network:

In [None]:
# define class, inherits from "nn.Module"
class Net(nn.Module):
    """Single fully connected layer that maps a flattened 28x28 image to 10 class log-probabilities."""

    # specify initialization instructions
    def __init__(self):
        # run initialization from "nn.Module"
        super().__init__()

        # define the fully connected layer
        # the "28*28" is the flattened dimensions of our image
        # the output equals the amount of classes we want to predict
        self.fc = nn.Linear(28*28, 10)

    # define forward propagation
    def forward(self, x):
        """Return log-probabilities with one row per example.

        x must have shape (batch, 28*28); the softmax is taken over dim=1.
        """
        # input x goes through our fully connected layer, giving one raw score
        # (logit) per class
        logits = self.fc(x)

        # the logits go straight into the softmax: squashing them through a
        # sigmoid first would confine every score to (0, 1), so the network
        # could never assign a high probability to any class
        return F.log_softmax(logits, dim=1)

We can see the architecture here:

In [None]:
# instantiate the network and print its layers
net = Net()
print(net)

Let's pass a random tensor through this network:

In [None]:
# a random 28x28 "image", deliberately left as a 2-D matrix
x_random = torch.rand([28,28])

# NOTE: the layer was defined for flattened 28*28 inputs, so the matrix is not
# accepted as-is; the following cells reshape it step by step
net(x_random)

![alt text](https://memeguy.com/photos/images/when-your-hot-friend-send-the-pic-to-you-instead-and-says-that-was-a-mistake-247792.jpg "Title")


We need to reshape or flatten the matrix:

In [None]:
# flattened to a single vector of 28*28 values; there is still no batch
# dimension, while the forward pass takes the softmax over dim=1
net(x_random.view(28*28))

![alt text](http://memes.ucoz.com/_nw/23/92692556.jpg "Title")


Our network does not expect a single example, so we need to reshape as if we had multiple examples:

In [None]:
# shape (1, 28*28): a batch that contains one flattened example
net(x_random.view(1,28*28))

![alt text](https://gray-wilx-prod.cdn.arcpublishing.com/resizer/TNIRzaTbaM20cxSJYazl6zt8btM=/1200x675/smart/cloudfront-us-east-1.images.arcpublishing.com/gray/X7CY2J6TIJNB7N4IAIDYKPGKFM.jpg "Title")



# Training

Define the optimizer:

In [None]:
# say what should be optimized (net.parameters()) and set a learning rate (=lr)
# the optimizer receives the parameters of the `net` object that exists when this
# cell runs, so it has to be re-created whenever `net` is replaced by a new network
optimizer = optim.Adam(net.parameters(), lr=1e-3)

Let's get training:

In [None]:
# number of full passes (epochs) over the training data
num_epochs = 3

for epoch in range(num_epochs):
    # every batch is an (images, labels) pair
    for X, y in trainset:
        # clear the gradients of the previous batch so that batches stay
        # separated and old gradients are not added to the new ones
        net.zero_grad()

        # flatten every image to a row of 28*28 values and run the forward pass
        output = net(X.view(-1, 28*28))

        # negative log-likelihood loss between the predictions and the true labels y
        loss = F.nll_loss(output, y)

        # backpropagation: compute the gradient of the loss for every parameter
        loss.backward()

        # let the optimizer update the network parameters
        optimizer.step()

    # loss of the last batch of this epoch
    print(loss)

# Build a multilayered neural network

In [None]:
# define class, inherits from "nn.Module"
class Net(nn.Module):
    """Four fully connected layers: 28*28 -> 64 -> 64 -> 64 -> 10 class log-probabilities."""

    # specify initialization instructions
    def __init__(self):
        # run initialization from "nn.Module"
        super().__init__()

        # define the fully connected layers
        # the "28*28" is the flattened dimensions of our image
        # the final output equals the amount of classes we want to predict
        self.fc1 = nn.Linear(28*28, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 10)

    # define forward propagation
    def forward(self, x):
        """Return log-probabilities with one row per example.

        x must have shape (batch, 28*28); the softmax is taken over dim=1.
        """
        # input x goes through the first fully connected layer
        # "F.relu" is the activation function we pass the result through
        x = F.relu(self.fc1(x))

        # note how x now holds the activations of layer 1 and is used as the
        # input of the next layer
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))

        # the last layer produces one raw score (logit) per class; no activation
        # here, because a sigmoid would confine the scores to (0, 1) and cap the
        # probability the softmax can give to any class
        logits = self.fc4(x)

        # return the output with a (log) softmax function
        return F.log_softmax(logits, dim=1)

In [None]:
# instantiate the multilayered network and print its layers
net = Net()
print(net)

In [None]:
# a fresh optimizer for the parameters of the current network
optimizer = optim.Adam(net.parameters(), lr=1e-3)

# number of full passes (epochs) over the training data
num_epochs = 3

for epoch in range(num_epochs):
    # every batch is an (images, labels) pair
    for X, y in trainset:
        # clear the gradients of the previous batch so that batches stay
        # separated and old gradients are not added to the new ones
        net.zero_grad()

        # flatten every image to a row of 28*28 values and run the forward pass
        output = net(X.view(-1, 28*28))

        # negative log-likelihood loss between the predictions and the true labels y
        loss = F.nll_loss(output, y)

        # backpropagation: compute the gradient of the loss for every parameter
        loss.backward()

        # let the optimizer update the network parameters
        optimizer.step()

    # loss of the last batch of this epoch
    print(loss)

In [None]:
correct = 0
total = 0

# evaluation mode: switches off training-only behaviour such as dropout, so the
# same cell stays correct when it is reused for a network that has such layers
net.eval()

# make sure we do not track any gradients
with torch.no_grad():
    for X, y in trainset:
        output = net(X.view(-1, 28*28))

        # predicted class = position of the largest log-probability in each row;
        # the whole batch is compared with the labels at once instead of row by row
        predictions = output.argmax(dim=1)
        correct += (predictions == y).sum().item()
        total += y.size(0)
print("Correctly assigned:",correct,"out of",total)

In [None]:
correct = 0
total = 0

# evaluation mode: switches off training-only behaviour such as dropout, so the
# same cell stays correct when it is reused for a network that has such layers
net.eval()

# make sure we do not track any gradients
with torch.no_grad():
    for X, y in testset:
        output = net(X.view(-1, 28*28))

        # predicted class = position of the largest log-probability in each row;
        # the whole batch is compared with the labels at once instead of row by row
        predictions = output.argmax(dim=1)
        correct += (predictions == y).sum().item()
        total += y.size(0)
print("Correctly assigned:",correct,"out of",total)

# Apply some regularization with dropout

In [None]:
# define class, inherits from "nn.Module"
class Net(nn.Module):
    """Four fully connected layers (28*28 -> 64 -> 64 -> 64 -> 10) with dropout after the second."""

    # specify initialization instructions
    def __init__(self):
        # run initialization from "nn.Module"
        super().__init__()

        # define the fully connected layers
        # the "28*28" is the flattened dimensions of our image
        # the final output equals the amount of classes we want to predict
        self.fc1 = nn.Linear(28*28, 64)
        self.fc2 = nn.Linear(64, 64)
        # dropout with probability 0.25, applied between fc2 and fc3
        self.dropout = nn.Dropout(0.25)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 10)

    # define forward propagation
    def forward(self, x):
        """Return log-probabilities with one row per example.

        x must have shape (batch, 28*28); the softmax is taken over dim=1.
        """
        # input x goes through the first fully connected layer
        # "F.relu" is the activation function we pass the result through
        x = F.relu(self.fc1(x))

        # note how x now holds the activations of layer 1 and is used as the
        # input of the next layer
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))

        # the last layer produces one raw score (logit) per class; no ReLU here,
        # because it would clamp every negative score to 0 and block its gradient
        logits = self.fc4(x)

        # return the output with a (log) softmax function
        return F.log_softmax(logits, dim=1)

In [None]:
# instantiate the network with dropout and print its layers
net = Net()
print(net)

In [None]:
# a fresh optimizer for the parameters of the current network
optimizer = optim.Adam(net.parameters(), lr=1e-3)

# number of full passes (epochs) over the training data
num_epochs = 3

for epoch in range(num_epochs):
    # every batch is an (images, labels) pair
    for X, y in trainset:
        # clear the gradients of the previous batch so that batches stay
        # separated and old gradients are not added to the new ones
        net.zero_grad()

        # flatten every image to a row of 28*28 values and run the forward pass
        output = net(X.view(-1, 28*28))

        # negative log-likelihood loss between the predictions and the true labels y
        loss = F.nll_loss(output, y)

        # backpropagation: compute the gradient of the loss for every parameter
        loss.backward()

        # let the optimizer update the network parameters
        optimizer.step()

    # loss of the last batch of this epoch
    print(loss)

# Regression example

In [None]:
# synthetic regression problem: 500 samples, a single feature, bias=2, noise=10
# NOTE(review): no random_state is passed, so every run generates different data
X_sklearn, y_sklearn = make_regression(n_samples=500, n_features=1, bias=2, noise=10)

In [None]:
# take absolute values of inputs and targets, scale the targets down by 100,
# and convert the numpy arrays into float32 tensors
X = torch.from_numpy(np.abs(X_sklearn)).to(torch.float32)
y = torch.from_numpy(np.abs(y_sklearn) / 100).to(torch.float32)

In [None]:
class Net(nn.Module):
    """Linear regression model: one input feature mapped to one output value."""

    def __init__(self):
        super().__init__()
        # a single linear layer with 1 input and 1 output (one weight and one bias)
        self.layer = nn.Linear(1, 1)

    def forward(self, x):
        """Apply the linear layer to x and return the result unchanged."""
        return self.layer(x)


# instantiate the model and print its layers
net = Net()
print(net)

In [None]:
net = Net()
num_epochs = 25
batch_size = 50
optimizer = optim.SGD(net.parameters(), lr=1e-2)
loss_func = torch.nn.MSELoss()

# create batches
trainset = torch.utils.data.DataLoader(
    # create a tensor data set!
    TensorDataset(X,y),
    batch_size=batch_size,
    shuffle=True
)

for epoch in range(num_epochs):
    for X_batch, y_batch in trainset:
        # set gradients to zero, so that batches are separated and we do not
        # continue gradient calculations of the previous batch
        net.zero_grad()

        prediction = net(X_batch)

        # the network returns one column per example; flatten it so that it lines
        # up with y_batch. view(-1) infers the length from the batch itself, so a
        # final batch smaller than batch_size no longer raises a shape error
        loss = loss_func(prediction.view(-1), y_batch)

        # .item() prints the plain number instead of the tensor representation
        print("Batch loss: ",loss.item())

        loss.backward()
        optimizer.step()

# plot the last batch of the final epoch: data points in orange, model output in green
fig, ax = plt.subplots(figsize=(10,10))
ax.scatter(X_batch.numpy(), y_batch.numpy(), color = "orange")
# detach() takes the predictions out of the gradient graph before converting to numpy
ax.plot(X_batch.numpy(), prediction.detach().numpy(), 'g-', lw=3)
plt.show()

# GPU learning

Up till now everything was run on the CPU. For larger and more complex data sets we should use the GPU.

In [None]:
# check whether PyTorch reports CUDA (GPU support) as available
print(torch.cuda.is_available())

Select the specific GPU device (you can also list devices available)

In [None]:
# device handle for the CUDA GPU with index 0
device = torch.device("cuda", 0)
print(device)

Test if we should run on the GPU or CPU

In [None]:
# use the first GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using the GPU" if device.type == "cuda" else "Using the CPU")

In [None]:
# define class, inherits from "nn.Module"
class Net(nn.Module):
    """Four fully connected layers (28*28 -> 64 -> 64 -> 64 -> 10) with dropout after the second."""

    # specify initialization instructions
    def __init__(self):
        # run initialization from "nn.Module"
        super().__init__()

        # define the fully connected layers
        # the "28*28" is the flattened dimensions of our image
        # the final output equals the amount of classes we want to predict
        self.fc1 = nn.Linear(28*28, 64)
        self.fc2 = nn.Linear(64, 64)
        # dropout with probability 0.25, applied between fc2 and fc3
        self.dropout = nn.Dropout(0.25)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 10)

    # define forward propagation
    def forward(self, x):
        """Return log-probabilities with one row per example.

        x must have shape (batch, 28*28); the softmax is taken over dim=1.
        """
        # input x goes through the first fully connected layer
        # "F.relu" is the activation function we pass the result through
        x = F.relu(self.fc1(x))

        # note how x now holds the activations of layer 1 and is used as the
        # input of the next layer
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))

        # the last layer produces one raw score (logit) per class; no ReLU here,
        # because it would clamp every negative score to 0 and block its gradient
        logits = self.fc4(x)

        # return the output with a (log) softmax function
        return F.log_softmax(logits, dim=1)

In [None]:
# create the network and move its parameters to the selected device
net = Net().to(device)

In [None]:
# training split of MNIST; the ToTensor transform turns every image into a tensor
train = torchvision.datasets.MNIST(
    root="",
    train=True,
    download=True,
    transform=torchvision.transforms.Compose([torchvision.transforms.ToTensor()]),
)

In [None]:
# loader that hands out shuffled batches of 10 training samples
trainset = DataLoader(train, batch_size=10, shuffle=True)

In [None]:
# a fresh optimizer for the parameters of the network that was moved to `device`
optimizer = optim.Adam(net.parameters(), lr=1e-3)

# number of full passes (epochs) over the training data
num_epochs = 3

for epoch in range(num_epochs):
    # every batch is an (images, labels) pair
    for X, y in trainset:
        # clear the gradients of the previous batch so that batches stay
        # separated and old gradients are not added to the new ones
        net.zero_grad()

        # flatten every image to a row of 28*28 values and run the forward pass
        # NOTE: the batch is used exactly as the loader delivers it and is not
        # moved to `device` in this cell
        output = net(X.view(-1, 28*28))

        # negative log-likelihood loss between the predictions and the true labels y
        loss = F.nll_loss(output, y)

        # backpropagation: compute the gradient of the loss for every parameter
        loss.backward()

        # let the optimizer update the network parameters
        optimizer.step()

    # loss of the last batch of this epoch
    print(loss)

Everything needs to be on the GPU...

![alt text](https://media1.tenor.com/images/48290b257ad7e7fa0a832f525b39818c/tenor.gif?itemid=13782932 "Title")

In [None]:
# a fresh optimizer for the parameters of the network that was moved to `device`
optimizer = optim.Adam(net.parameters(), lr=1e-3)

# number of full passes (epochs) over the training data
num_epochs = 3

for epoch in range(num_epochs):
    # every batch is an (images, labels) pair
    for X, y in trainset:
        # clear the gradients of the previous batch so that batches stay
        # separated and old gradients are not added to the new ones
        net.zero_grad()

        # flatten every image to a row of 28*28 values, move the batch to the
        # same device as the network, and run the forward pass
        output = net(X.view(-1, 28*28).to(device))

        # negative log-likelihood loss; the labels have to be on that device as well
        loss = F.nll_loss(output, y.to(device))

        # backpropagation: compute the gradient of the loss for every parameter
        loss.backward()

        # let the optimizer update the network parameters
        optimizer.step()

    # loss of the last batch of this epoch
    print(loss)