# **_PyTorch Basics_**

#### _https://www.youtube.com/playlist?list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4_

#### __*Call the Libraries & GPU*__

In [None]:
import math
import torch
import torchvision  # 1st use: Dataset & Dataloader
import torch.nn as nn  # 1st use: Training Pipeline
import torch.nn.functional as F  # 1st use: Activation Functions
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets  # 1st use: Linear Regression
from sklearn.preprocessing import StandardScaler  # 1st use: Logistic Regression
from sklearn.model_selection import train_test_split  # 1st use: Logistic Regression
from torch.utils.data import Dataset, DataLoader  # 1st use: Dataset & Dataloader

# Device config (pick your set-up). Later cells place tensors and models on
# this device via `device=GPU` / `.to(GPU)`.
GPU = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # NVIDIA GPUs
# GPU = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')  # ARM GPUs (M1, M2, ...)

# Report which backend was selected.
if GPU.type == 'cpu':
    print('Using the Processor')
else:
    print('Using the Graphics Card')

In [None]:
# Test the GPU (mainly for ARM GPU users)
# Fits the cubic a + b*x + c*x^2 + d*x^3 to sin(x) with hand-written gradient
# descent; every tensor is created on the device selected in `GPU`.
dtype = torch.float32

# Create input and target data: 2000 evenly spaced points in [-pi, pi] and their sine
x = torch.linspace(-math.pi, math.pi, 2000, device=GPU, dtype=dtype)
y = torch.sin(x)

# Randomly initialize weights (one 0-dim tensor per polynomial coefficient)
a = torch.randn((), device=GPU, dtype=dtype)
b = torch.randn((), device=GPU, dtype=dtype)
c = torch.randn((), device=GPU, dtype=dtype)
d = torch.randn((), device=GPU, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute the loss (sum of squared errors) and print it every 500 steps
    loss = (y_pred - y).pow(2).sum().item()
    if t % 500 == 0:
        print(f'Epoch: {t} | Loss: {loss:.4f}')

    # Backprop by hand: gradients of the loss with respect to a, b, c, d
    grad_y_pred = 2.0 * (y_pred - y)  # d(loss)/d(y_pred)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights using gradient descent (in place)
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


# Show the fitted polynomial
print(f'Result: y = {a.item():.4f} + {b.item():.4f}x + '
      f'{c.item():.4f}x^2 + {d.item():.4f}x^3')

#### __*Create a Tensor and basic operations*__

In [None]:
# Build a 2x2 tensor from a nested list with an explicit dtype
x = torch.tensor([[1, 1],[1, 1]], dtype=torch.float64)
print(x)
print(x.dtype)
# Build a 2x2 tensor of random values and show its dtype
y = torch.rand(2, 2)
print(y)
print(f'{y.dtype}\n')
# Out-of-place alternatives (kept for reference):
# z = x + y
# print(z)

# y = y.add(x)
# y = torch.add(x, y) # add(+), sub(-), mul(*), div(/)
y.add_(x)  # in-place variant of the commented-out `y = y.add(x)` above
print(y)

#### __*Select and view certain stuff*__

In [None]:
# Indexing a 5x4 tensor
x = torch.rand(5, 4)
print(x, x.size())
print(x[:, 0])  # print 1st column
print(x[1, :])  # print 2nd row
print(x[0, 0])  # a single element, still wrapped in a tensor
print(x[0, 0].item())  # print a direct value

y = x.view(5*4)  # re-shapes the tensor into 1D (20 elements)
print(y, y.size())

y = x.view(-1, 10)  # -1 lets torch calculate the other dimension (20 / 10 = 2)
print(y, y.size())

#### __*Convert from torch to numpy and vice-versa*__

In [None]:
# torch -> numpy
x = torch.rand(5)
print(x, type(x))
y = x.numpy()  # y shares the location with x, changing x will change y as well
print(f'{y} {type(y)}\n')

# numpy -> torch
x = np.ones(5)
print(x, type(x))
y = torch.from_numpy(x)  # y shares the location with x, changing x will change y as well
print(y, type(y))

#### __*Create a Tensor on the GPU*__

In [None]:
# Create tensors on the selected device, add them there, then copy the sum back.
x = torch.rand(5, device=GPU)  # cannot be converted to numpy while it lives on a GPU
y = torch.rand(5)
y = y.to(GPU)  # move an existing CPU tensor to the device
z = x + y
z = z.to("cpu")

# Converting directly is expected to fail when `x` is on a GPU.
try:
    x = x.numpy()
except TypeError:
    print("Cannot convert a GPU Tensor to Numpy")

# `z` was moved to the CPU above, so this conversion is expected to succeed.
try:
    z = z.numpy()
    print(z, type(z))
except TypeError:
    print("Cannot convert a GPU Tensor to Numpy")

#### __*AutoGrad*__

In [None]:
# Track operations on x so gradients can be computed later
x = torch.randn(3, requires_grad=True)
print(x)
# ⌄⌄⌄ These methods will remove grad
# x.requires_grad_(False)
# x.detach_()
# with torch.no_grad():
#   y = x + 2

y = x + 2
print(y)

# Reduce to a scalar so backward() can be called without arguments
z = y * y * 2
z = z.mean()
print(z)

z.backward()  # dz/dx
print(x.grad)

# Dummy Function: shows that gradients accumulate across backward() calls
weights = torch.randn(4, requires_grad=True)

for epoch in range(3):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)

    weights.grad.zero_()  # we have to clear the grad

#### __*Backpropagation*__

In [None]:
# One training sample (x, y) and a single trainable weight w
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

# forward pass and compute the loss (squared error)
y_hat = w * x
loss = (y_hat - y)**2
print(loss)

# backward pass: fills w.grad with d(loss)/dw
loss.backward()
print(w.grad)

### update weights
### next forward and backward

#### __*Manual Gradient Descent*__

In [None]:
# Target function to learn: f = w * x, with the true weight w = 2
# f = w * x
# f = 2 * x

# Training inputs and their targets (Y = 2 * X)
X = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
Y = np.array([2.0, 4.0, 6.0, 8.0], dtype=np.float32)

# The single trainable weight, starting at zero
w = 0.0


# model prediction
def forward(x):
    """Return the model output for `x` using the current global weight `w`."""
    prediction = w * x
    return prediction


# loss = MSE
def loss(y, y_predicted):
    """Return the mean squared error between targets `y` and `y_predicted`."""
    residual = y_predicted - y
    squared_error = residual ** 2
    return squared_error.mean()


# gradient
# MSE:   J = 1/N * sum((w*x - y)**2)
# dJ/dw    = 1/N * sum(2x * (w*x - y))
def gradiant(x, y, y_predicted):
    """Return dJ/dw of the MSE loss for the model f = w * x.

    Args:
        x: array of inputs.
        y: array of targets, same shape as `x`.
        y_predicted: array of current predictions `w * x`.

    Returns:
        The mean over all samples of 2 * x * (y_predicted - y).
    """
    # Average the per-sample terms. A dot product would already sum them,
    # so a following .mean() would leave the gradient N times too large.
    return (2 * x * (y_predicted - y)).mean()


# Prediction for an unseen input with the untrained weight
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss
    L = loss(Y, y_pred)

    # gradients (computed by hand, no autograd)
    dw = gradiant(X, Y, y_pred)

    # update weights: step against the gradient
    w -= learning_rate * dw

    # report progress every second epoch
    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {L:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

#### __*Automatic Gradient Descent*__

In [None]:
# Target function to learn: f = w * x, with the true weight w = 2
# f = w * x
# f = 2 * x

# Training inputs and their targets (Y = 2 * X)
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)

# The single trainable weight; requires_grad lets autograd track it
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)


# model prediction
def forward(x):
    """Return the model output for `x` using the current global weight `w`."""
    prediction = w * x
    return prediction


# loss = MSE
def loss(y, y_predicted):
    """Return the mean squared error between targets `y` and `y_predicted`."""
    residual = y_predicted - y
    squared_error = residual ** 2
    return squared_error.mean()


# Prediction for an unseen input with the untrained weight
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 80

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss
    L = loss(Y, y_pred)

    # gradients = backward pass
    L.backward()  # dL/dw

    # update weights (no_grad: the update itself must not be tracked by autograd)
    with torch.no_grad():
        w -= learning_rate * w.grad

    # !!! ZERO the gradients !!!
    w.grad.zero_()

    # report progress ten times over the run
    if epoch % int(n_iters/10) == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {L:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

#### __*Training Pipeline (Manual Prediction)*__

In [None]:
# General training pipeline:
# 1.) Design model (input, output size, forward pass)
# 2.) Construct loss and optimizer
# 3.) Training loop
#       - forward pass: compute prediction
#       - backward pass: gradients
#       - update weights

# Target function to learn: f = w * x, with the true weight w = 2
# f = w * x
# f = 2 * x

# Training inputs and their targets (Y = 2 * X)
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)

# The single trainable weight; requires_grad lets autograd track it
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)


# model prediction
def forward(x):
    """Return the model output for `x` using the current global weight `w`."""
    prediction = w * x
    return prediction


# Prediction for an unseen input with the untrained weight
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 80

# Library loss and optimizer replace the hand-written MSE and weight update
loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss: nn.MSELoss is called as (input, target)
    L = loss(y_pred, Y)

    # gradients = backward pass
    L.backward()  # dL/dw

    # update weights
    optimizer.step()

    # !!! ZERO the gradients !!!
    optimizer.zero_grad()

    # report progress ten times over the run
    if epoch % (n_iters // 10) == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {L:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

#### __*Training Pipeline (Automatic Prediction)*__

In [None]:
# General training pipeline:
# 1.) Design model (input, output size, forward pass)
# 2.) Construct loss and optimizer
# 3.) Training loop
#       - forward pass: compute prediction
#       - backward pass: gradients
#       - update weights

# Target function to learn: f = w * x, with the true weight w = 2
# f = w * x
# f = 2 * x

# nn.Linear expects 2-D input, so each sample is its own row: shape (n_samples, n_features)
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32) #, device=GPU)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32) #, device=GPU)

# Unseen input used to probe the model before and after training
X_test = torch.tensor([5], dtype=torch.float32) #, device=GPU)

n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
# The output width comes from the targets, not from the number of input features
output_size = Y.shape[1]


# Equivalent one-liner without a custom class:
# model = nn.Linear(input_size, output_size) #, device=GPU)
class LinearRegression(nn.Module):
    """Linear model wrapping a single `nn.Linear` layer stored as `self.lin`."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # define layers
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to `x` and return the result."""
        prediction = self.lin(x)
        return prediction


model = LinearRegression(input_size, output_size)

# Prediction for an unseen input with the untrained model
print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

# Training
learning_rate = 0.001
n_iters = 30000

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    # loss: nn.MSELoss is called as (input, target)
    L = loss(y_pred, Y)

    # gradients = backward pass
    L.backward()  # dL/dw

    # update weights
    optimizer.step()

    # !!! ZERO the gradients !!!
    optimizer.zero_grad()

    # report progress ten times over the run
    if epoch % (n_iters // 10) == 0:
        # the model's parameters are the layer's weight matrix and bias vector
        w, b = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {L:.8f}')

print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

#### __*Linear Regression*__

In [None]:
# 0.) Data Preparation
# Synthetic 1-feature regression problem with noise
X_numpy, y_numpy = datasets.make_regression(n_samples=100, n_features=1, noise=20, random_state=1)

# Convert to float32 tensors and reshape the targets into a column
X = torch.from_numpy(X_numpy.astype(np.float32))
y = torch.from_numpy(y_numpy.astype(np.float32))
y = y.view(y.shape[0], 1)

n_samples, n_features = X.shape

# 1.) Model
input_size = n_features
output_size = 1
model = nn.Linear(input_size, output_size)

# 2.) Loss & Optimizer
learning_rate = 0.01
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# 3.) Training Loop
num_epochs = 250
for epoch in range(num_epochs):
    # Forward Pass & Loss
    y_predicted = model(X)
    L = criterion(y_predicted, y)
    # Backward Pass
    L.backward()
    # Update
    optimizer.step()
    # !!! Zero Gradients !!!
    optimizer.zero_grad()

    # report progress ten times over the run
    if epoch % int(num_epochs/10) == 0:
        print(f'epoch {epoch + 1}: loss = {L.item():.8f}')

# Plot the data (red dots) and the fitted line (blue)
# detach() drops the autograd graph so the result can be converted to numpy
predicted = model(X).detach().numpy()
plt.plot(X_numpy, y_numpy, 'ro')
plt.plot(X_numpy, predicted, 'b')
plt.show()

#### __*Logistic Regression*__

In [None]:
# 0.) Data Preparation
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

n_samples, n_features = X.shape

# Hold out 20% of the samples for testing; fixed seed for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

# Scale
sc = StandardScaler()  # recommended with Logistic Regression
X_train = sc.fit_transform(X_train)
# Reuse the statistics learned on the training split. Refitting on the test
# split would scale it differently from the data the model is trained on.
X_test = sc.transform(X_test)

# Convert everything to float32 tensors
X_train = torch.from_numpy(X_train.astype(np.float32))
# X_train = X_train.to(GPU)
X_test = torch.from_numpy(X_test.astype(np.float32))
# X_test = X_test.to(GPU)
y_train = torch.from_numpy(y_train.astype(np.float32))
# y_train = y_train.to(GPU)
y_test = torch.from_numpy(y_test.astype(np.float32))
# y_test = y_test.to(GPU)

# Reshape the targets into column vectors of shape (n, 1)
y_train = y_train.view(y_train.shape[0], 1)
y_test = y_test.view(y_test.shape[0], 1)


# 1.) Model
# f = wx + b, Sigmoid at the End
class LogisticRegression(nn.Module):
    """Binary classifier: one linear layer with a single output, then a sigmoid."""

    def __init__(self, n_input_features):
        super().__init__()
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for `x`."""
        logits = self.linear(x)
        return torch.sigmoid(logits)


model = LogisticRegression(n_features)
# model = model.to(GPU)

# 2.) Loss & Optimizer
learning_rate = 0.01
criterion = nn.BCELoss()  # binary cross-entropy on the model's sigmoid output
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# 3.) Training Loop
num_epochs = 10000
for epoch in range(num_epochs):
    # Forward Pass & Loss
    yPredicted = model(X_train)
    L = criterion(yPredicted, y_train)
    # Backward Pass
    L.backward()
    # Update
    optimizer.step()
    # !!! Zero Gradients !!!
    optimizer.zero_grad()

    # report progress ten times over the run
    if epoch % int(num_epochs/10) == 0:
        print(f'epoch {epoch + 1}: loss = {L.item():.8f}')

# Evaluation on the held-out split; no gradients are needed here
with torch.no_grad():
    yPredicted = model(X_test)
    yPredicted_cls = yPredicted.round()  # round the sigmoid output to a 0/1 class label
    acc = yPredicted_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f'Accuracy = {acc:.4f}')

#### __*Dataset & Dataloader*__
<div class="alert alert-block alert-success">

__epoch = 1 forward & backward pass of ALL training samples__

__batch_size = number of training samples in one forward & backward pass__

__number of iterations = number of passes, each pass using [batch_size] number of samples__

__e.g. 100 samples, batch_size=20 --> 100/20 = 5 iterations per epoch__
</div>

In [None]:
class WineDataset(Dataset):
    """Dataset over './datasets/wine.csv': column 0 is the label, the rest are features."""

    def __init__(self):
        # Load Data (the first row is skipped as a header)
        raw = np.loadtxt('./datasets/wine.csv', delimiter=',', dtype=np.float32, skiprows=1)
        self.n_samples = raw.shape[0]
        self.x = torch.from_numpy(raw[:, 1:])
        self.y = torch.from_numpy(raw[:, [0]])  # n_samples, 1

    def __getitem__(self, index):
        """Return the (features, label) pair stored at `index`."""
        features = self.x[index]
        label = self.y[index]
        return features, label

    def __len__(self):
        """Return the number of samples that were loaded."""
        return self.n_samples


# Use a separate name for the instance so the WineDataset class stays usable
wine_dataset = WineDataset()

# Look at the first sample
first_data = wine_dataset[0]
features, labels = first_data
print(features, labels)


# !!! IF YOU GET AN ERROR DURING LOADING, SET num_workers TO 0 !!!
train_loader = DataLoader(dataset=wine_dataset,  # Load whole dataset with DataLoader
                          batch_size=4,
                          shuffle=True,  # shuffle: shuffle data, good for training
                          num_workers=0)  # num_workers: faster loading with multiple subprocesses

# convert to an iterator and look at one random batch
dataiter = iter(train_loader)
data = next(dataiter)
features, labels = data
print(features, labels)

#### __*Dataset Transforms*__

_https://pytorch.org/vision/stable/transforms.html_

In [None]:
class WineDataset(Dataset):
    """Wine dataset kept as NumPy arrays, with an optional per-sample transform.

    Column 0 of the CSV is the label; the remaining columns are the features.

    Args:
        transform: optional callable applied to each (inputs, targets) pair
            returned by `__getitem__`; `None` returns the raw NumPy pair.
        path: location of the CSV file (the first row is skipped as a header).
    """

    def __init__(self, transform=None, path='./datasets/wine.csv'):
        # Load Data
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1)
        self.n_samples = xy.shape[0]

        self.x = xy[:, 1:]
        self.y = xy[:, [0]]  # n_samples, 1

        self.transform = transform

    def __getitem__(self, index):
        """Return the sample at `index`, passed through the transform if one is set."""
        sample = self.x[index], self.y[index]

        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples that were loaded."""
        return self.n_samples


class ToTensor:
    """Sample transform that turns an (inputs, targets) pair of NumPy arrays into tensors."""

    def __call__(self, sample):
        features, labels = sample
        feature_tensor = torch.from_numpy(features)
        label_tensor = torch.from_numpy(labels)
        return feature_tensor, label_tensor


class MulTransform:
    """Sample transform that scales the inputs of an (inputs, targets) pair.

    Args:
        factor: value every input element is multiplied by.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, sample):
        """Return (inputs * factor, targets) without modifying `inputs`."""
        inputs, targets = sample
        # Build a new object rather than using `*=`. The inputs may share
        # memory with the dataset's storage (e.g. via torch.from_numpy), and
        # an in-place multiply would rescale the stored data on every access.
        return inputs * self.factor, targets


# No transform: samples come back as the dataset stores them
WineDatasetNumpy = WineDataset(transform=None)
first_data = WineDatasetNumpy[0]
features, labels = first_data
print(type(features), type(labels))
print(f'{features}\n')

# ToTensor transform: samples are converted on access
WineDatasetTensor = WineDataset(transform=ToTensor())
first_data = WineDatasetTensor[0]
features, labels = first_data
print(type(features), type(labels))
print(f'{features}\n')

# Combine more transformations: Compose applies them in list order
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(2)])
WineDatasetComposed = WineDataset(transform=composed)
first_data = WineDatasetComposed[0]
features, labels = first_data
print(type(features), type(labels))
print(features)

#### __*Softmax*__
<div class="alert alert-block alert-success">

__Softmax applies the exponential function to each element, and normalizes__
__by dividing by the sum of all these exponentials__
__-> squashes the output to be between 0 and 1 = probability__
__sum of all probabilities is 1__
</div>

In [None]:
def softmax(x):
    """Return the softmax of `x` along axis 0.

    Args:
        x: array-like of scores.

    Returns:
        Array of the same shape whose entries along axis 0 are positive and
        sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)


# Hand-written NumPy softmax
x = np.array([2.0, 1.0, 0.1])
outputs = softmax(x)
print(f'Softmax Numpy: {outputs}')

# Built-in torch softmax on the same scores, for comparison
x = torch.tensor([2.0, 1.0, 0.1])
outputs = torch.softmax(x, dim=0)
print(f'Softmax Torch: {outputs}')

#### __*Cross-Entropy*__
<div class="alert alert-block alert-success">

__Cross-entropy loss, or log loss,__
__measures the performance of a classification model__
__whose output is a probability value between 0 and 1.__
__-> loss increases as the predicted probability diverges from the actual label__
</div>

In [None]:
'''
         -> 2.0              -> 0.65
  Linear -> 1.0  -> Softmax  -> 0.25   -> CrossEntropy(y, y_hat)
         -> 0.1              -> 0.1

      scores(logits)      probabilities
                            sum = 1.0
'''

In [None]:
# Numpy Cross-Entropy
def cross_entropy(actual, predicted, eps=1e-15):
    """Return the cross-entropy between a one-hot target and predicted probabilities.

    Args:
        actual: one-hot encoded target array.
        predicted: array of predicted probabilities, same shape as `actual`.
        eps: lower bound applied to `predicted` before taking the logarithm.

    Returns:
        -sum(actual * log(predicted)), not normalized by the number of samples.
    """
    # A probability of exactly 0 would give log(0) = -inf, and 0 * -inf = nan.
    safe_predicted = np.maximum(np.asarray(predicted, dtype=float), eps)
    return -np.sum(np.asarray(actual) * np.log(safe_predicted))  # / float(predicted.shape[0])


# y must be one hot encoded
# if class 0: [1 0 0]
# if class 1: [0 1 0]
# if class 2: [0 0 1]
Y = np.array([1, 0, 0])

# Predictions are probabilities here (softmax already applied)
Y_pred_good = np.array([0.7, 0.2, 0.1])
Y_pred_bad = np.array([0.1, 0.3, 0.6])
L1 = cross_entropy(Y, Y_pred_good)
L2 = cross_entropy(Y, Y_pred_bad)
print(f'Loss1 Numpy: {L1:.4f}')
print(f'Loss2 Numpy: {L2:.4f}')



# Torch Cross-Entropy
# Here Y holds class indices (not one-hot) and the predictions are raw scores
L = nn.CrossEntropyLoss()
'''
Y = torch.tensor([0]) # 1 samples
# n_samples x n_classes = 1x3
Y_pred_good = torch.tensor([[2.0, 1.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3]])
'''
Y = torch.tensor([2, 0, 1])  # 3 samples
# n_samples x n_classes = 3x3
Y_pred_good = torch.tensor([[0.2, 1.0, 2.1], [2.0, 1.0, 0.1], [1.0, 3.0, 0.1]])
Y_pred_bad = torch.tensor([[0.5, 2.0, 0.3], [0.5, 1.0, 2.1], [2.0, 1.0, 0.1]])

L1 = L(Y_pred_good, Y)
L2 = L(Y_pred_bad, Y)

print(f'Loss1 Torch: {L1.item():.4f}')
print(f'Loss2 Torch: {L2.item():.4f}')

# torch.max along dim 1 returns (values, indices); the index is the predicted class
_, predictions1 = torch.max(Y_pred_good, 1)
_, predictions2 = torch.max(Y_pred_bad, 1)
print(f'Prediction1: {predictions1}')
print(f'Prediction2: {predictions2}')



# Binary Classification Dummy Function
class NeuralNet1(nn.Module):
    """Binary classifier: linear -> ReLU -> linear (1 output) -> sigmoid."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return the sigmoid output for `x`."""
        hidden = self.relu(self.linear1(x))
        logit = self.linear2(hidden)
        # sigmoid at the end
        return torch.sigmoid(logit)


# The model's sigmoid output is paired with binary cross-entropy
model = NeuralNet1(input_size=28 * 28, hidden_size=5)
criterion = nn.BCELoss()


# Multiclass Problem Dummy Function
class NeuralNet2(nn.Module):
    """Multiclass classifier: linear -> ReLU -> linear, returning raw class scores."""

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return one raw score per class for `x`."""
        hidden = self.relu(self.linear1(x))
        # no softmax at the end
        return self.linear2(hidden)


model = NeuralNet2(input_size=28 * 28, hidden_size=5, num_classes=3)
criterion = nn.CrossEntropyLoss()  # (applies Softmax)

#### __*Activation Functions*__

In [None]:
# Each activation is shown twice: as a function call and as an nn module
x = torch.tensor([-1.0, 1.0, 2.0, 3.0])

# Softmax
output = torch.softmax(x, dim=0)
print(f'Softmax: {output}')
sm = nn.Softmax(dim=0)
output = sm(x)
print(f'Softmax: {output}')

# Sigmoid
output = torch.sigmoid(x)
print(f'Sigmoid: {output}')
s = nn.Sigmoid()
output = s(x)
print(f'Sigmoid: {output}')

# Tanh
output = torch.tanh(x)
print(f'Tanh: {output}')
t = nn.Tanh()
output = t(x)
print(f'Tanh: {output}')

# ReLU
output = torch.relu(x)
print(f'ReLU: {output}')
relu = nn.ReLU()
output = relu(x)
print(f'ReLU: {output}')

# Leaky ReLU (functional form lives in torch.nn.functional)
output = F.leaky_relu(x)
print(f'LeakyReLU: {output}')
l_relu = nn.LeakyReLU()
output = l_relu(x)
print(f'LeakyReLU: {output}')

# nn.ReLU() creates a nn.Module which you can add e.g. to a nn.Sequential model.
# torch.relu on the other side is just the functional API call to the relu function,
# so that you can add it e.g. in your forward method yourself.


# option 1 (create nn modules)
class NeuralNet(nn.Module):
    """Binary classifier whose activations are stored as nn modules."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run linear1 -> relu -> linear2 -> sigmoid on `x`."""
        hidden = self.relu(self.linear1(x))
        return self.sigmoid(self.linear2(hidden))


# option 2 (use activation functions directly in forward pass)
class NeuralNet(nn.Module):
    """Binary classifier that calls the activation functions inside forward()."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.linear1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.linear2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Run linear1 -> relu -> linear2 -> sigmoid on `x`."""
        hidden = torch.relu(self.linear1(x))
        logit = self.linear2(hidden)
        return torch.sigmoid(logit)

#### __*Feed-Forward Neural Net*__


In [None]:
# Hyper-parameters
input_size = 784  # 28*28 image
hidden_size = 500
num_classes = 10
num_epochs = 2
batch_size = 100
learning_rate = 0.001

# MNIST dataset (the training split is downloaded into ./datasets if needed)
train_dataset = torchvision.datasets.MNIST(root='./datasets', train=True,
                                           transform=torchvision.transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./datasets', train=False,
                                          transform=torchvision.transforms.ToTensor())

# Data loader (only the training data is shuffled)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size)

# Check the size: pull one batch from the training loader
examples = iter(train_loader)
samples, labels = next(examples)
#print(samples.shape, labels.shape)

# Show the images
#for i in range(6):
#    plt.subplot(2, 3, i + 1)
#    plt.imshow(samples[i][0], cmap='gray')
#plt.show()


# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Classifier: linear -> ReLU -> linear, returning raw class scores."""

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return one raw score per class for `x` (no softmax is applied)."""
        hidden = self.relu(self.l1(x))
        return self.l2(hidden)


model = NeuralNet(input_size, hidden_size, num_classes).to(GPU)

# Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
n_total_steps = len(train_loader)
n_steps = 10  # Number of print-outs per epoch
# Derive the print interval from the real number of batches; at least 1 so the
# modulo below never divides by zero on very small loaders.
print_every = max(1, n_total_steps // n_steps)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Images = 100, 1, 28, 28
        # We need: 100, 784
        #images = images.reshape(-1, samples.shape[2]*samples.shape[3]).to(GPU)
        images = images.reshape(-1, input_size).to(GPU)
        labels = labels.to(GPU)

        # Forward Pass
        outputs = model(images)  # class scores for the batch
        loss = criterion(outputs, labels)  # compare the scores with the true labels

        # Backward Pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print some information
        if (i + 1) % print_every == 0:
            print(f'Epoch {epoch + 1} / {num_epochs} | Step {i+1} / {n_total_steps} | '
                  f'Loss: {loss.item():.4f}')


# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, input_size).to(GPU)
        labels = labels.to(GPU)
        outputs = model(images)

        # Value, Index: the index of the largest score is the predicted class
        _, predictions = torch.max(outputs, 1)
        n_samples += labels.shape[0]
        n_correct += (predictions == labels).sum().item()

    accuracy = 100.0 * n_correct / n_samples
    print(f'Accuracy of the model: {accuracy}%')

#### __*Convolutional Neural Net (CNN)*__

In [None]:
# Hyper-parameters
num_epochs = 5
batch_size = 4
learning_rate = 0.001

# Dataset has PILImage images of range [0, 1].
# We transform them to Tensors of normalized range [-1, 1]
# (Normalize is given mean 0.5 and std 0.5 for each of the 3 channels)
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# CIFAR10: 60000 32x32 color images in 10 classes, with 6000 images per class
train_dataset = torchvision.datasets.CIFAR10(root='./datasets', train=True,
                                             transform=transform, download=True)

test_dataset = torchvision.datasets.CIFAR10(root='./datasets', train=False,
                                            transform=transform)

# Only the training data is shuffled
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

# Human-readable class names, indexed by label
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


def imshow(img):
    """Display an image tensor `img` with matplotlib.

    The tensor is first mapped back with `img / 2 + 0.5`, then its axes are
    reordered with (1, 2, 0) before plotting.
    """
    restored = img / 2 + 0.5  # un-normalize
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


# Get some random training images (one batch from the shuffled loader)
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Show images (uncomment to display the batch as a grid)
#imshow(torchvision.utils.make_grid(images))


class ConvNet(nn.Module):
    """CNN: two conv + max-pool stages followed by three fully connected layers."""

    def __init__(self):
        super().__init__()
        # input = 3 colour channels, output = 6 (our choice), filter = 5*5 (our choice)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # take 2*2, then move 2 px
        # input = 6 channels, output = 16 (our choice), filter = 5*5 (our choice)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return 10 raw class scores per image; shape comments assume n x 3 x 32 x 32 input."""
        # -> n, 3, 32, 32
        stage1 = self.pool(F.relu(self.conv1(x)))       # -> n, 6, 14, 14
        stage2 = self.pool(F.relu(self.conv2(stage1)))  # -> n, 16, 5, 5
        flat = stage2.view(-1, 16 * 5 * 5)              # -> n, 400
        hidden1 = F.relu(self.fc1(flat))                # -> n, 120
        hidden2 = F.relu(self.fc2(hidden1))             # -> n, 84
        scores = self.fc3(hidden2)                      # -> n, 10
        return scores


model = ConvNet().to(GPU)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training loop
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Origin shape: [4, 3, 32, 32] = 4, 3, 1024
        # input_layer: 3 input channels, 6 output channels, 5 kernel size
        images = images.to(GPU)
        labels = labels.to(GPU)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward & optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # report progress every 2000 steps
        if (i+1) % 2000 == 0:
            print(f'Epoch {epoch + 1} / {num_epochs} | Step {i+1} / {n_total_steps} | '
                  f'Loss: {loss.item():.4f}')

print('Finished Training')
# Save only the learned parameters
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)

# Evaluation: overall accuracy plus accuracy per class; no gradients needed
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    n_class_correct = [0] * len(classes)
    n_class_samples = [0] * len(classes)
    for images, labels in test_loader:
        images = images.to(GPU)
        labels = labels.to(GPU)
        outputs = model(images)
        # max returns (value ,index)
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

        # Walk the actual batch contents rather than range(batch_size), so a
        # shorter final batch cannot raise an IndexError. tolist() yields
        # plain ints that are safe to use as list indices.
        for label, pred in zip(labels.tolist(), predicted.tolist()):
            if label == pred:
                n_class_correct[label] += 1
            n_class_samples[label] += 1

    accuracy = 100.0 * n_correct / n_samples
    print(f'Accuracy of the Model: {accuracy} %')

    for i, class_name in enumerate(classes):
        # A class with no test samples has no defined accuracy; report nan
        # instead of dividing by zero.
        if n_class_samples[i]:
            acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        else:
            acc = float('nan')
        print(f'Accuracy of {class_name}: {acc} %')

__Want to learn more about CNNs?__ <br>
__[Stanford Lecture (Video)](https://youtu.be/bNb2fEVKeEo)__ <br>
__[deeplizard (Video)](https://youtu.be/YRhxdVk_sIs)__ <br>
__[GitHub (Article)](https://cs231n.github.io/convolutional-networks/)__ <br>
__[Machine Learning Mastery (Article)](https://machinelearningmastery.com/convolutional-layers-for-deep-learning-neural-networks/)__ <br> <br>