<!-- ---
reviewed_on: "2024-10-11"
--- -->

# First control

## Import necessary packages

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import torch

## Loading the dataset

Our dataset is [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html), which consists of $60000$ color images of size $32 \times 32$ in $10$ classes, with $6000$ images per class. The classes are:

- airplane.

- automobile.

- bird.

- cat.

- deer.

- dog.

- frog.

- horse.

- ship.

- truck.

In [None]:
from torchvision import datasets, transforms


# Preprocessing pipeline: convert each image to a tensor, then normalize every
# channel with mean 0.5 and standard deviation 0.5
normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
transform = transforms.Compose([transforms.ToTensor(), normalize])

# Fetch the training split (downloaded on first use) and wrap it in a loader
# that serves shuffled mini-batches of 64 images
trainset = datasets.CIFAR10(
	root="~/.pytorch/CIFAR-10/",
	train=True,
	download=True,
	transform=transform,
)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

In [4]:
# Human-readable class names, positioned so that classes[i] names label i
classes = [
	"airplane", "automobile", "bird", "cat", "deer",
	"dog", "frog", "horse", "ship", "truck",
]

To check if the dataset is correct, we will check the number of instances per label.

In [None]:
from collections import Counter


# Read the labels straight from the dataset's `targets` list. Iterating over
# `trainset` would load and transform every image only to discard it.
labels = trainset.targets

# Count the number of records per label
label_counts = Counter(labels)

# Report in class-index order so the listing lines up with `classes`
for class_idx, class_name in enumerate(classes):
	print(f"{class_name}: {label_counts[class_idx]}")

In [None]:
# Take one mini-batch from the loader and inspect its type and shape
dataiter = iter(trainloader)
images, labels = next(dataiter)

print(type(images), images.shape, sep="\n")

To display the image we can use `plt.imshow`, but that function expects the image data in the form `(height, width, channels)`, whereas our tensors are stored as `(channels, height, width)`, so we must first reorder the tensor's dimensions.

In [84]:
# Convert the first image of the batch into a NumPy array, then move the
# channel axis last: (channels, height, width) -> (height, width, channels)
img = images[0].numpy().transpose(1, 2, 0)

`plt.imshow` also expects floating-point pixel values to lie in the range $0$ to $1$, so we must rescale the image before displaying it.

In [85]:
# Min-max rescale: the darkest pixel maps to 0 and the brightest to 1
pixel_min = img.min()
pixel_range = img.max() - pixel_min
img = (img - pixel_min) / pixel_range

In [None]:
# Draw the rescaled image on an explicit Axes and render the figure
fig, ax = plt.subplots()
ax.imshow(img)
plt.show()

## Manual implementation of a neural network

In the guide the implemented neural network had $256$ neurons in the hidden layer, so we will create one with the same amount, using the sigmoid activation function.

In [None]:
def activation(x):
	"""Apply the sigmoid function element-wise.

	Args:
		x: Input tensor of any shape.

	Returns:
		A tensor with the same shape as ``x`` holding ``1 / (1 + exp(-x))``.
	"""
	denominator = 1 + torch.exp(-x)
	return 1 / denominator

# Flatten every image of the batch into a single row vector
inputs = images.view(images.shape[0], -1) # (64, 3072), 3072 = 3 * 32 * 32

# Randomly initialised parameters: 3072 inputs -> 256 hidden units -> 10 outputs
W_1 = torch.randn(3072, 256)
B_1 = torch.randn(256)

W_2 = torch.randn(256, 10)
B_2 = torch.randn(10)

# Hidden layer with sigmoid activation
h = activation(torch.mm(inputs, W_1) + B_1)
# The output layer stays linear: these raw scores are turned into a probability
# distribution by softmax afterwards, and squashing them through a sigmoid
# first would compress them into (0, 1) and flatten that distribution.
output = torch.mm(h, W_2) + B_2
print(output)

Following the guide, now we will calculate the probability distribution using the softmax function.

In [None]:
def softmax(x):
	"""Compute a row-wise softmax of a 2D tensor.

	Args:
		x: Tensor of shape (batch, classes) holding one row of scores per sample.

	Returns:
		A tensor of the same shape whose rows are non-negative and sum to 1.
	"""
	# Subtracting the per-row maximum does not change the result mathematically,
	# but it keeps exp() from overflowing to inf (and inf / inf = NaN) on large scores.
	shifted = x - x.max(dim=1, keepdim=True).values
	exp_x = torch.exp(shifted)
	return exp_x / exp_x.sum(dim=1, keepdim=True)

# Turn the network output into per-class probabilities, then show the shape
# and the sum of each row
probabilities = softmax(output)
print(probabilities.shape, probabilities.sum(dim=1), sep="\n")

## Building a neural network with PyTorch

In [12]:
from torch import nn

In [13]:
class Network(nn.Module):
	"""Fully connected classifier: 3072 inputs -> 128 -> 64 -> 10 class probabilities."""

	def __init__(self):
		super().__init__()

		# Linear layers: flattened 3 * 32 * 32 image -> 128 -> 64 -> 10 classes
		self.fc1 = nn.Linear(3072, 128)
		self.fc2 = nn.Linear(128, 64)
		self.fc3 = nn.Linear(64, 10)

		# Activations shared by the forward pass
		self.ReLU = nn.ReLU()
		self.softmax = nn.Softmax(dim=1)

	def forward(self, x):
		"""Map a batch of flattened images to class probabilities.

		Args:
			x: Tensor of shape (batch, 3072).

		Returns:
			Tensor of shape (batch, 10) whose rows sum to 1.
		"""
		x = self.fc1(x)
		x = self.ReLU(x)
		x = self.fc2(x)
		x = self.ReLU(x)
		# No ReLU after the output layer: clamping negative scores to zero before
		# softmax would discard information and distort the probabilities.
		x = self.fc3(x)
		x = self.softmax(x)

		return x

In [None]:
# Instantiate the network; the bare `model` expression on the last line makes
# the notebook display the module's repr
model = Network()
model

### Using `torch.nn.functional`

In [None]:
import torch.nn.functional as F


class Network(nn.Module):
	"""Same 3072 -> 128 -> 64 -> 10 classifier, written with the functional API."""

	def __init__(self):
		super().__init__()

		self.fc1 = nn.Linear(3072, 128)
		self.fc2 = nn.Linear(128, 64)
		self.fc3 = nn.Linear(64, 10)

	def forward(self, x):
		"""Return class probabilities of shape (batch, 10) for inputs of shape (batch, 3072)."""
		# Two hidden layers, each followed by a ReLU activation
		hidden_1 = F.relu(self.fc1(x))
		hidden_2 = F.relu(self.fc2(hidden_1))
		# Output layer followed by a softmax over the class dimension
		scores = self.fc3(hidden_2)
		return F.softmax(scores, dim=1)

In [None]:
# Instantiate the functional-API version of the network; the bare `model`
# expression on the last line makes the notebook display the module's repr
model = Network()
model

### Initializing weights and biases

In [None]:
# Re-initialise the first layer in place: biases set to zero, weights drawn
# from a normal distribution with standard deviation 0.01
first_layer = model.fc1
first_layer.bias.data.fill_(0)
first_layer.weight.data.normal_(std=0.01)

### Forward pass

In [16]:
def view_classify(img, ps, classes):
	"""Show an image next to a bar chart of its predicted class probabilities.

	Args:
		img: Image tensor laid out as (channels, height, width).
		ps: Tensor of class probabilities for that image; singleton dimensions
			are squeezed away before plotting.
		classes: Sequence of class names, one per probability.
	"""
	# detach() is the supported way to leave the autograd graph (instead of
	# `.data`), and cpu() keeps the NumPy conversion valid for GPU tensors.
	ps = ps.detach().cpu().numpy().squeeze()

	fig, (ax1, ax2) = plt.subplots(figsize=(6, 9), ncols=2)
	img = img.detach().cpu().permute(1, 2, 0) # Reorder the tensor dimensions for plt.imshow
	# Min-max rescale into [0, 1], the range plt.imshow expects for float data
	img = (img - img.min()) / (img.max() - img.min())
	ax1.imshow(img.numpy())
	ax1.axis('off')
	ax2.barh(np.arange(len(classes)), ps)
	ax2.set_aspect(0.1)
	ax2.set_yticks(np.arange(len(classes)))
	ax2.set_yticklabels(classes, size="small")
	ax2.set_title("Class probability")
	ax2.set_xlim(0, 1.1)

	plt.tight_layout()


In [None]:
images, labels = next(dataiter)

# Reshape images into a 1D vector, new shape is (batch size, 3072)
images = images.view(images.shape[0], -1)  # Flatten the images

print(images.shape)
# Forward pass through the network
img_idx = 0
img = images[img_idx] # (3072,)
# Call the module itself rather than `.forward()` so that registered hooks run;
# gradients are not needed just to look at a prediction.
with torch.no_grad():
	ps = model(img.unsqueeze(0)) # Add batch dimension to make it (1, 3072)

view_classify(img.view(3, 32, 32), ps, classes)

### Using `nn.Sequential`

In [None]:
# Hyperparameters for our network
input_size = 3072
hidden_sizes = [128, 64]
output_size = 10

# Build a feed-forward network: two ReLU hidden layers and a softmax output
model = nn.Sequential(
	nn.Linear(input_size, hidden_sizes[0]),
	nn.ReLU(),
	nn.Linear(hidden_sizes[0], hidden_sizes[1]),
	nn.ReLU(),
	nn.Linear(hidden_sizes[1], output_size),
	nn.Softmax(dim=1),
)

# Forward pass through the network and display output
images, labels = next(dataiter)
images = images.view(images.shape[0], -1) # Flatten to (batch size, 3072)
img = images[0]
# Call the module itself rather than `.forward()` so that registered hooks run;
# gradients are not needed just to look at a prediction.
with torch.no_grad():
	ps = model(img.unsqueeze(0)) # Add batch dimension to make it (1, 3072)
view_classify(img.view(3, 32, 32), ps, classes)