# NN BASICS

In [1]:
import os
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Helpers

In [170]:
def calculateOutput(layer:torch.nn.modules.linear.Linear, input_arr:"np.ndarray | torch.Tensor",
            is_relu=True)->np.ndarray:
    """
    Calculates the output from a linear layer, optionally followed by ReLU.
       N - number of samples
       O - number of outputs
       I - number of inputs

    Args:
        layer: Linear layer (with or without a bias term)
        input_arr: np.ndarray or torch.Tensor (I X N); each column is one sample
        is_relu: if True, apply ReLU to the layer output

    Returns
        np.ndarray: N X O
    """
    # Detach so the manual computation does not extend the autograd graph.
    weight_tensor = layer.weight.detach()  # O X I
    # Accept NumPy arrays as well as tensors; match the dtype and device of the weights
    # so that the matrix product below is well defined.
    input_tensor = torch.as_tensor(input_arr, dtype=weight_tensor.dtype,
            device=weight_tensor.device)
    result = weight_tensor.matmul(input_tensor)  # rows are outputs, columns are samples
    if layer.bias is not None:
        # Reshape the bias to a column so it broadcasts across the sample columns.
        result = result + layer.bias.detach().reshape(layer.out_features, 1)
    if is_relu:
        result = nn.ReLU()(result)
    # Move to the CPU before converting; NumPy cannot view accelerator memory.
    return result.detach().cpu().numpy().T
    
######### TESTS
# Setup: three samples with two features each (one sample per row)
test_data_arr = np.array([[1, 2], [10, 20], [100, 200]], dtype=float)
# calculateOutput is given the transpose: features in rows, samples in columns
input_arr = torch.tensor(test_data_arr.T).float()
layer = nn.Linear(2, 4)
# Reference values come from PyTorch's own forward pass on the same layer
layer_tensor = layer(input_arr.T).detach()
# Without relu: the sum of squared differences must be negligible
result = calculateOutput(layer, input_arr, is_relu=False)
assert ((result - layer_tensor.numpy())**2).sum() <= 1e-5
# With relu: compare against ReLU applied to the reference values
result = calculateOutput(layer, input_arr, is_relu=True)
relu_reference = nn.ReLU()(layer_tensor).numpy()
assert ((result - relu_reference)**2).sum() <= 1e-5
del relu_reference  # keep the notebook namespace identical to before
print("OK!")

OK!


In [156]:
# Exploratory: list the attribute and method names available on `layer` (the listing is long).
dir(layer)

['T_destination',
 '__annotations__',
 '__call__',
 '__class__',
 '__constants__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_apply',
 '_backward_hooks',
 '_backward_pre_hooks',
 '_buffers',
 '_call_impl',
 '_compiled_call_impl',
 '_forward_hooks',
 '_forward_hooks_always_called',
 '_forward_hooks_with_kwargs',
 '_forward_pre_hooks',
 '_forward_pre_hooks_with_kwargs',
 '_get_backward_hooks',
 '_get_backward_pre_hooks',
 '_get_name',
 '_is_full_backward_hook',
 '_load_from_state_dict',
 '_load_state_dict_post_hooks',
 '_load_state_dict_pre_hooks',
 '_maybe_warn_non_full_backward_hook',
 '_modules',
 '_named_members',
 '_non_per

# Get GPU

In [2]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using mps device


# Basic Network

In [3]:
class NeuralNetwork(nn.Module):
    """Small fully connected network: 2 -> 3 -> 4 -> 5, with ReLU between the linear layers."""

    def __init__(self):
        super().__init__()
        # Collapse every dimension after the first into a single feature dimension.
        self.flatten = nn.Flatten()
        # Layers are created in forward order, so their indices in the
        # Sequential container are 0 through 4.
        stages = [
            nn.Linear(2, 3),
            nn.ReLU(),
            nn.Linear(3, 4),
            nn.ReLU(),
            nn.Linear(4, 5),
        ]
        self.linear_relu_stack = nn.Sequential(*stages)

    def forward(self, x):
        """Flatten ``x`` and return the output of the final linear layer (no activation applied)."""
        flattened = self.flatten(x)
        return self.linear_relu_stack(flattened)

In [4]:
# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=2, out_features=3, bias=True)
    (1): ReLU()
    (2): Linear(in_features=3, out_features=4, bias=True)
    (3): ReLU()
    (4): Linear(in_features=4, out_features=5, bias=True)
  )
)


In [5]:
# Push one random sample through the network and pick the most probable class.
X = torch.rand(1, 2, 1, device=device)
logits = model(X)
# Softmax over dim 1 (the class dimension) turns the logits into probabilities.
pred_probab = torch.softmax(logits, dim=1)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([3], device='mps:0')


In [6]:
# Display the softmax output computed above: one row of class probabilities per sample.
pred_probab

tensor([[0.1628, 0.1750, 0.2248, 0.2270, 0.2103]], device='mps:0',
       grad_fn=<SoftmaxBackward0>)

In [122]:
# argmax over dim 0 compares across samples within each class column; with the single
# sample produced above, every column can only report index 0.
pred_probab.argmax(0)

tensor([0, 0, 0, 0, 0], device='mps:0')

In [7]:
# Collect the (name, tensor) pairs and show the size of each parameter's first dimension.
named_parameters = list(model.named_parameters())
[(name, len(param)) for name, param in named_parameters]

[('linear_relu_stack.0.weight', 3),
 ('linear_relu_stack.0.bias', 3),
 ('linear_relu_stack.2.weight', 4),
 ('linear_relu_stack.2.bias', 4),
 ('linear_relu_stack.4.weight', 5),
 ('linear_relu_stack.4.bias', 5)]

# NN Operation

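The core operation of each linear layer in a neural network is a matrix multiplication (plus a bias term). ReLU then thresholds the result, replacing negative values with zero.
One challenge is keeping track of the inputs to and outputs from each neuron.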