In [None]:
import pprint
import requests
import time

import numpy as np
import torch
import torchvision

import matplotlib.pyplot as plt
import torch.nn.functional as F

from io import BytesIO
from PIL import Image

# Lakota AI Code Camp Lesson 08: Matrix Algebra IV

## Matrix Multiplication

An important concept in mathematics and computer science is composition.
Recall that a function takes an input and gives an output.
So, if we compose two functions, say $f$ and $g$, denoted by $(f \circ g)(x) = f(g(x))$, then the input of $f$ is the output of $g$.

Often, we want to compose two linear transformations.
If $\textbf{A} = (a_{ij})_{ij}$ is a matrix for the linear transformation $T$ and $\textbf{B} = (b_{ij})_{ij}$ is a matrix for the linear transformation $S$, then $T(S(\textbf{v}))$ corresponds to the vector
$$
\left(
    \sum_{j=1}^{n} \sum_{k = 1}^{m} a_{ik}b_{kj} v_{j}
\right)_{i}
$$

The matrix corresponding to the linear transformation $T\circ S$ is
$$
\left(
    \sum_{k = 1}^{m} a_{ik}b_{kj}
\right)_{ij} = \textbf{A} \cdot \textbf{B}.
$$
Another way to think about it is that matrix multiplication comes from the dot products of the rows of the first matrix with the columns of the second matrix.
The dot product method is why we implemented the transpose method of the `Matrix` class.

We can also do matrix multiplication with a series of matrix vector operations.
The columns of the new matrix come from the multiplication of the matrix $\textbf{A}$ with the column vectors of $\textbf{B}$.

So, you have a number of different ways to think about and implement matrix multiplication!

In [None]:
class Vector():
    """A fixed-length vector whose entries are stored in a tuple.

    Supports indexing, ``len``, entry-wise addition and subtraction with
    another ``Vector``, and multiplication by a scalar on the right.
    """

    def __init__(self, values):
        """Create a vector from an iterable of entries.

        Args:
            values (iterable): The entries of the vector, in order.
        """
        self.values = tuple(values)

    def __getitem__(self, idx):
        """Return ``self.values[idx]`` (an entry, or a tuple for a slice)."""
        return self.values[idx]

    def __len__(self):
        """Return the number of entries in the vector."""
        return len(self.values)

    def _check_same_length(self, other):
        """Raise if ``other`` does not have as many entries as this vector.

        Raises:
            Exception: If the two lengths differ.
        """
        if len(self) != len(other):
            # Both lengths are evaluated with len() so the message shows the
            # two numbers rather than the repr of a bound method.
            raise Exception(f"Dimension mismatch: {len(self)} != {len(other)}")

    def __add__(self, other):
        """Return the entry-wise sum of this vector and ``other``.

        Args:
            other (Vector): A vector of the same length.

        Raises:
            Exception: If the two vectors have different lengths.
        """
        self._check_same_length(other)
        return Vector(x + y for x, y in zip(self.values, other.values))

    def __sub__(self, other):
        """Return the entry-wise difference of this vector and ``other``.

        Args:
            other (Vector): A vector of the same length.

        Raises:
            Exception: If the two vectors have different lengths.
        """
        self._check_same_length(other)
        return Vector(x - y for x, y in zip(self.values, other.values))

    def __mul__(self, scalar):
        """Return this vector with every entry multiplied by ``scalar``.

        Args:
            scalar (int or float): The number to scale by.

        Raises:
            Exception: If ``scalar`` is not an int or a float.
        """
        # isinstance also accepts subclasses of int and float, which an exact
        # type() comparison would reject.
        if not isinstance(scalar, (int, float)):
            raise Exception(f"{scalar} is not an integer or a float")

        return Vector(val * scalar for val in self.values)

    def __repr__(self):
        """Return the class name together with the stored tuple."""
        return f"Vector({self.values})"

    def __str__(self):
        """Return the entries in the form ``Vector(a, b, c)``."""
        return "Vector(" + ", ".join(str(val) for val in self.values) + ")"

In [None]:
class Matrix():
    """A matrix stored as a list of row lists.

    Attributes are set in ``__init__``: ``rows`` and ``cols`` hold the
    dimensions, ``shape`` is the ``(rows, cols)`` tuple, and ``values`` holds
    the entries as a list of lists.
    """

    def __init__(self, values):
        r"""
        Args:
            values (sequence of sequences): A sequence of n rows.
                The n rows all have the same length m (columns).

        Raises:
            Exception: If the rows do not all have the same length.
        """
        self.rows = len(values)
        # With no rows there is no first row to measure, so use 0 columns.
        self.cols = len(values[0]) if self.rows else 0
        self.shape = (self.rows, self.cols)
        self.values = self._create_matrix(values)

    def _create_matrix(self, values):
        """Validate the row lengths and return the rows as new lists.

        The rows are copied, so the object passed in by the caller is never
        modified and a tuple of rows is accepted as well.

        Raises:
            Exception: If a row's length differs from ``self.cols``.
        """
        for row in values:
            if len(row) != self.cols:
                raise Exception(f"Dimension mismatch: {len(row)} != {self.cols}")

        return [list(row) for row in values]

    def __repr__(self):
        """Return the class name together with the stored list of rows."""
        return f"Matrix({self.values})"

    def __getitem__(self, i, j=None):
        """Return a row, or a single entry when a column is also given.

        ``matrix[i]`` returns row ``i``. ``matrix[i, j]`` returns the entry in
        row ``i`` and column ``j``; Python hands the pair over as one tuple in
        ``i``, so it is unpacked here. ``j`` can also be passed explicitly
        when calling ``__getitem__`` directly.
        """
        if isinstance(i, tuple):
            i, j = i

        row = self.values[i]
        # Compare against None so that column 0 is a valid request.
        return row if j is None else row[j]

    def __len__(self):
        """Return the number of rows."""
        return self.rows

    def __str__(self):
        """Return the rows one per line, e.g. ``[[1, 2]`` then `` [3, 4]]``."""
        row_strings = [
            "[" + ", ".join(str(val) for val in row) + "]"
            for row in self.values
        ]
        return "[" + "\n ".join(row_strings) + "]"

    def transpose(self):
        """Return a new ``Matrix`` whose rows are the columns of this one."""
        # zip(*rows) yields the columns of the matrix one at a time.
        return Matrix([list(column) for column in zip(*self.values)])

    def __add__(self, other):
        """Return the entry-wise sum of this matrix and ``other``.

        Raises:
            Exception: If the two matrices do not have the same shape.
        """
        if not(self.rows == other.rows and self.cols == other.cols):
            raise Exception("The rows or columns do not match.")

        return Matrix([
            [x + y for x, y in zip(self_row, other_row)]
            for self_row, other_row in zip(self.values, other.values)
        ])

    def __sub__(self, other):
        """Return the entry-wise difference of this matrix and ``other``.

        Raises:
            Exception: If the two matrices do not have the same shape.
        """
        if not(self.rows == other.rows and self.cols == other.cols):
            raise Exception("The rows or columns do not match.")

        return Matrix([
            [x - y for x, y in zip(self_row, other_row)]
            for self_row, other_row in zip(self.values, other.values)
        ])

In [None]:
def dot_product(vector1, vector2):
    """Return the dot product of two equal-length vectors.

    Args:
        vector1: An object that supports ``len`` and integer indexing.
        vector2: An object that supports ``len`` and integer indexing.

    Returns:
        The sum of ``vector1[k] * vector2[k]`` over every index ``k``
        (``0`` when both vectors are empty).

    Raises:
        Exception: If the two vectors have different lengths.
    """
    # Without this check a longer second vector would be silently truncated.
    if len(vector1) != len(vector2):
        raise Exception(f"Dimension mismatch: {len(vector1)} != {len(vector2)}")

    return sum(vector1[num] * vector2[num] for num in range(len(vector1)))

In [None]:
def mat_vec_mul(matrix, vector):
    """Multiply a matrix by a vector.

    Args:
        matrix: An object with ``rows`` and ``cols`` attributes whose rows
            are reached with ``matrix[row]``.
        vector: An object that supports ``len`` and integer indexing.

    Returns:
        Vector: One entry per matrix row, each being the dot product of that
        row with ``vector``.

    Raises:
        Exception: If the number of matrix columns differs from the length
            of ``vector``.
    """
    # Same kind of guard as matrix_mult: the inner dimensions have to agree.
    if matrix.cols != len(vector):
        raise Exception(f"Dimension mismatch: {matrix.cols} != {len(vector)}")

    return Vector(
        dot_product(Vector(matrix[row]), vector) for row in range(matrix.rows)
    )

In [None]:
def matrix_mult(matrix1, matrix2):
    """Return the matrix product of ``matrix1`` and ``matrix2``.

    Args:
        matrix1: The left factor, with ``rows`` and ``cols`` attributes and
            entries reached with ``matrix1[row][k]``.
        matrix2: The right factor, accessed the same way.

    Returns:
        Matrix: A matrix with ``matrix1.rows`` rows and ``matrix2.cols``
        columns.

    Raises:
        Exception: If ``matrix1.cols`` differs from ``matrix2.rows``.
    """
    inner = matrix1.cols
    if inner != matrix2.rows:
        raise Exception(f"Dimension mismatch! Rows should equal columns, but {matrix1.cols} != {matrix2.rows}")

    product = []
    for i in range(matrix1.rows):
        # Entry (i, j) is the dot product of row i of matrix1 with column j
        # of matrix2.
        product.append([
            sum(matrix1[i][k] * matrix2[k][j] for k in range(inner))
            for j in range(matrix2.cols)
        ])

    return Matrix(product)

In [None]:
# Multiply a 2 x 3 matrix by a 3 x 2 matrix; the product is 2 x 2.
A = Matrix([[1, 2, 3], [4, 5, 6]])
B = Matrix([[1, 2], [3, 4], [5, 6]])

# Expected entries of the product: [[22, 28], [49, 64]].
C = matrix_mult(A, B)
print(C)

We're going to talk about the numpy and pytorch libraries and if we have time, we're going to go back to the BLAS functions!

The numpy `ndarray` class and the pytorch `Tensor` class operate similarly.
We're going to just call these tensors from now on.
How we can think of these is that they are an array of an array of an array of arrays, etc.
So, a vector is an array of numbers.
A matrix is an array of an array of numbers.
We can have an array of matrices, which is an array of an array of an array of numbers.
We can keep going.

The **shape** of a tensor is the length of each array.
For an $n$-dimensional vector, it has shape  $(n, )$.
For an $n \times m$ matrix, it has shape $(n, m)$.
For a collection of $p$ different $n \times m$ matrices, the collection would have shape $(p, n, m)$.

The primary example we're going to have is images.
A black and white image can be represented by a matrix, where the values of the matrix correspond to the intensity of the light.
So, if we had an image of height 224 pixels and width 224 pixels, the matrix would have shape $(224, 224)$.
Typically, the height is the number of rows and the width is the number of columns in the matrix.

A color image can be represented by 3 matrices; this corresponds to the different color channels **rgb**, which stands for red green blue.
There is a matrix that corresponds to the red values, a matrix that corresponds to the green values, and a matrix that corresponds to the blue values.
Each of these matrices would have shape $(224, 224)$ and the collection of the matrices would have shape $(3, 224, 224)$.
Typically, we call the number 3 above the channels.
You should note that different storage systems have the channel in different places.

If we have a collection of color images, say 16, then that could be represented as a tensor of shape $(16, 3, 224, 224)$.

There are several ways to make an `ndarray` or `tensor`.

In [None]:
x = np.array([[1, 2, 3], [4, 5, 6]])

In [None]:
x

array([[1, 2, 3],
       [4, 5, 6]])

In [None]:
x_tensor = torch.tensor([[1, 2, 3], [4, 5, 6]])

In [None]:
x_tensor

tensor([[1, 2, 3],
        [4, 5, 6]])

In [None]:
x_tensor = torch.tensor(x)

In [None]:
x_tensor

tensor([[1, 2, 3],
        [4, 5, 6]])

In [None]:
x_numpy = x_tensor.numpy()

In [None]:
x_numpy

array([[1, 2, 3],
       [4, 5, 6]])

We can get the shape from the `ndarray` and `tensor` by:

In [None]:
print(x_numpy.shape)
print(x_tensor.shape)

(2, 3)
torch.Size([2, 3])


An important fact to know is that all `ndarrays` and `tensors` are stored as flat arrays.
That is, they're stored as arrays of shape $(n, )$.
If we have a `tensor` of shape $(16, 3, 224, 224)$, then it's stored as a **flat** array of $16 \times 3 \times 224 \times 224 = 2,408,448$ values.
The `ndarray` and `tensor` classes map this flat data back to its multi-dimensional shape using what are called **strides**.
The stride tells us how to skip over the data.

In [None]:
x_numpy.strides

(24, 8)

The layout of `x_numpy` is $[1, 2, 3, 4, 5, 6]$.
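Each of these values in the array occupies 8 bytes.
If we want to get the element $4$ (row $1$, column $0$), we would have to go $1 \cdot 24 + 0 \cdot 8 = 24$ bytes into our array; the element $5$ (row $1$, column $1$) sits at $24 + 8 = 32$ bytes.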

It should be noted that the numpy strides are based on bytes! On most platforms, numpy ints are 64 bit by default, so they cover 8 bytes.

In [None]:
type(x_numpy[0][0])

numpy.int64

I prefer the way pytorch gives us the strides.
It abstracts away the size of our objects in memory and just treats it like an array of arrays.

In [None]:
x_tensor.stride()

(3, 1)

The layout of `x_tensor` is the same as `x_numpy`.
If we want to access the element in row $i$  and column $j$, we select the $3 * i + j$th element in our array.

If we have a tensor of shape $(16, 3, 28, 28)$, a collection of 16 color images with height and width of 28 pixels, then the stride will be $(2352, 784, 28, 1)$.
So, if we want to access the pixel at position $(5, 1, 15, 13)$ counting from 1, we call `images[4][0][14][12]` because indexing starts at 0.
Under the hood, the tensor looks up position `4 * 2352 + 0 * 784 + 14 * 28 + 12 * 1 = 9812` in its flat storage, which is the same value as `torch.flatten(images)[9812]`.

In [None]:
images = torch.rand((16, 3, 28, 28))

In [None]:
images.shape

torch.Size([16, 3, 28, 28])

In [None]:
images.stride()

(2352, 784, 28, 1)

In [None]:
images[4][0][14][12]

tensor(0.0178)

In [None]:
torch.flatten(images).shape

torch.Size([37632])

In [None]:
torch.flatten(images)[9812]

tensor(0.0178)

Now, we're going to look at how to do our operations above.

In [None]:
tensor1 = torch.tensor([[1, 2, 3], [4, 5, 6]])
tensor2 = torch.tensor([[1, 2], [3, 4], [5, 6]])

In [None]:
torch.matmul(tensor1, tensor2)

tensor([[22, 28],
        [49, 64]])

If we want to do matrix vector multiplication we do:

In [None]:
matrix = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
vector = torch.tensor([1, 5, 3])

In [None]:
torch.mv(matrix, vector)

tensor([20, 47, 74])

The dot product is:

In [None]:
vector1 = torch.tensor([1, 5, 3])
vector2 = torch.tensor([2, 1, 2])

In [None]:
torch.dot(vector1, vector2)

tensor(13)

Finally, one important concept to know about is broadcasting.
Broadcasting allows us to add or multiply a matrix and a vector together or in general any `tensor` with any `tensor` as long as a few rules are followed:

### Broadcasting rules

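These rules are determined by the shapes. Starting from the rightmost dimension and moving to the left, each pair of dimension sizes must either be equal or one of them must be 1. If one shape runs out of dimensions first, its missing dimensions are treated as having size 1.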

So, we can add a $(28, )$-tensor to a $(16, 3, 28, 28)$-tensor.
We can add a $(3, 1, 28)$-tensor to a $(16, 3, 28, 28)$-tensor.
We can add a $(3, 28, 28)$-tensor to a $(16, 3, 28, 28)$-tensor.
Or even a $(3, 4, 1)$-tensor to a $(3, 1, 5)$-tensor.

In [None]:
x1 = torch.rand(28)
y = torch.rand(16, 3, 28, 28)

In [None]:
(x1 + y).shape

torch.Size([16, 3, 28, 28])

In [None]:
x2 = torch.rand(3, 1, 28)

In [None]:
(x2 + y).shape

torch.Size([16, 3, 28, 28])

In [None]:
x3 = torch.rand(3, 28, 28)

In [None]:
(x3 + y).shape

torch.Size([16, 3, 28, 28])

In [None]:
z1 = torch.rand(3, 4, 1)
z2 = torch.rand(3, 1, 5)

In [None]:
(z1 + z2).shape

torch.Size([3, 4, 5])

In [None]:
def is_broadcastable(shape1, shape2):
    """Check whether two shapes satisfy the broadcasting rule.

    The shapes are compared from their last dimension towards their first,
    over as many dimensions as the shorter shape has. Each compared pair of
    sizes must be equal, or one of the two sizes must be 1.

    Args:
        shape1: A sequence of dimension sizes.
        shape2: A sequence of dimension sizes.

    Returns:
        bool: True if every compared pair passes the rule. False if any pair
        fails, or if either shape is empty so that no pair is compared.
    """
    # zip stops at the end of the shorter shape, so only the trailing
    # dimensions that both shapes have are paired up.
    trailing_pairs = list(zip(reversed(shape1), reversed(shape2)))

    # With nothing to compare the result is False.
    if not trailing_pairs:
        return False

    return all(
        first == second or first == 1 or second == 1
        for first, second in trailing_pairs
    )

In [None]:
is_broadcastable((8, 1, 6, 1), (7, 1, 5))

True