# Assignment 4
## Group Members:
### Nils Dunlop, e-mail: gusdunlni@student.gu.se
### Francisco Alejandro Erazo Piza, e-mail: guserafr@student.gu.se
### Chukwudumebi Ubogu, e-mail: gusuboch@student.gu.se
***

### Task 1: A small linear regression example in PyTorch
***

In [1]:
import pandas as pd
import numpy as np
import torch
from torch import optim

data = pd.read_csv('a4_synthetic.csv')

X = data.drop(columns='y').to_numpy()
Y = data.y.to_numpy()

(X, Y)

(array([[ 1.76405235,  0.40015721],
        [ 0.97873798,  2.2408932 ],
        [ 1.86755799, -0.97727788],
        [ 0.95008842, -0.15135721],
        [-0.10321885,  0.4105985 ],
        [ 0.14404357,  1.45427351],
        [ 0.76103773,  0.12167502],
        [ 0.44386323,  0.33367433],
        [ 1.49407907, -0.20515826],
        [ 0.3130677 , -0.85409574],
        [-2.55298982,  0.6536186 ],
        [ 0.8644362 , -0.74216502],
        [ 2.26975462, -1.45436567],
        [ 0.04575852, -0.18718385],
        [ 1.53277921,  1.46935877],
        [ 0.15494743,  0.37816252],
        [-0.88778575, -1.98079647],
        [-0.34791215,  0.15634897],
        [ 1.23029068,  1.20237985],
        [-0.38732682, -0.30230275],
        [-1.04855297, -1.42001794],
        [-1.70627019,  1.9507754 ],
        [-0.50965218, -0.4380743 ],
        [-1.25279536,  0.77749036],
        [-1.61389785, -0.21274028],
        [-0.89546656,  0.3869025 ],
        [-0.51080514, -1.18063218],
        [-0.02818223,  0.428

In [2]:
np.random.seed(1)
torch.manual_seed(1)

w_init = np.random.normal(size=(2, 1))
b_init = np.random.normal(size=(1, 1))

# Declare the parameter tensors
w = torch.tensor(w_init, dtype=torch.float32, requires_grad=True)
b = torch.tensor(b_init, dtype=torch.float32, requires_grad=True)

eta = 1e-2
opt = optim.SGD([w, b], lr=eta)

for i in range(10):

    sum_err = 0

    for row in range(X.shape[0]):
        x = torch.tensor(X[row, :], dtype=torch.float32).view(1, -1)
        y = torch.tensor(Y[row], dtype=torch.float32).view(1, -1)

        # Forward pass.
        y_pred = x.mm(w) + b
        err = (y_pred - y).pow(2).sum()

        # Backward and update.
        opt.zero_grad()
        err.backward()
        opt.step()

        # For statistics.
        sum_err += err.item()

    mse = sum_err / X.shape[0]
    print(f'Epoch {i+1}: MSE =', mse)

Epoch 1: MSE = 0.7999662647869263
Epoch 2: MSE = 0.017392394159767264
Epoch 3: MSE = 0.009377418162580966
Epoch 4: MSE = 0.009355327616258364
Epoch 5: MSE = 0.009365440349979508
Epoch 6: MSE = 0.009366988411857164
Epoch 7: MSE = 0.009367207068114567
Epoch 8: MSE = 0.009367238481529512
Epoch 9: MSE = 0.009367244712136654
Epoch 10: MSE = 0.009367244620257224


### Task 2: Implementing tensor arithmetics for forward computations
***

In [3]:
class Tensor:

    # Constructor. Just store the input values.
    def __init__(self, data, requires_grad=False, grad_fn=None):
        self.data = data
        self.shape = data.shape
        self.grad_fn = grad_fn
        self.requires_grad = requires_grad
        self.grad = None

    # So that we can print the object or show it in a notebook cell.
    def __repr__(self):
        dstr = repr(self.data)
        if self.requires_grad:
            gstr = ', requires_grad=True'
        elif self.grad_fn is not None:
            gstr = f', grad_fn={self.grad_fn}'
        else:
            gstr = ''
        return f'Tensor({dstr}{gstr})'

    # Extract one numerical value from this tensor.
    def item(self):
        return self.data.item()

    # Operator +
    def __add__(self, right):
        if not isinstance(right, Tensor):
            right = tensor(right)
        return addition(self, right)

    # Operator -
    def __sub__(self, right):
        if not isinstance(right, Tensor):
            right = tensor(right)
        return subtraction(self, right)

    # Operator @
    def __matmul__(self, right):
        if not isinstance(right, Tensor):
            raise ValueError("Right operand must be a Tensor")
        return matrix_multi(self, right)

    # Operator **
    def __pow__(self, right):
        # NOTE! We are assuming that right is an integer here, not a Tensor!
        if not isinstance(right, int):
            raise Exception('only integers allowed')
        if right < 2:
            raise Exception('power must be >= 2')
        return power(self, right)
    
    def sum(self):
        sum_data = np.sum(self.data)
        grad_fn = SummationNode(self) if self.requires_grad else None
        return Tensor(sum_data, requires_grad=self.requires_grad, grad_fn=grad_fn)


    # Backward computations. Will be implemented in Task 4.
    def backward(self, grad_output=None):
        if not self.requires_grad:
            return

        if grad_output is None:
            grad_output = np.ones_like(self.data)

        if self.grad is None:
            self.grad = grad_output
        else:
            self.grad += grad_output

        if self.grad_fn:
            self.grad_fn.backward(grad_output)
            
# A small utility where we simply create a Tensor object
def tensor(data, requires_grad=False):
    return Tensor(data, requires_grad)

# We define helper functions to implement the various arithmetic operations.

# This function takes two tensors as input, and returns a new tensor holding
# the result of an element-wise addition on the two input tensors.
def addition(left, right):
    new_data = left.data + right.data
    grad_fn = grad_fn = AdditionNode(left, right) if left.requires_grad or right.requires_grad else None
    return Tensor(new_data, requires_grad=left.requires_grad or right.requires_grad, grad_fn=grad_fn)

def subtraction(left, right):
    new_data = left.data - right.data
    grad_fn = SubtractionNode(left, right) if left.requires_grad or right.requires_grad else None
    return Tensor(new_data, requires_grad=left.requires_grad or right.requires_grad, grad_fn=grad_fn)

def matrix_multi(left, right):
    if left.shape[1] != right.shape[0]:
        raise ValueError("Shapes are not compatible for matrix multiplication")
    new_data = np.dot(left.data, right.data)
    grad_fn = MatMulNode(left, right) if left.requires_grad or right.requires_grad else None
    return Tensor(new_data, requires_grad=left.requires_grad or right.requires_grad, grad_fn=grad_fn)

def power(tensor, exponent):
    new_data = tensor.data ** exponent
    grad_fn = PowerNode(tensor, exponent) if tensor.requires_grad else None
    return Tensor(new_data, requires_grad=tensor.requires_grad, grad_fn=grad_fn)

class Node:
    def __init__(self):
        pass

    def backward(self, grad_output):
        raise NotImplementedError('Backward method not implemented.')

    def __repr__(self):
        return str(type(self))

class AdditionNode(Node):
    def __init__(self, left, right):
        super().__init__()
        self.left = left
        self.right = right
    
    def backward(self, grad_output):
        if self.left.requires_grad:
            if self.left.grad is None:
                self.left.grad = grad_output
            else:
                self.left.grad += grad_output

            if self.left.grad_fn:
                self.left.grad_fn.backward(grad_output)
        
        if self.right.requires_grad:
            if self.right.grad is None:
                self.right.grad = grad_output
            else:
                self.right.grad += grad_output

            if self.right.grad_fn:
                self.right.grad_fn.backward(grad_output)


class SubtractionNode(Node):
    def __init__(self, left, right):
        super().__init__()
        self.left = left
        self.right = right

    def backward(self, grad_output):
        if self.left.requires_grad:
            if self.left.grad is None:
                self.left.grad = grad_output
            else:
                self.left.grad += grad_output

            if self.left.grad_fn:
                self.left.grad_fn.backward(grad_output)
        
        if self.right.requires_grad:
            neg_grad_output = -grad_output
            if self.right.grad is None:
                self.right.grad = neg_grad_output
            else:
                self.right.grad += neg_grad_output

            if self.right.grad_fn:
                self.right.grad_fn.backward(neg_grad_output)

class MatMulNode(Node):
    def __init__(self, left, right):
        super().__init__()
        self.left = left
        self.right = right

    def backward(self, grad_output):
        if self.left.requires_grad:
            grad_left = np.dot(grad_output, self.right.data.T)
            if self.left.grad is None:
                self.left.grad = grad_left
            else:
                self.left.grad += grad_left
            if self.left.grad_fn:
                self.left.grad_fn.backward(grad_left)

        if self.right.requires_grad:
            grad_right = np.dot(self.left.data.T, grad_output)
            if self.right.grad is None:
                self.right.grad = grad_right
            else:
                self.right.grad += grad_right
            if self.right.grad_fn:
                self.right.grad_fn.backward(grad_right)

class PowerNode(Node):
    def __init__(self, tensor, exponent):
        super().__init__()
        self.tensor = tensor
        self.exponent = exponent

    def backward(self, grad_output):
        if self.tensor.requires_grad:
            # Compute the gradient of the power function w.r.t. its input
            grad_tensor = self.exponent * (self.tensor.data ** (self.exponent - 1))

            # Multiply by the incoming gradient
            if self.tensor.grad is None:
                self.tensor.grad = grad_output * grad_tensor
            else:
                self.tensor.grad += grad_output * grad_tensor

            # Propagate the gradient to the tensor's grad_fn if it exists
            if self.tensor.grad_fn:
                self.tensor.grad_fn.backward(grad_output * grad_tensor)

class SummationNode(Node):
    def __init__(self, tensor):
        super().__init__()
        self.tensor = tensor

    def backward(self, grad_output):
        if self.tensor.requires_grad:
            if self.tensor.grad is None:
                self.tensor.grad = np.ones_like(self.tensor.data) * grad_output
            else:
                self.tensor.grad += np.ones_like(self.tensor.data) * grad_output

            if self.tensor.grad_fn:
                self.tensor.grad_fn.backward(grad_output)

In [4]:
# Two tensors holding row vectors.
x1 = tensor(np.array([[2.0, 3.0]]))
x2 = tensor(np.array([[1.0, 4.0]]))
# A tensors holding a column vector.
w = tensor(np.array([[-1.0], [1.2]]))

# Test the arithmetic operations.
test_plus = x1 + x2
test_minus = x1 - x2
test_power = x2 ** 2
test_matmul = x1 @ w

print(f'Test of addition: {x1.data} + {x2.data} = {test_plus.data}')
print(f'Test of subtraction: {x1.data} - {x2.data} = {test_minus.data}')
print(f'Test of power: {x2.data} ** 2 = {test_power.data}')
print(f'Test of matrix multiplication: {x1.data} @ {w.data} = {test_matmul.data}')

# Check that the results are as expected. Will crash if there is a miscalculation.
assert(np.allclose(test_plus.data, np.array([[3.0, 7.0]])))
assert(np.allclose(test_minus.data, np.array([[1.0, -1.0]])))
assert(np.allclose(test_power.data, np.array([[1.0, 16.0]])))
assert(np.allclose(test_matmul.data, np.array([[1.6]])))

Test of addition: [[2. 3.]] + [[1. 4.]] = [[3. 7.]]
Test of subtraction: [[2. 3.]] - [[1. 4.]] = [[ 1. -1.]]
Test of power: [[1. 4.]] ** 2 = [[ 1. 16.]]
Test of matrix multiplication: [[2. 3.]] @ [[-1. ]
 [ 1.2]] = [[1.6]]


### Task 3: Building the computational graph
***

In [5]:
x = tensor(np.array([[2.0, 3.0]]))
w1 = tensor(np.array([[1.0, 4.0]]), requires_grad=True)
w2 = tensor(np.array([[3.0, -1.0]]), requires_grad=True)

test_graph = x + w1 + w2

print('Computational graph top node after x + w1 + w2:', test_graph.grad_fn)

assert(isinstance(test_graph.grad_fn, AdditionNode))
assert(test_graph.grad_fn.right is w2)
assert(test_graph.grad_fn.left.grad_fn.left is x)
assert(test_graph.grad_fn.left.grad_fn.right is w1)

Computational graph top node after x + w1 + w2: <class '__main__.AdditionNode'>


### Task 4: Implementing the backward computations
***

In [6]:
x = tensor(np.array([[2.0, 3.0]]))
w = tensor(np.array([[-1.0], [1.2]]), requires_grad=True)
y = tensor(np.array([[0.2]]))

# We could as well write simply loss = (x @ w - y)**2
# We break it down into steps here if you need to debug.

model_out = x @ w
diff = model_out - y
loss = diff ** 2

loss.backward()

print('Gradient of loss w.r.t. w =\n', w.grad)

assert(np.allclose(w.grad, np.array([[5.6], [8.4]])))
assert(x.grad is None)
assert(y.grad is None)

Gradient of loss w.r.t. w =
 [[5.6]
 [8.4]]


In [7]:
pt_x = torch.tensor(np.array([[2.0, 3.0]]))
pt_w = torch.tensor(np.array([[-1.0], [1.2]]), requires_grad=True)
pt_y = torch.tensor(np.array([[0.2]]))

pt_model_out = pt_x @ pt_w
pt_model_out.retain_grad() # Keep the gradient of intermediate nodes for debugging.

pt_diff = pt_model_out - pt_y
pt_diff.retain_grad()

pt_loss = pt_diff ** 2
pt_loss.retain_grad()

pt_loss.backward()
pt_w.grad

tensor([[5.6000],
        [8.4000]], dtype=torch.float64)

### Task 5: Optimizers to update the model parameters
***

In [8]:
class Optimizer:
    def __init__(self, params):
        self.params = params

    def zero_grad(self):
        for p in self.params:
            p.grad = np.zeros_like(p.data)

    def step(self):
        raise NotImplementedError('Step method not implemented.')
    
class SGD(Optimizer):
    def __init__(self, params, lr):
        super().__init__(params)
        self.lr = lr

    def step(self):
        for p in self.params:
            p.data -= self.lr * p.grad

### Training Loop

In [9]:
np.random.seed(1)

w_init = np.random.normal(size=(2, 1))
b_init = np.random.normal(size=(1, 1))

w = tensor(w_init, requires_grad=True)
b = tensor(b_init, requires_grad=True)

optimizer = SGD([w, b], lr=1e-4)

for i in range(10):
    sum_err = 0
    optimizer.zero_grad()

    for row in range(X.shape[0]):
        x = tensor(X[row, :].reshape(1, -1), requires_grad=False)
        y = tensor(Y[row].reshape(1, -1), requires_grad=False)

        # Forward pass
        y_pred = x @ w + b
        err = ((y_pred - y) ** 2).sum()

        # Backward pass
        err.backward()

        # Update parameters
        optimizer.step()

        # For statistics
        sum_err += err.data

    mse = sum_err / X.shape[0]
    print(f'Epoch {i+1}: MSE = {mse}')

Epoch 1: MSE = 1.8310931353310658
Epoch 2: MSE = 0.025294540833766224
Epoch 3: MSE = 0.01007722900503592
Epoch 4: MSE = 0.009120703993537975
Epoch 5: MSE = 0.00904464904981755
Epoch 6: MSE = 0.00903635318076267
Epoch 7: MSE = 0.00903496660122323
Epoch 8: MSE = 0.0090346589582873
Epoch 9: MSE = 0.009034582893444464
Epoch 10: MSE = 0.00903456348115694


### Task 6: Classifying raisins
***

In [10]:
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

# You may need to edit the path, depending on where you put the files.
a4data = pd.read_csv('raisins.csv')

X = scale(a4data.drop(columns='Class'))
Y = 1.0*(a4data.Class == 'Besni').to_numpy()

np.random.seed(0)
shuffle = np.random.permutation(len(Y))
X = X[shuffle]
Y = Y[shuffle]

Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, random_state=0, test_size=0.2)

In [11]:
Xtrain.shape, Ytrain.shape

((720, 7), (720,))