In [61]:
import pandas as pd
import numpy as np
import torch

In [62]:
print(torch.__version__)

2.2.0+cu121


# Task 1

Loading the synthetic dataset.

In [63]:
# You may need to edit the path, depending on where you put the files.
data = pd.read_csv('a4_synthetic.csv')

X = data.drop(columns='y').to_numpy()
Y = data.y.to_numpy()

Training a linear regression model for this synthetic dataset.

In [64]:
# Initialize parameters
np.random.seed(69)
w_init = np.random.normal(size=(2, 1))
b_init = np.random.normal(size=(1, 1))

# Declare parameter tensors
w = torch.tensor(w_init, requires_grad=True)
b = torch.tensor(b_init, requires_grad=True)

eta = 1e-2
opt = torch.optim.SGD([w, b], lr=eta)  # Define SGD optimizer

# Training loop
for i in range(10):
    sum_err = 0
    for row in range(X.shape[0]):
        x = torch.tensor(X[[row], :],)
        y = torch.tensor(Y[[row]])

        # Forward pass
        y_pred = x @ w + b  # Matrix multiplication for prediction
        err = torch.square((y_pred - y))  # Compute squared error loss

        # Backward pass and update
        
        err.backward()  # Compute gradients
        opt.step()  # Update parameters
        opt.zero_grad()  # Clear gradients

        # For statistics
        sum_err += err.item()

    mse = sum_err / X.shape[0]
    print(f'Epoch {i+1}: MSE =', mse)


Epoch 1: MSE = 0.39164808632788506
Epoch 2: MSE = 0.016984872842749174
Epoch 3: MSE = 0.009564297806691957
Epoch 4: MSE = 0.009377840166444317
Epoch 5: MSE = 0.009368557944496174
Epoch 6: MSE = 0.009367457445536662
Epoch 7: MSE = 0.009367280149523092
Epoch 8: MSE = 0.00936725047479511
Epoch 9: MSE = 0.009367245535839945
Epoch 10: MSE = 0.009367244720749514


# Task 2

In [131]:
class Tensor:

    # Constructor. Just store the input values.
    def __init__(self, data, requires_grad=False, grad_fn=None):
        self.data = data
        self.shape = data.shape
        self.grad_fn = grad_fn
        self.requires_grad = requires_grad
        self.grad = None

    # So that we can print the object or show it in a notebook cell.
    def __repr__(self):
        dstr = repr(self.data)
        if self.requires_grad:
            gstr = ', requires_grad=True'
        elif self.grad_fn is not None:
            gstr = f', grad_fn={self.grad_fn}'
        else:
            gstr = ''
        return f'Tensor({dstr}{gstr})'

    # Extract one numerical value from this tensor.
    def item(self):
        return self.data.item()

    # YOUR WORK WILL BE DONE BELOW

    # For Task 2:

    # Operator +
    def __add__(self, right):
        # Call the helper function defined below.
        return addition(self, right)

    # Operator -
    def __sub__(self, right):
        return subtraction(self, right)

    # Operator @
    def __matmul__(self, right):
        return matmul(self, right)

    # Operator **
    def __pow__(self, right):
        # NOTE! We are assuming that right is an integer here, not a Tensor!
        if not isinstance(right, int):
            raise Exception('only integers allowed')
        if right < 2:
            raise Exception('power must be >= 2')
        return exponentiation(self, right)


    def backward(self, grad_output=None):
        if self.grad_fn is not None:
            # If grad_fn is defined, we have computed this tensor using some operation.
            if grad_output is None:
                # Raise an error if gradient of the loss function is required
                self.grad_fn.backward(np.ones(self.shape))
            else:
                # This is an intermediate node in the computational graph.
                # This corresponds to any intermediate computation, such as
                # a hidden layer.
                self.grad_fn.backward(grad_output)
        else:
            # If grad_fn is not defined, this is an endpoint in the computational
            # graph: learnable model parameters or input data.
            if self.requires_grad:
                self.grad = grad_output
            else:
                # This tensor *does not require* a gradient to be computed. This
                # will typically be a tensor holding input data.
                return


# A small utility where we simply create a Tensor object. We use this to
# mimic torch.tensor.
def tensor(data, requires_grad=False):
    return Tensor(data, requires_grad)

# We define helper functions to implement the various arithmetic operations.

# This function takes two tensors as input, and returns a new tensor holding
# the result of an element-wise addition on the two input tensors.
def addition(left, right):
    new_data = left.data + right.data
    grad_fn = AdditionNode(left, right)
    
    return Tensor(new_data, grad_fn=grad_fn)

def subtraction(left, right):
    new_data = left.data - right.data
    grad_fn = SubtractionNode(left, right)
    
    return Tensor(new_data, grad_fn=grad_fn)

# Operator **
def exponentiation(self, right):
    # Check if the exponent is an integer and >= 2
    if not isinstance(right, int):
        raise ValueError("Exponent must be an integer")
    if right < 2:
        raise ValueError("Exponent must be >= 2")
    new_data = self.data ** right
    grad_fn = ExponentiationNode(tensor, right)
    
    return Tensor(new_data, grad_fn=grad_fn)

def matmul(left, right):
    # Check if the shapes of the tensors are compatible for matrix multiplication
    if left.shape[1] != right.shape[0]:
        raise ValueError("Shapes are not compatible for matrix multiplication")
    new_data = left.data @ right.data
    grad_fn = MatrixMultiplicationNode(left, right)
    return Tensor(new_data, grad_fn=grad_fn)

Some sanity checks.

In [132]:
# Two tensors holding row vectors.
x1 = tensor(np.array([[2.0, 3.0]]))
x2 = tensor(np.array([[1.0, 4.0]]))
# A tensors holding a column vector.
w = tensor(np.array([[-1.0], [1.2]]))

# Test the arithmetic operations.
test_plus = x1 + x2
test_minus = x1 - x2
test_power = x2 ** 2
test_matmul = x1 @ w

print(f'Test of addition: {x1.data} + {x2.data} = {test_plus.data}')
print(f'Test of subtraction: {x1.data} - {x2.data} = {test_minus.data}')
print(f'Test of power: {x2.data} ** 2 = {test_power.data}')
print(f'Test of matrix multiplication: {x1.data} @ {w.data} = {test_matmul.data}')

# Check that the results are as expected. Will crash if there is a miscalculation.
assert(np.allclose(test_plus.data, np.array([[3.0, 7.0]])))
assert(np.allclose(test_minus.data, np.array([[1.0, -1.0]])))
assert(np.allclose(test_power.data, np.array([[1.0, 16.0]])))
assert(np.allclose(test_matmul.data, np.array([[1.6]])))

Test of addition: [[2. 3.]] + [[1. 4.]] = [[3. 7.]]
Test of subtraction: [[2. 3.]] - [[1. 4.]] = [[ 1. -1.]]
Test of power: [[1. 4.]] ** 2 = [[ 1. 16.]]
Test of matrix multiplication: [[2. 3.]] @ [[-1. ]
 [ 1.2]] = [[1.6]]


# Tasks 3 and 4

In [133]:
class Node:
    def __init__(self):
        pass

    def backward(self, grad_output):
        if self.grad_fn is not None:
            self.grad_fn.backward(grad_output)

    def __repr__(self):
        return str(type(self))


class AdditionNode(Node):
    def __init__(self, left, right):
        self.left = left
        self.right = right

    def backward(self, grad_output):
        self.left.backward(grad_output)
        self.right.backward(grad_output)
        
        
class SubtractionNode(Node):
    def __init__(self, left, right):
        self.left = left
        self.right = right
        
    def backward(self, grad_output):
        self.left.backward(grad_output)
        self.right.backward(grad_output)
    
        
class MatrixMultiplicationNode(Node):
    def __init__(self, left, right):
        self.left = left
        self.right = right
    
    def backward(self, grad_output):
        self.left.backward(grad_output @ self.right.data.T)
        self.right.backward(self.left.data.T @ grad_output)
        
class ExponentiationNode(Node):
    def __init__(self, tensor, exponent):
        self.tensor = tensor
        self.exponent = exponent
        
    def backward(self, grad_output):
        grad_input = grad_output * self.exponent * self.tensor.data ** (self.exponent - 1)
        self.tensor.backward(grad_input)

Sanity check for Task 3.

In [134]:
x = tensor(np.array([[2.0, 3.0]]))
w1 = tensor(np.array([[1.0, 4.0]]), requires_grad=True)
w2 = tensor(np.array([[3.0, -1.0]]), requires_grad=True)

test_graph = x + w1 + w2

print('Computational graph top node after x + w1 + w2:', test_graph.grad_fn)

assert(isinstance(test_graph.grad_fn, AdditionNode))
assert(test_graph.grad_fn.right is w2)
assert(test_graph.grad_fn.left.grad_fn.left is x)
assert(test_graph.grad_fn.left.grad_fn.right is w1)

Computational graph top node after x + w1 + w2: <class '__main__.AdditionNode'>


Sanity check for Task 4.

In [135]:
x = tensor(np.array([[2.0, 3.0]]))
w = tensor(np.array([[-1.0], [1.2]]), requires_grad=True)
y = tensor(np.array([[0.2]]))

# We could as well write simply loss = (x @ w - y)**2
# We break it down into steps here if you need to debug.

model_out = x @ w
diff = model_out - y
loss = diff ** 2

loss.backward()

print('Gradient of loss w.r.t. w =\n', w.grad)

assert(np.allclose(w.grad, np.array([[5.6], [8.4]])))
assert(x.grad is None)
assert(y.grad is None)

AttributeError: 'function' object has no attribute 'data'

An equivalent cell using PyTorch code. Your implementation should give the same result for `w.grad`.

In [None]:
pt_x = torch.tensor(np.array([[2.0, 3.0]]))
pt_w = torch.tensor(np.array([[-1.0], [1.2]]), requires_grad=True)
pt_y = torch.tensor(np.array([[0.2]]))

pt_model_out = pt_x @ pt_w
pt_model_out.retain_grad() # Keep the gradient of intermediate nodes for debugging.

pt_diff = pt_model_out - pt_y
pt_diff.retain_grad()

pt_loss = pt_diff ** 2
pt_loss.retain_grad()

pt_loss.backward()
pt_w.grad

# Task 5

In [None]:
class Optimizer:
    def __init__(self, params):
        self.params = params

    def zero_grad(self):
        for p in self.params:
            p.grad = np.zeros_like(p.data)

    def step(self):
        raise NotImplementedError('Unimplemented')


class SGD(Optimizer):
    def __init__(self, params, lr):
        super().__init__(params)
        self.lr = lr

    def step(self):
        raise NotImplementedError('Unimplemented')

# Task 6

**For Colab users:** the following command downloads the second CSV file.

In [None]:
!wget --no-check-certificate https://www.cse.chalmers.se/~richajo/dit866/assignments/a4/data/raisins.csv

In [None]:
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

# You may need to edit the path, depending on where you put the files.
a4data = pd.read_csv('raisins.csv')

X = scale(a4data.drop(columns='Class'))
Y = 1.0*(a4data.Class == 'Besni').to_numpy()

np.random.seed(0)
shuffle = np.random.permutation(len(Y))
X = X[shuffle]
Y = Y[shuffle]

Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, random_state=0, test_size=0.2)

In [None]:
Xtrain.shape, Ytrain.shape