## Problem 1

#### Linear Regression algorithm

In [1]:
import numpy as np
import pandas as pd

def linear_regression(x, y):
    X = np.column_stack((np.ones(len(x)), x))

    # Calculate A
    A = np.dot(np.transpose(X), X)

    # Calculate b
    b = np.dot(np.transpose(X), y)

    # Calculate w
    w = np.dot(np.linalg.inv(A), b)
    return w

#### Dataset 1

In [2]:
data = pd.read_csv('HW6data.csv')
x = data['X']
y = data['Y']

print(linear_regression(x, y))

[98.71471814  0.97087035]


#### Dataset 2

In [3]:
data = pd.read_csv('HW6data2.csv')
x = data.iloc[:,:-1]
y = data.iloc[:,-1]

print(linear_regression(x, y))

[-3.79138915e+00 -3.85291886e+00  2.48826650e+00  3.49878682e-03
 -1.95366854e-03]


## Problem 2

#### Gradient Descent Algorithm

In [4]:
# Parameters:
# f: derivative of function
# x0 (numpy.ndarray): starting point
# n (float): learning rate 
# max_iter (int): max iterations
# delta (float): threshold for stopping condition

def gradient_descent(f, x0 = np.array([0]), n=0.1, max_iter=100000, delta=1e-3):
    x = x0.copy()
    
    for i in range(max_iter):
        # Run x through function
        grad = f(x)
        
        # Check if gradient is very small
        if np.linalg.norm(grad) < delta:
            break
        
        # Check if change in function change is very small
        change = np.linalg.norm(f(x - n * grad) - grad)
        if np.all(change < delta):
            break
        x = x - n * grad
        
    return x

#### Part A

In [5]:
# Derivative of function in part a
def a(x):
    return (x[0] - 2) * (x[0] - 5) + (x[0] - 1) * (x[0] - 5) + (x[0] - 1) * (x[0] - 2)

##### (i) η = 0.1, x0 = 3

In [6]:
x0 = np.array([3])
print(gradient_descent(a, x0, 0.1))

[3.8683935]


##### (ii) η = 0.05, x0 = 6

In [7]:
x0 = np.array([6])
print(gradient_descent(a, x0, 0.05))

[3.86887478]


##### (iii) η = 0.01, x0 = 1

In [8]:
x0 = np.array([1])
print(gradient_descent(a, x0, 0.01))

  return (x[0] - 2) * (x[0] - 5) + (x[0] - 1) * (x[0] - 5) + (x[0] - 1) * (x[0] - 2)
  change = np.linalg.norm(f(x - n * grad) - grad)


[-inf]


#### Part B

In [9]:
# Derivative of function in part b
def b(x):
    return np.array([(2 * x[0] - 2 * x[1] ** 2), (4 * x[1] ** 3 - 4 * x[0] * x[1])])

#### (i) η = 0.1, x0 = 1, y0 = 2

In [10]:
x0 = np.array([1, 2])
print(gradient_descent(b, x0, 0.1))

[ 1.03905906 -1.01937892]


#### (ii) η = 0.1, x0 = 4, y0 = −2

In [11]:
x0 = np.array([4, -2])
print(gradient_descent(b, x0, 0.1))

[ 4 -2]


#### (iii) η = 0.05, x0 = 0, y0 = 3

In [12]:
x0 = np.array([0, 3])
print(gradient_descent(b, x0, 0.05))

[ 0.27776391 -0.52567075]


## Problem 3

In [13]:
x = np.array([6, 6, 6, 2, 2, 5, 4, 5, 1, 4])
y = np.array([270, 260, 275, 405, 364, 295, 335, 308, 405, 305])
X = np.column_stack((np.ones(len(x)), x))

# Gradient of error function
def error_function(w, X, y):
    N = len(X)
    return (2 / N) * np.dot(np.transpose(X), np.dot(X, w) - y)

# Gradient descent algorithm
w = np.zeros(X.shape[1])
delta = 1e-3
n = 0.04

while(True):
    grad = error_function(w, X, y)
    
    w_new = w - n * grad
    
    # Check if change in w is less than delta
    if np.all(np.abs(w_new - w) < delta):
        break
    
    w = w_new
    
print('w:', w)

w: [436.51842448 -27.88557582]
