# Alex Stewart CS6966
## Assign 2
### 3/20/2022

In [1]:
import numpy as np
import time

#### Helper methods for part1

In [2]:
def compute_accuracy(w, x, y):
    return sum((np.sign(w.T @ x_) == y_) for x_, y_ in zip(x,y)) / x.shape[0]

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def logistic_regression_grad(w,x_,y_):
    z = y_ * w.T @ x_
    sig_z = sigmoid(z)
    return (sig_z - 1) * x_ * y_

def gradient_descent(x, y, w, grad_fn, verbose=False, lr=.01, epochs=100):
    start_time = time.time()
    if verbose:
        print(f"epoch: -1, acc: {compute_accuracy(w,x,y)}")
    for _ in range(epochs):
        dw = np.zeros_like(w)

        for x_, y_ in zip(x,y):
            dw += grad_fn(w,x_,y_)

        dw /= x.shape[0]
        w -= (lr * dw)
        if verbose:
            print(f"epoch: {_}, acc: {compute_accuracy(w,x,y)}")

    print(f"Gradient Descent Time: {time.time() - start_time}")
    return w

#### Part 1 c

In [8]:
def part1_c():
    x,y = [], []

    for i in range(-50, 51):
        if i != 0:
            y.append(-1 if i < 0 else 1)
            x.append(i)

    x = np.array(x)[:, None]
    y = np.array(y)
    w = np.array([-1.0])
    w = gradient_descent(x, y, w, logistic_regression_grad, lr=.1, epochs=100)
    print(f"w: {w}")
    print(f"Percent correct: {compute_accuracy(w,x,y) * 100}%")

part1_c()

Gradient Descent Time: 0.08445525169372559
w: [0.36490098]
Percent correct: 100.0%


#### Part 1 d

In [9]:
def part1_d():
    x,y = [], []

    for i in range(-50, 51):
        if i != 0:
            y.append(-1 if i < 0 else 1)
            x.append(i)
            
    x = np.array(x)
    x[np.abs(x) >= 46] *= -1
    x = x[:, None]
    y = np.array(y)
    w = np.array([-1.0])
    w = gradient_descent(x, y, w, logistic_regression_grad, lr=.1, epochs=100)
    print(f"w: {w}")
    print(f"Percent correct: {compute_accuracy(w,x,y) * 100}%")

part1_d()

Gradient Descent Time: 0.09066915512084961
w: [0.0461487]
Percent correct: 90.0%


In [None]:
def l2_linear_regression_grad(w,A,b):
    return 2 * A.T @ (A @ w - b)

def linear_regression_vector_distance(x_star, x):
    return np.linalg.norm(x_star - x)

In [None]:
def part3_c():
    n = 500
    m = 2 * n
    A = np.random.rand(m, n) * 2 - 1
    x = np.random.rand(n, 1) * 2 - 1
    eta = np.random.randn(m, 1) * np.sqrt(.5)
    b = A @ x + eta
    return A, x, b[:, 0]

part3_c()

(array([[-0.2286413 ,  0.68527178,  0.0964632 , ..., -0.19293938,
         -0.02690234, -0.55392523],
        [ 0.27428428,  0.35239413,  0.44423531, ..., -0.44290557,
          0.53557557,  0.01575959],
        [-0.29973449, -0.06573577, -0.21873935, ...,  0.992451  ,
         -0.12730884,  0.01413272],
        ...,
        [-0.31587477, -0.45872725,  0.40315716, ...,  0.74958992,
         -0.64211545,  0.56830635],
        [-0.54881338, -0.10015992,  0.80303149, ...,  0.32218025,
          0.47460752,  0.07378771],
        [-0.8973508 , -0.40609086,  0.1721541 , ...,  0.65523265,
         -0.11633309, -0.38476633]]),
 array([[-0.78394085],
        [ 0.536388  ],
        [ 0.81057663],
        [-0.05696002],
        [ 0.7695703 ],
        [-0.06637499],
        [ 0.6064948 ],
        [-0.43171045],
        [ 0.40647518],
        [ 0.18696819],
        [-0.38866781],
        [ 0.64975358],
        [-0.56175278],
        [ 0.79504867],
        [-0.72620188],
        [-0.21647658],
     

In [None]:
def linear_regression_gradient_descent(A, b, grad_fn, verbose=False, lr=.01, epochs=100):
    start_time = time.time()
    w = np.zeros(A.shape[1])
    if verbose:
        print(f"epoch: -1, acc: {compute_accuracy(w,A,b)}")
    for _ in range(epochs):
        dw = grad_fn(w,A,b)
        dw /= A.shape[0]
        w -= (lr * dw)
        if verbose:
            print(f"epoch: {_}, acc: {compute_accuracy(w,A,b)}")

    print(f"Gradient Descent Time: {time.time() - start_time}")
    return w
    
def part3_d():
    A, x_star, b = part3_c()
    x = linear_regression_gradient_descent(A, b, l2_linear_regression_grad, lr=.1, epochs=50)
    print(f"Distance between x* and x: {linear_regression_vector_distance(x_star, x[:, None])}")
    return A, x_star, b, x

part3_d()

Gradient Descent Time: 0.06603026390075684
Distance between x* and x: 3.3993679008558115


(array([[-0.25196921,  0.32562965, -0.34282373, ...,  0.52980921,
         -0.23994131, -0.12733562],
        [-0.23774413,  0.19658396,  0.12172585, ..., -0.71538069,
          0.51640301,  0.70320663],
        [ 0.27207373, -0.43266605,  0.93181165, ..., -0.90011387,
          0.92911007, -0.74146054],
        ...,
        [-0.2112599 ,  0.87863461, -0.45774312, ...,  0.55573989,
          0.97338225,  0.32649215],
        [-0.71624701, -0.17135678, -0.99501427, ..., -0.65317285,
          0.84295723, -0.7784827 ],
        [-0.51738059, -0.27249285, -0.41145325, ...,  0.61913372,
          0.78937638,  0.26046094]]),
 array([[ 0.54858549],
        [ 0.04406302],
        [-0.85491655],
        [-0.414642  ],
        [-0.40657744],
        [-0.5559478 ],
        [-0.36264426],
        [-0.25326506],
        [ 0.45512638],
        [-0.26303948],
        [ 0.7778491 ],
        [ 0.89260672],
        [-0.3700174 ],
        [-0.55432392],
        [-0.50336659],
        [ 0.5525222 ],
     

In [None]:

def closed_form_Linear_Regression(A, b):
    return np.linalg.inv(A.T @ A) @ A.T @ b


def part3_e():
    A, x_star, b, x = part3_d()

    start_time = time.time()
    x_star_closed = closed_form_Linear_Regression(A, b)
    print(f"Closed Form Time: {time.time() - start_time}")
    print(f"Distance between closed form x* and gradient descent x*: {linear_regression_vector_distance(x_star, x_star_closed[:, None])}")

part3_e()

Gradient Descent Time: 0.07103323936462402
Distance between x* and x: 3.5939113483455425
Closed Form Time: 0.1999680995941162
Distance between closed form x* and gradient descent x*: 1.2181971435805035


In [None]:
def part4_f(x,y,n,a,b):
    total = 0
    for i in range(n * 2):
        total += (x - a[i])**2 + (y - b[i])**2

    return total / (2 * n)

def stochastic_gradient_descent(a, b, n, verbose=False, epochs=100):
    start_time = time.time()
    x_t, y_t = 1, 1

    for t in range(epochs):
        lr = .1 / np.sqrt(t + 1)
        i = np.random.randint(0, a.shape[0])
        a_, b_ = a[i], b[i]
        x_t -= 2 * lr * (x_t - a_)
        y_t -= 2 * lr * (y_t - b_)
        
        if verbose:
            print(f"epoch: {t}, x: {x_t}, y: {y_t}, f(x_t,y_t): {part4_f(x_t,y_t,n,a,b)}")
    return x_t, y_t


def part4_c():
    n = 500
    a, b = [], []
    for i in range(1, n + 1):
        a.append(i / n)
        b.append(-1)
    for i in range(n+1, 2*n + 1):
        a.append((i - n) / n)
        b.append(1)
        

    stochastic_gradient_descent(np.array(a), np.array(b), n, verbose=True, epochs=1000)

part4_c()

epoch: 0, x: 0.9716, y: 1.0, f(x_t,y_t): 2.3047973599999962
epoch: 1, x: 0.8797326869882438, y: 0.717157287525381, f(x_t,y_t): 1.7410860232440988
epoch: 2, x: 0.7970869950881485, y: 0.5188770430866397, f(x_t,y_t): 1.4402338945026651
epoch: 3, x: 0.7773782955793337, y: 0.5669893387779757, f(x_t,y_t): 1.4811948725552242
epoch: 4, x: 0.7579353897493964, y: 0.42683355151697566, f(x_t,y_t): 1.3315356752062708
epoch: 5, x: 0.7246276046524163, y: 0.4736323960658634, f(x_t,y_t): 1.3576699521656703
epoch: 6, x: 0.6917728459428736, y: 0.5134220468718825, f(x_t,y_t): 1.3833294769632554
epoch: 7, x: 0.6866977393357473, y: 0.40640694765784924, f(x_t,y_t): 1.2829832574989772
epoch: 8, x: 0.6733178900466975, y: 0.3126464844806593, f(x_t,y_t): 1.2107742794882614
epoch: 9, x: 0.6588145532312651, y: 0.3561185378159595, f(x_t,y_t): 1.2350588461877594
epoch: 10, x: 0.6661223106309648, y: 0.2743415130672153, f(x_t,y_t): 1.1858616432601181
epoch: 11, x: 0.6318206430335298, y: 0.3162374253442533, f(x_t,y_t):