# Least Squares
In this homework we derive a method for finding the least-squares solution using gradient descent with a constant step size. In this code, we will compare the closed-form solution to the iteratively computed solution.

In [None]:
#Run these to import all of the necessary files
import numpy as np
import scipy as sp
import scipy.io as spio
import scipy.signal as sig
import scipy.io.wavfile as wf
import matplotlib as plt
from pylab import *
import os
import re
import time
from matplotlib import rc
rc('text', usetex=True)
# Plots graphs in the browser window
%matplotlib inline

In [None]:
# Build the random least-squares problem used by the cells below.
# Seeding NumPy's global generator first makes A and y identical on every run.
np.random.seed(121234)
A = np.random.rand(30, 2)        # data matrix: 30 rows, 2 columns
y = np.random.rand(len(A), 1)    # target column vector: one entry per row of A


## A)
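Create the function LS_closed_sol, which takes in a matrix A and a vector y and returns the squared error, the least-squares solution $\vec{x}$, and the resulting best approximation of $\vec{y}$, all computed using the closed-form solution of least squares regression.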
As a reminder, the squared error is: $$||A\vec{x}-\vec{y}||^2$$
The closed form solution for $\vec{x}$ is: $$\vec{x} = (A^TA)^{-1}A^T\vec{y}$$

In [None]:
def LS_closed_sol(A,y):
    """
    The following function computes the least squares solution x_star
    of A*x = y in closed form, i.e. x_star = (A^T A)^{-1} A^T y.

    Instead of forming the inverse explicitly, the normal equations
    (A^T A) x = A^T y are solved directly, which gives the same x_star
    with less work and better numerical accuracy.

    Inputs:
    A : data matrix
    y : desired output

    Outputs:
    e2 : least squared error possible for the given system, |A*x_star - y|^2
    x_star : least squares solution for x
    y_star : best approximation of y, A*x_star

    Raises:
    numpy.linalg.LinAlgError if A^T A is singular
    """
    AT = A.T
    ATA = AT.dot(A)
    # Solve (A^T A) x = A^T y rather than multiplying by inv(A^T A).
    x_star = np.linalg.solve(ATA, AT.dot(y))
    y_star = A.dot(x_star)
    # Use np.linalg.norm explicitly so the function does not depend on a
    # bare `norm` leaked into the namespace by a star import.
    e2 = np.linalg.norm(y_star - y) ** 2
    return e2, x_star, y_star

In [None]:
# Evaluate the closed-form solution on the A and y generated above. The call's
# return value is the tuple (e2, x_star, y_star); it is not assigned here.
LS_closed_sol(A,y)

## B)
Given the step size $\alpha$, we can create our function to iteratively solve for $\vec{x}$.

Create a function LS_iter(A,y,alpha,x2,e2,iters) which takes as input a matrix A, vector y, constant step size alpha, initial guess vector x2, the closed-form squared error e2, and constant iters. The function should calculate 
    $$\vec{x}(t+1) = \vec{x}(t)-\alpha A^T(A\vec{x}(t)-\vec{y})$$ 
for `iters` update steps, as well as the squared error at each time step. The function should return, for each time step, the gap between the squared error $||A\vec{x}(t)-\vec{y}||^2$ and e2, together with $\vec{x}(t)$ and $\vec{y}(t) = A\vec{x}(t)$.

In [None]:
def LS_iter(A,y,alpha,x2,e2,iters):
    """
    The following function iteratively calculates x(t) for the
    following discrete time system (gradient descent with a constant
    step size on |A*x - y|^2):
        x2(t + 1) = x2(t) - alpha*A^T(A*x2(t) - y)
    input:
        A : Data matrix
        y : Desired output
        alpha : step size in gradient descent
        x2 : initial guess for the solution of A*x2 = y (not modified)
        e2 : reference squared error (e.g. the closed-form least squares
             error) that is subtracted from the error at every step
        iters : number of update steps to perform
    output:
        e : list of length iters with |A*x2(t) - y|^2 - e2 for each step
        x : array stacking x2(t) for all time steps along the first axis
        y : array stacking y(t) = A*x2(t) for all time steps along the first axis

    Note: values are recorded after each update, so the first entry of
    every output corresponds to the iterate after one step, not to the
    initial guess x2.
    """
    at = A.T
    # Expand the update as x <- x - (alpha*A^T*A) x + alpha*A^T*y and
    # precompute the two loop-invariant terms once.
    alata = alpha * at.dot(A)
    alaty = alpha * at.dot(y)
    e = []
    x = []
    estimates = []
    p = x2
    for _ in range(iters):
        # Rebinding p creates a new array each step, so x2 is left untouched.
        p = p - alata.dot(p) + alaty
        # Compute the current estimate of y once and reuse it for both the
        # error and the stored estimate.
        y_hat = A.dot(p)
        e.append(np.linalg.norm(y_hat - y) ** 2 - e2)
        x.append(p)
        estimates.append(y_hat)
    return e, np.asarray(x), np.asarray(estimates)

## C) 

Now, let's plot the convergence of $x(t)$ towards $x^*$ as time progresses. Let's also plot the components of $x(t)$ and the gap between the iterative error and the closed-form error as a function of time. 

Before you run the following code block, take a minute to guess how these plots would look. Do they make sense?

In [None]:
# Bind plt to pyplot explicitly so the plotting calls below do not depend on
# which earlier import happened to define `plt` last.
import matplotlib.pyplot as plt

# First let's find the minimum and maximum eigenvalues of A^T A.
# A^T A is symmetric, so eigh is used: it returns real eigenvalues.
l, v = np.linalg.eigh(A.T.dot(A))
l_min = np.min(l)
l_max = np.max(l)

# Next, let's calculate the least squares x_star, y_star and error e2
e2, x_star, y_star = LS_closed_sol(A, y)

# Finally, let's see how our solution x_iter and error evolves with
# time/iterations, for three step sizes derived from the extreme eigenvalues.
alpha_arr = [1/l_min, 2/(l_min + l_max), 1/l_max]
x2 = np.zeros((A.shape[1], 1))  # start every run from the zero vector
iters = 30

for alpha in alpha_arr:
    e_iter, x_iter, y_iter = LS_iter(A, y, alpha, x2, e2, iters)

    fig = plt.figure(figsize=(12, 4), dpi=80, facecolor='w', edgecolor='k')
    fig.suptitle('Gradient descent with step size = ' + str(alpha))

    # Left panel: trajectory of the iterates in the (x_1, x_2) plane.
    # Integer subplot specs are used because string specs such as '131'
    # are rejected by current Matplotlib releases.
    plt.subplot(1, 3, 1)
    plt.title('Movement of $x$ towards $x^*$')
    plt.plot(x_iter[:, 0], x_iter[:, 1], '.', color='tab:blue')
    plt.plot(x_star[0], x_star[1], 'o', color='tab:red')
    plt.xlabel('$x_1$'); plt.ylabel('$x_2$')
    plt.legend(['$x$', '$x^*$'])

    # Middle panel: each component of x as a function of the iteration.
    plt.subplot(1, 3, 2)
    plt.title('Movement of $x$ with time')
    plt.plot(np.arange(iters), x_iter[:, 0], '-', color='tab:orange')
    plt.plot(np.arange(iters), x_iter[:, 1], '-', color='tab:purple')
    plt.xlabel('time or iterations'); plt.ylabel('components of $x$')
    plt.legend(['$x_1$', '$x_2$'])

    # Right panel: gap between the iterative squared error and e2, log scale.
    # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
    plt.subplot(1, 3, 3)
    plt.title(r'Gap in error $||\epsilon||^2$')
    plt.plot(np.arange(iters), e_iter)
    plt.xlabel('time or iterations'); plt.ylabel('Error')
    plt.yscale('log')
    plt.legend([r'$\epsilon^2$ - closed-form-error'])

In [None]:
# Bind plt to pyplot explicitly so the plotting calls below do not depend on
# which earlier import happened to define `plt` last.
import matplotlib.pyplot as plt

# Let's try this approach where the step size is even smaller!
# and we can use more steps, to be near to a `continuous` approach to the solution.
# This cell reuses e2 and x_star, which it does not define itself, so the
# cell that computes them must have been run first.
alpha_arr = [.01]
x2 = np.zeros((A.shape[1], 1))  # start from the zero vector
iters = 300

for alpha in alpha_arr:
    e_iter, x_iter, y_iter = LS_iter(A, y, alpha, x2, e2, iters)

    fig = plt.figure(figsize=(12, 4), dpi=80, facecolor='w', edgecolor='k')
    fig.suptitle('Gradient descent with step size = ' + str(alpha))

    # Left panel: trajectory of the iterates in the (x_1, x_2) plane.
    # Integer subplot specs are used because string specs such as '131'
    # are rejected by current Matplotlib releases.
    plt.subplot(1, 3, 1)
    plt.title('Movement of $x$ towards $x^*$')
    plt.plot(x_iter[:, 0], x_iter[:, 1], '.', color='tab:blue')
    plt.plot(x_star[0], x_star[1], 'o', color='tab:red')
    plt.xlabel('$x_1$'); plt.ylabel('$x_2$')
    plt.legend(['$x$', '$x^*$'])

    # Middle panel: each component of x as a function of the iteration.
    plt.subplot(1, 3, 2)
    plt.title('Movement of $x$ with time')
    plt.plot(np.arange(iters), x_iter[:, 0], '-', color='tab:orange')
    plt.plot(np.arange(iters), x_iter[:, 1], '-', color='tab:purple')
    plt.xlabel('time or iterations'); plt.ylabel('components of $x$')
    plt.legend(['$x_1$', '$x_2$'])

    # Right panel: gap between the iterative squared error and e2, log scale.
    # Raw strings keep the LaTeX backslashes without invalid-escape warnings.
    plt.subplot(1, 3, 3)
    plt.title(r'Gap in error $||\epsilon||^2$')
    plt.plot(np.arange(iters), e_iter)
    plt.xlabel('time or iterations'); plt.ylabel('Error')
    plt.yscale('log')
    plt.legend([r'$\epsilon^2$ - closed-form-error'])