In [38]:
%matplotlib widget
import matplotlib.pyplot as plt
from sklearn.neighbors import KDTree
from data_utils import load_dataset
from nearest import knn
import numpy as np
from numpy.linalg import svd , inv

class linear():
    """
    This is the only place I need to use a linear model for this assignment so I keep it in this file
    Linear abstracts a linear model
    """
    def __init__(self, loss="least squares", w = None ):
        self.loss = loss
        self.w = w
    
    def calc_w(self, A, Y):
        '''
            Configures the weights of the model utilizing the closed form equation
        '''
        u, s, vh =svd(A.T, full_matrices = False) # economny version
        
        S_inv = inv(np.diag(s)) # calcuate the inverse
        self.w = u.dot(S_inv).dot(vh).dot(Y) # do the dot products
        return self.w
    
    def test_re(self,Xdata,T):
        """
        Calculate RMSE from predicted values from Xdata and T (True/Target)
        T is a matrix
        """
        return np.sqrt(np.mean(np.square(self.predict(Xdata) - T)))
    
    def test_cl(self, Xdata, T):
        '''
        test - classification
        One hot vector, will need to find the largest value then max it to one and set everything else to 0
        '''
        sample = self.predict(Xdata) # make predictions
        pred = np.equal(sample, np.max(sample,axis = 1)[:,None]) # convert to one hot based on largest value
        score = np.mean(pred == T) # compare and count correct
        return score
        
        
    def predict(self,X):
        '''
        Do a prediction on data matrix X
        X must be present in homogenous form (i.e extra 1 as the first feature)
        '''
        if self.w is None:
            print("Please initialize")
        return X.dot(self.w)

In [39]:
# Mauna_loa
# since I know what Mauna Loa looks like, I used it to develop my linear model
# I plotted the linear regression of the data against the test set as well as the train + validation set in
# the following figure.
l = linear()
xtrain, xvalid, xtest, ytrain, yvalid, ytest = load_dataset('mauna_loa')#, n_train=1000, d=2)
X = np.vstack([xtrain, xvalid])
Y = np.vstack([ytrain, yvalid])
# Need to make X values Homogenous
X = np.hstack([np.ones((X.shape[0], 1)),X])

l.calc_w(X, Y)
Xtest = np.hstack([np.ones((xtest.shape[0],1)), xtest])
print(f"Test Regression RMSE: {l.test_re(Xtest, ytest)}")
pred = l.predict(Xtest)


fig = plt.figure()
plt.plot( xtest, ytest)
plt.plot( xtest, pred)
plt.xlabel("x")
plt.ylabel("y")
plt.legend(["True", "Predicted"])
plt.title("Mauna Loa")
plt.show()


pred = l.predict(X)
fig = plt.figure()
plt.plot( np.vstack([xtrain, xvalid]), Y,"x")
plt.plot( np.vstack([xtrain, xvalid]), pred,"x")
plt.xlabel("x")
plt.ylabel("y")
plt.legend(["True", "Predicted"])
plt.title("Mauna Loa")
plt.show()






Test Regression RMSE: 0.34938831049910174


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [32]:
# Rosenbrock
# Visualizing this data set is not as easy so I don't do that here
# in a similar manner, I find the weights from the data then find RMSE on the test set
l = linear()
xtrain, xvalid, xtest, ytrain, yvalid, ytest = load_dataset('rosenbrock', n_train=1000, d=2)
X = np.vstack([xtrain, xvalid])
Y = np.vstack([ytrain, yvalid])
# Need to make X values Homogenous
X = np.hstack([np.ones((X.shape[0], 1)),X])

l.calc_w(X, Y)
Xtest = np.hstack([np.ones((xtest.shape[0],1)), xtest])
print(f"Test Regression RMSE: {l.test_re(Xtest, ytest)}")


Test Regression RMSE: 0.98408720306877


In [33]:
# Puma

l = linear()
xtrain, xvalid, xtest, ytrain, yvalid, ytest = load_dataset('pumadyn32nm')#, n_train=1000, d=2)
X = np.vstack([xtrain, xvalid])
Y = np.vstack([ytrain, yvalid])
# Need to make X values Homogenous
X = np.hstack([np.ones((X.shape[0], 1)),X])

l.calc_w(X, Y)
Xtest = np.hstack([np.ones((xtest.shape[0],1)), xtest])
print(f"Test Regression RMSE: {l.test_re(Xtest, ytest)}")


Test Regression RMSE: 0.8622512436598077


In [42]:
# iris
l = linear()
xtrain, xvalid, xtest, ytrain, yvalid, ytest = load_dataset('iris')#, n_train=1000, d=2)
X = np.vstack([xtrain, xvalid])
Y = np.vstack([ytrain, yvalid])
# Need to make X values Homogenous
X = np.hstack([np.ones((X.shape[0], 1)),X])

l.calc_w(X, Y)
Xtest = np.hstack([np.ones((xtest.shape[0],1)), xtest])
print( xtest.shape)
print(f"Test Correct Classifications: {l.test_cl(Xtest, ytest)}")



(15, 4)
Test Correct Classifications: 0.9111111111111111


In [43]:
# Mnist small 
l = linear()
xtrain, xvalid, xtest, ytrain, yvalid, ytest = load_dataset('mnist_small')#, n_train=1000, d=2)
X = np.vstack([xtrain, xvalid])
Y = np.vstack([ytrain, yvalid])
# Need to make X values Homogenous
X = np.hstack([np.ones((X.shape[0], 1)),X])

l.calc_w(X, Y)
Xtest = np.hstack([np.ones((xtest.shape[0],1)), xtest])
print( xtest.shape)
print(f"Test Correct Classifications: {l.test_cl(Xtest, ytest)}")



(1000, 784)
Test Correct Classifications: 0.971
