#### Use the housing and yacht datasets to estimate the regression weights using normal equations. Contrast the performance (measured 

#### through RMSE) to the results obtained using the gradient descent algorithm, based on a ten-fold cross validation scheme. In this 

#### problem you will calculate the analytical solution that we obtained through Normal equations to learn your weight vector, and contrast 

#### the performance (training and test RMSE) for the same fold with your gradient-descent based implementation for problem-1.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from csv import reader
from sklearn.preprocessing import StandardScaler
from random import randrange
import operator
%matplotlib inline  

def linear_grad_func(theta, x, y):
    """Return the gradient of the squared-error loss with respect to theta.

    The value is ``X_b^T (X_b . theta - y) / n``, where ``X_b`` is ``x`` with
    a leading column of ones for the intercept and ``n`` is the number of
    rows of ``x``.  The constant factor 2 of the exact mean-squared-error
    gradient is omitted; it only rescales the learning rate.

    Args:
        theta: weight vector, either 1-D ``(d + 1,)`` or a column
            ``(d + 1, 1)``; the first entry is the intercept.
        x: feature matrix with one sample per row.
        y: targets, shaped to match the predictions ``X_b . theta``.

    Returns:
        The gradient as an array with the same shape as ``theta``, so that
        ``theta - lr * grad`` never broadcasts into a matrix.

    Raises:
        ValueError: if the shapes of ``theta``, ``x`` and ``y`` do not yield
            exactly one gradient entry per weight.
    """
    # Build the design matrix once and reuse it for both the prediction and
    # the gradient.
    design = np.c_[np.ones(x.shape[0]), x]
    residual = np.dot(design, theta) - y
    grad = np.dot(design.T, residual) / x.shape[0]

    # Hand the gradient back in theta's own layout; a size mismatch (e.g. a
    # column theta combined with 1-D targets) fails loudly here instead of
    # broadcasting silently in the caller.
    return np.reshape(grad, np.shape(theta))

def linear_val_func(theta, x):
    """Evaluate the linear model: prepend an intercept column to ``x`` and
    multiply the resulting design matrix by ``theta``."""
    sample_count = x.shape[0]
    intercept_column = np.ones(sample_count)
    design = np.c_[intercept_column, x]
    return design.dot(theta)


def linear_cost_func(theta, x, y):
    """Return the mean squared error between the model's predictions for
    ``x`` (via ``linear_val_func``) and the targets ``y``."""
    residual = linear_val_func(theta, x) - y
    return np.mean(residual ** 2)


def linear_grad_desc(theta, X_train, Y_train, lr, max_iter, tolerance):
    """Fit linear-regression weights by batch gradient descent.

    Args:
        theta: initial weight vector (intercept first).
        X_train: training feature matrix, one sample per row.
        Y_train: training targets, shaped to match the model's predictions.
        lr: learning rate applied to every gradient step.
        max_iter: iteration cap.  The counter starts at 1, so at most
            ``max_iter - 1`` updates are performed.
        tolerance: stop as soon as the absolute change in cost between two
            consecutive iterations is not greater than this value.  The
            change is seeded with 1, so a tolerance >= 1 performs no update.

    Returns:
        A ``(theta, RMSE_iter)`` tuple: the final weights and the list of
        training RMSE values, starting with the RMSE of the initial weights
        followed by one entry per update.
    """
    def _train_rmse(weights):
        # Root-mean-squared error of `weights` on the training data.
        residual = linear_val_func(weights, X_train) - Y_train
        return np.sqrt(np.sum(residual ** 2) / Y_train.shape[0])

    cost = linear_cost_func(theta, X_train, Y_train)
    RMSE_iter = [_train_rmse(theta)]
    cost_change = 1
    i = 1

    while cost_change > tolerance and i < max_iter:
        pre_cost = cost

        grad = linear_grad_func(theta, X_train, Y_train)

        # Apply the step in theta's own layout.  Without the reshape, a row
        # gradient subtracted from a column theta would broadcast into a
        # (d + 1, d + 1) matrix instead of updating the weight vector.
        theta = theta - lr * np.reshape(grad, np.shape(theta))

        cost = linear_cost_func(theta, X_train, Y_train)
        RMSE_iter.append(_train_rmse(theta))
        cost_change = abs(cost - pre_cost)
        i += 1

    return theta, RMSE_iter

def load_dataset(filename):
    """Read a comma-separated file into a NumPy array of strings.

    Every line of the file becomes one row of the array and every
    comma-separated field (double quotes are honoured as the quote
    character) becomes one element.  No header handling or numeric
    conversion is done here; callers convert with ``.astype(float)``.

    Args:
        filename: path of the CSV file to read.

    Returns:
        ``np.asarray`` of the parsed rows.  Rows are expected to all have
        the same number of fields so the result is a regular 2-D array.
    """
    with open(filename, 'r') as dest_f:
        rows = list(reader(dest_f, delimiter=',', quotechar='"'))

    return np.asarray(rows)


# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
    """Randomly partition ``dataset`` into ``n_folds`` equally sized folds.

    Rows are drawn without replacement from a copy of ``dataset`` (the input
    itself is not modified).  Each fold holds ``int(len(dataset) / n_folds)``
    rows, so any remainder rows are left out of every fold.

    Returns:
        A list of ``n_folds`` lists of rows.
    """
    remaining = list(dataset)
    fold_size = int(len(dataset) / n_folds)
    folds = []
    for _fold_no in range(n_folds):
        # Pop one randomly chosen remaining row per slot of this fold.
        picked = [
            remaining.pop(randrange(len(remaining)))
            for _slot in range(fold_size)
        ]
        folds.append(picked)
    return folds

def linear_regression(dataset, n_folds, lr, max_iter, tolerance):
    """Evaluate gradient-descent linear regression with k-fold cross-validation.

    ``dataset`` is split into ``n_folds`` random folds.  Each fold is used
    once as the test set while the remaining folds form the training set.
    The last column of ``dataset`` is the target; all other columns are
    features, standardised with the mean/std of the training rows only.
    Per-fold train/test RMSE and the aggregate RMSE/SSE statistics are
    printed; the RMSE-per-iteration curve of the first fold is plotted.

    Args:
        dataset: 2-D numeric array, one sample per row, target in the last
            column.
        n_folds: number of cross-validation folds.
        lr: learning rate passed to ``linear_grad_desc``.
        max_iter: iteration cap passed to ``linear_grad_desc``.
        tolerance: convergence tolerance passed to ``linear_grad_desc``.

    Returns:
        None; all results are printed.
    """
    dataset_split = cross_validation_split(dataset, n_folds)
    RMSE_train = []
    RMSE_test = []
    SSE_train = []
    SSE_test = []

    for i in range(n_folds):
        test = np.array(dataset_split[i])
        # Stack the rows of every other fold into the training matrix.  A
        # comprehension is used instead of reduce(), which is not a builtin
        # on Python 3.
        train = np.array([
            row
            for j, fold in enumerate(dataset_split) if j != i
            for row in fold
        ])

        # Fit the scaler on the training features only, then apply the same
        # transformation to the test features.
        X_train = train[:, :-1]
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(test[:, :-1])

        # Targets and weights are kept 1-D: np.dot(X_b, theta) then yields a
        # 1-D prediction, the residual stays 1-D and the gradient comes back
        # with one entry per weight, so no step relies on broadcasting.
        Y_train = train[:, -1]
        Y_test = test[:, -1]

        # Random initial weights (intercept first).
        theta = np.random.rand(X_train.shape[1] + 1)
        fitted_theta, RMSE_iter = linear_grad_desc(theta, X_train, Y_train, lr, max_iter, tolerance)

        # Show the convergence curve for the first fold only.
        if i == 0:
            plt.figure()
            plt.plot(range(len(RMSE_iter)), RMSE_iter)
            plt.xlabel('Iteration')
            plt.ylabel('RMSE')

        # Compute each sum of squared errors once and derive the RMSE from it.
        sse_test = np.sum((linear_val_func(fitted_theta, X_test) - Y_test)**2)
        sse_train = np.sum((linear_val_func(fitted_theta, X_train) - Y_train)**2)
        RMSE_test.append(np.sqrt(sse_test / Y_test.shape[0]))
        RMSE_train.append(np.sqrt(sse_train / Y_train.shape[0]))
        SSE_test.append(sse_test)
        SSE_train.append(sse_train)
        print('Train RMSE: {}'.format(RMSE_train[i]))
        print('Test RMSE: {}'.format(RMSE_test[i]))
    print('Overall Mean Train RMSE: {}'.format(np.sum(RMSE_train)*1./len(RMSE_train)))
    print('Overall Mean Test RMSE: {}'.format(np.sum(RMSE_test)*1. / len(RMSE_test)))
    print('Overall Mean Train SSE: {}'.format(np.sum(SSE_train)*1./len(SSE_train)))
    print('Overall Mean Test SSE: {}'.format(np.sum(SSE_test)*1. / len(SSE_test)))
    print('std of train SSE: {}'.format(np.std(np.array(SSE_train), axis=0)))
    print('std of test SSE: {}'.format(np.std(np.array(SSE_test), axis=0)))
    
    
def normal_equation(X, y):
    """Solve linear least squares in closed form.

    A column of ones is prepended to ``X`` for the intercept and the weights
    are computed with the Moore-Penrose pseudo-inverse of that design
    matrix.  For a full-column-rank design matrix this equals the normal
    equation ``(X_b^T X_b)^-1 X_b^T y``; for a rank-deficient one (e.g. a
    constant or duplicated feature column) it returns the minimum-norm
    least-squares solution instead of failing on, or amplifying the error
    of, an explicit inverse of a singular matrix.

    Args:
        X: feature matrix, one sample per row.
        y: targets, 1-D ``(n,)`` or column ``(n, 1)``.

    Returns:
        Weight vector with the intercept first; ``(d + 1,)`` for 1-D ``y``
        and ``(d + 1, 1)`` for a column ``y``.
    """
    # add bias to x
    X_b = np.c_[np.ones(X.shape[0]), X]
    return np.linalg.pinv(X_b).dot(y)

def normal_equation_eval(dataset, n_folds):
    """Evaluate the closed-form linear regression fit with k-fold cross-validation.

    ``dataset`` is split into ``n_folds`` random folds.  Each fold is used
    once as the test set while the remaining folds form the training set.
    The last column of ``dataset`` is the target; all other columns are
    features, standardised with the mean/std of the training rows only.
    Per-fold train/test RMSE and the aggregate RMSE/SSE statistics are
    printed.

    Args:
        dataset: 2-D numeric array, one sample per row, target in the last
            column.
        n_folds: number of cross-validation folds.

    Returns:
        None; all results are printed.
    """
    dataset_split = cross_validation_split(dataset, n_folds)
    RMSE_train = []
    RMSE_test = []
    SSE_train = []
    SSE_test = []

    for i in range(n_folds):
        test = np.array(dataset_split[i])
        # Stack the rows of every other fold into the training matrix.  A
        # comprehension is used instead of reduce(), which is not a builtin
        # on Python 3.
        train = np.array([
            row
            for j, fold in enumerate(dataset_split) if j != i
            for row in fold
        ])

        # Fit the scaler on the training features only, then apply the same
        # transformation to the test features.
        X_train = train[:, :-1]
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(test[:, :-1])

        # Targets as column vectors, matching the column weight vector that
        # normal_equation returns for a column target.
        Y_train = train[:, -1][:, None]
        Y_test = test[:, -1][:, None]

        # Closed-form fit on the training fold.
        fitted_theta = normal_equation(X_train, Y_train)

        # Compute each sum of squared errors once and derive the RMSE from it.
        sse_test = np.sum((linear_val_func(fitted_theta, X_test) - Y_test)**2)
        sse_train = np.sum((linear_val_func(fitted_theta, X_train) - Y_train)**2)
        RMSE_test.append(np.sqrt(sse_test / Y_test.shape[0]))
        RMSE_train.append(np.sqrt(sse_train / Y_train.shape[0]))
        SSE_test.append(sse_test)
        SSE_train.append(sse_train)
        print('Train RMSE: {}'.format(RMSE_train[i]))
        print('Test RMSE: {}'.format(RMSE_test[i]))
    print('Overall Mean Train RMSE: {}'.format(np.sum(RMSE_train)*1./len(RMSE_train)))
    print('Overall Mean Test RMSE: {}'.format(np.sum(RMSE_test)*1. / len(RMSE_test)))
    print('Overall Mean Train SSE: {}'.format(np.sum(SSE_train)*1./len(SSE_train)))
    print('Overall Mean Test SSE: {}'.format(np.sum(SSE_test)*1. / len(SSE_test)))
    print('std of train SSE: {}'.format(np.std(np.array(SSE_train), axis=0)))
    print('std of test SSE: {}'.format(np.std(np.array(SSE_test), axis=0)))
    

def main():
    """Run the 10-fold normal-equation evaluation on the housing, yacht and
    concrete CSV files in turn, printing a heading before and a blank line
    after each report."""
    runs = (
        ("housing.csv", 'Housing dataset Normal Equation'),
        ("yachtData.csv", 'Yacht dataset Normal Equation'),
        ("concreteData.csv", 'Concrete dataset Normal Equation'),
    )
    for csv_path, heading in runs:
        # Fields are loaded as strings; convert to floats before fitting.
        dataset = load_dataset(csv_path).astype(float)
        print(heading)
        normal_equation_eval(dataset, n_folds=10)
        print('')
    
#    print('sklearn Linear Regression Example')
#    sklearn_linear_regression(dataset, n_folds=10)

# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

Housing dataset Normal Equation
Train RMSE: 4.73601227233
Test RMSE: 4.45567333622
Train RMSE: 4.75007070269
Test RMSE: 4.3291193144
Train RMSE: 4.55732987289
Test RMSE: 5.9368657932
Train RMSE: 4.59350438308
Test RMSE: 5.72835767431
Train RMSE: 4.84450149131
Test RMSE: 3.23421974139
Train RMSE: 4.6858984292
Test RMSE: 5.00967766154
Train RMSE: 4.6331047257
Test RMSE: 5.44128495769
Train RMSE: 4.74629679641
Test RMSE: 4.40096739993
Train RMSE: 4.64830483118
Test RMSE: 5.28858909461
Train RMSE: 4.76091307625
Test RMSE: 4.2730793073
Overall Mean Train RMSE: 4.6955936581
Overall Mean Test RMSE: 4.80978342806
Overall Mean Train SSE: 9924.99230709
Overall Mean Test SSE: 1187.08140521
std of train SSE: 351.973891117
std of test SSE: 365.543995648

Yacht dataset Normal Equation
Train RMSE: 8.61969355408
Test RMSE: 11.3306061535
Train RMSE: 8.91939250885
Test RMSE: 8.8481045397
Train RMSE: 8.62119112226
Test RMSE: 11.3246136435
Train RMSE: 9.00936341546
Test RMSE: 7.86710705303
Train RMSE: 8.7