In [161]:
import numpy as np
from numpy import loadtxt
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.utils import shuffle

In [162]:
class Regression:
    
    def __init__(self, feat_dims=0):
        # alpha is weight decay hyperparameter
        
        self.learning_rate = 0.000001
        self.epochs = 500
        self.batch_size = 100
        
        self.feat_dims = feat_dims
        self.output_classes = 1
        
        # create weights array/matrix size (num features x output)
        self.weights = 0.001 * np.random.rand(self.feat_dims, self.output_classes)
        self.alpha = 0.2  # regularization strength
        
        self.y_mean = None
        
    def normalize_feat(self, x, mean=None, std=None):
        # normalize the feature data.  test data must pass mean and std
        
        # calc feature-wise mean
        if mean is None:
            mean = np.mean(x, axis=0)
            
        # calc feature-wise std
        if std is None:
            std = np.std(x, axis=0)
        
        # sub the mean per column
        x_norm = x - mean

        # div by the standard dev.
        x_norm = x_norm / std

        return x_norm, mean, std
        
    def load_data(self, fname, bias=1):
        
        data = loadtxt(fname, delimiter=',')
        
        # loads data, normalizes, and appends a bias vector to the data

        TRAIN_NUM = 463714  # training data up to this point

        # process training data
        x_train = data[:TRAIN_NUM,1:].astype(float)  # parse train
        
        x_train, train_mean, train_std = self.normalize_feat(x_train)  # normalize data

        # create a col vector of ones
        col_bias = np.ones((x_train.shape[0], 1))

        # append bias with hstack
        x_train = np.hstack((x_train, col_bias))
        
        # convert label vals to int and to vector
        y_train = data[:TRAIN_NUM,0].astype(int)
        y_train = y_train.reshape((-1, 1))

        # -------------------
        
        # process test data
        x_test = data[TRAIN_NUM:,1:].astype(float)  # parse test
        x_test, _, _ = self.normalize_feat(x_test, train_mean, train_std)  # normalize data

        # create a col vector of ones
        col_bias = np.ones((x_test.shape[0], 1))

        # append bias with hstack
        x_test = np.hstack((x_test, col_bias))    

        # convert label vals to int and to vector
        y_test = data[TRAIN_NUM:,0].astype(int)
        y_test = y_test.reshape((-1, 1))  # convert to column vector
        
        return x_train, y_train, x_test, y_test

    def musicMSE(self, pred, gt):

        # make sure to floor by converting to int()
        diff = pred - gt
        mse = (np.square(diff)).mean()

        return mse
    
    def label_sub_mean(self, label):
        
        # find the mean
        self.y_mean = np.mean(label)
        
        # sub mean
        temp = label - self.y_mean
        
        return temp

    def train_loss(self, x, yt_sm):
        # calc the cost
        # yt = true label, sub mean label
        
        n_samples = x.shape[0]
        
        # predict
        pred_y = np.exp(np.dot(x, self.weights))
        
        # (x dot w)
        x_dot_w = np.dot(x, self.weights)
        
        # calc y dot times x_dot_w
        x_prod_y = x_dot_w * yt_sm
    
        # calc the diff, and divide
        loss = np.sum((pred_y - x_prod_y)) / n_samples
    
        return loss 
    
    def test_loss(self, x, yt_sm):
        # calc the cost at test time
        # yt = true label, is regular label
        # this function adds the y mean back
        
        n_samples = x.shape[0]  

        # predict
#         pred_y = np.exp(np.dot(x, self.weights)) + self.y_mean
        pred_y = np.exp(np.dot(x, self.weights))
    
        # (x dot w)
        x_dot_w = np.dot(x, self.weights)
        
        # need to add the mean back to label
#         yt = yt_sm + self.y_mean
        yt = yt_sm
    
        # calc y dot times x_dot_w
        x_prod_y = x_dot_w * yt
        
    
        # calc the diff, divide, and add the mean
        loss = np.sum((pred_y - x_prod_y)) / n_samples
    
        return loss 
    
    def gradient(self, x, yt_sm):
        
        n_samples = x.shape[0]

        pred_y = np.exp(np.dot(x, self.weights))
        
        x_trans_dot_pred_y = np.dot(x.T, pred_y)
        
        dW = x_trans_dot_pred_y - np.dot(x.T, yt_sm)
        
        # return the avg dW
        return dW / n_samples

    def calc_mse(self, x, y_sm):
        # preprocesses (adds the y_mean back to both x and y, and calls musicMSE)
        
        # predict
        pred_y = np.dot(x, self.weights)
        
        # add the y mean to the pred and convert to int to round
        pred_y += self.y_mean
        
        # convert to int to round
        pred_y = pred_y.astype(int)
        
        # add the y mean back to the labels
        y_labels = y_sm + self.y_mean
        
        # convert to int to round
        y_labels = y_labels.astype(int)
        
        # calc the MSE
        mse = self.musicMSE(pred_y, y_labels)
        
#         print('MSE: ', mse)
        
        return mse, pred_y

    def train_phase(self, x_train, y_train_sm):
        # shuffle data together, and forward prop by batch size, and add momentum

        num_train = x_train.shape[0]
        losses = []
        # Randomize the data (using sklearn shuffle)
        x_train, y_train_sm = shuffle(x_train, y_train_sm)

        # get the next batch (loop through number of training samples, step by batch size)
        for i in range(0, num_train, self.batch_size):

            # grab the next batch size
            x_train_batch = x_train[i:i + self.batch_size]
            y_train_batch_sm = y_train_sm[i:i + self.batch_size]

            # calc loss
            loss = self.train_loss(x_train_batch, y_train_batch_sm)
            
            dW = self.gradient(x_train_batch, y_train_batch_sm)
            
            self.weights -= dW * self.learning_rate  # update the weights
            
            losses.append(loss)  # save the losses

        return np.average(losses)  # return the average

    def test_phase(self, x, y_sm):
        # extra, but more explicit calc of loss and gradient during testing (no back prop)
        
        # calc loss
        loss = self.test_loss(x, y_sm)
        
        return loss
        
    def run_epochs(self, x_train, y_train_sm, x_test, y_test_sm):
        # start the training/valid by looping through epochs

        # store losses and accuracies here
        train_losses = []
        test_losses = []
        train_mse_arr = []
        test_mse_arr = []

        for e in range(self.epochs): # loop through epochs

            print('Epoch {} / {}...'.format(e + 1, self.epochs))

            # calc loss and accuracies
            train_loss = self.train_phase(x_train, y_train_sm)
            test_loss = self.test_phase(x_test, y_test_sm)
            
            train_mse, train_preds = self.calc_mse(x_train, y_train_sm)
            test_mse, test_preds = self.calc_mse(x_test, y_test_sm)

            # append vals to lists
            train_losses.append(train_loss)
            test_losses.append(test_loss)
            train_mse_arr.append(train_mse)
            test_mse_arr.append(test_mse)
            
            print('train loss: ', train_loss)
            print('test loss: ', test_loss)
            print('train MSE: ', train_mse)
            print('test MSE: ', test_mse)
        
#         return train_losses, test_losses

        # return all the vals
        return train_losses, test_losses, train_mse_arr, test_mse_arr, test_preds

#     def closed_form(self, x, yt):
#         # for ridge regression only
#         # yt is regular labels
#         # returns the weights w that allow you to find the prediction

#         xt = np.transpose(x)
#         alpha_identity = self.alpha * np.identity(len(xt))


#         theInverse = np.linalg.inv(np.dot(xt, x) + alpha_identity)
#         w = np.dot(np.dot(theInverse, xt), yt)
#         return w
    
    
    def plot_graph(self, train_losses, test_losses, train_mse, test_mse):
        # plot graph
        plt.subplot(1, 2, 1)
        plt.plot(train_losses, label="Train loss")
        plt.plot(test_losses, label="Test loss")
        plt.legend(loc='best')
        plt.title("Epochs vs. Loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss (Cross entropy)")

        plt.subplot(1, 2, 2)
        plt.plot(train_mse, label="Train MSE")
        plt.plot(test_mse, label="Test MSE")
        plt.legend(loc='best')
        plt.title("Epochs vs MSE")
        plt.xlabel("Epochs")
        plt.ylabel("MSE")
        plt.show()
        # save plot 
        plt.savefig('./regression_loss_acc')

    def make_mesh_grid(self, x, y, h=0.02):
        # make a mesh grid for the decision boundary
        
        x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
        y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
        x_x, y_y = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
        return x_x, y_y  # matrix of x-axis and y-axis

    def plot_contours(self, plt, x_x, y_y, **params):
        # plot contours    

        array = np.array([x_x.ravel(), y_y.ravel()])
        f = np.dot(array.T, self.weights)
        prob = self.softmax(f)
        Q = np.argmax(prob, axis=1) + 1
        Q = Q.reshape(x_x.shape)
        plt.contourf(x_x, y_y, Q, **params)  # takes in variable number of params

    def plot_decision_boundary(self, x, y):
        # plot decision boundary

        markers = ('o', '.', 'x')
        colors = ('yellow', 'grey', 'green')
        cmap = ListedColormap(colors[:len(np.unique(y))])
        x_x, y_y = self.make_mesh_grid(x, y)
        self.plot_contours(plt, x_x, y_y, cmap=plt.cm.coolwarm, alpha=0.8)
        
        # plot training points
        for idx, cl in enumerate(np.unique(y)):
            xBasedOnLabel = x[np.where(y[:,0] == cl)]
            plt.scatter(x=xBasedOnLabel[:, 0], y=xBasedOnLabel[:, 1], c=cmap(idx),
                        cmap=plt.cm.coolwarm, marker=markers[idx], label=cl)
        plt.xlim(x_x.min(), x_x.max())
        plt.ylim(y_y.min(), y_y.max())
        plt.xlabel("x1")
        plt.ylabel("x2")
        plt.title("Decision Boundary - Softmax Classifier")
        plt.legend(loc='upper left')
        plt.show()
        
    def plot_weights(self):
        
        plt.hist(self.weights, bins=12)
        plt.xlabel('bins')
        plt.ylabel('count')
        plt.title('Lasso Regression Weights Histogram')
        plt.show()


In [163]:
# create Regression() object to load data
regr = Regression()

In [164]:
# # load the data
# fname = 'YearPredictionMSD.txt'
# x_train, y_train, x_test, y_test = regr.load_data(fname)

In [165]:
# ==========  Ridge Regression Training  =============

feat_dims = x_train.shape[1]

# create Regression() object to run training
regr = Regression(feat_dims)

# convert labels to floats
y_train = y_train.astype(float)
y_test = y_test.astype(float)

# sub mean from y labels
y_train_sm = regr.label_sub_mean(y_train)
y_test_sm = regr.label_sub_mean(y_test)

train_losses, test_losses, train_mse_arr, test_mse_arr, test_preds = regr.run_epochs(x_train, y_train_sm, x_test, y_test_sm)



Epoch 1 / 500...
train loss:  0.8976223768246411
test loss:  6.795063738042287
train MSE:  119.82701190820204
test MSE:  3994103.795045612
Epoch 2 / 500...
train loss:  0.6853073459504554
test loss:  14.13523119738507
train MSE:  119.79489944232867
test MSE:  3994111.8862505085
Epoch 3 / 500...
train loss:  0.47997861275048287
test loss:  21.545770382577388
train MSE:  119.59691965306202
test MSE:  3994145.925025663
Epoch 4 / 500...
train loss:  0.28157627933221113
test loss:  29.061121039235587
train MSE:  119.25164648899968
test MSE:  3994192.7284383415
Epoch 5 / 500...
train loss:  0.08853140194751014
test loss:  36.72135900906519
train MSE:  118.84128794903755
test MSE:  3994218.2208556873
Epoch 6 / 500...
train loss:  -0.09667425029544614
test loss:  44.539474838155805
train MSE:  118.44401721750907
test MSE:  3994241.1872131084
Epoch 7 / 500...
train loss:  -0.27696031139645283
test loss:  52.55993045016579
train MSE:  118.03767624009626
test MSE:  3994265.2298231684
Epoch 8 / 50

train loss:  -4.909123492084312
test loss:  828.5621378793177
train MSE:  109.90347067373425
test MSE:  3995778.9856094206
Epoch 61 / 500...
train loss:  -4.960381520512827
test loss:  846.6986582684137
train MSE:  109.85090810283926
test MSE:  3995815.710154752
Epoch 62 / 500...
train loss:  -5.012169506485266
test loss:  864.8911834438592
train MSE:  109.79365514088425
test MSE:  3995850.0100521003
Epoch 63 / 500...
train loss:  -5.066376140006855
test loss:  883.1427735690512
train MSE:  109.74114432602855
test MSE:  3995887.9656407973
Epoch 64 / 500...
train loss:  -5.114914726334454
test loss:  901.4084163465808
train MSE:  109.69539198730251
test MSE:  3995925.1279851254
Epoch 65 / 500...
train loss:  -5.167321854645992
test loss:  919.7771327903274
train MSE:  109.64309897911212
test MSE:  3995964.4189537293
Epoch 66 / 500...
train loss:  -5.221930958502773
test loss:  938.1792237766643
train MSE:  109.59635464963318
test MSE:  3996002.9931242857
Epoch 67 / 500...
train loss:  -

KeyboardInterrupt: 

In [None]:
# plot loss and accuracies graphs

regr.plot_graph(train_losses, test_losses, train_mse_arr, test_mse_arr)

In [None]:
# plot the weights
regr.plot_weights()

In [None]:
test_preds