In [1]:
#import libs
import numpy.linalg as linalg
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

#get polynomial function
def getPolynomialDataMatrix(x, degree):
    """Build the polynomial design matrix for the sample points ``x``.

    The first column is all ones (the bias term) and column ``i`` holds
    ``x ** i`` for ``i = 1 .. degree``.

    When ``degree`` is below 1 no stacking takes place and the plain
    ``np.ones(x.shape)`` array is returned unchanged (so a 1-D ``x`` yields
    a 1-D vector rather than a one-column matrix).
    """
    #bias column first, then one column per power of x
    columns = [np.ones(x.shape)]
    columns.extend(x ** power for power in range(1, degree + 1))

    #nothing to stack: hand back the ones array with its original shape
    if len(columns) == 1:
        return columns[0]
    return np.column_stack(columns)

#polynomial regression function
def pol_regression(features_train, y_train, degree):
    """Fit a polynomial of the given degree to the training data.

    For ``degree == 0`` the fit is the mean of ``y_train``, returned as a
    one-element list. Otherwise the coefficients are found by solving the
    normal equations ``(X^T X) w = X^T y`` where ``X`` is the matrix produced
    by ``getPolynomialDataMatrix``; the result of ``np.linalg.solve`` is
    returned (bias coefficient first).
    """
    #constant model: the best fit is simply the mean of the targets
    if degree == 0:
        return [np.mean(y_train)]

    #design matrix and its transpose
    design = getPolynomialDataMatrix(features_train, degree)
    design_t = design.transpose()

    #left- and right-hand side of the normal equations
    gram = design_t.dot(design)
    moment = design_t.dot(y_train)

    #NOTE(review): forming X^T X squares the conditioning of the problem,
    #so high degrees may be numerically fragile - confirm if accuracy matters
    return np.linalg.solve(gram, moment)

#evaluating polynomial regression function
def eval_pol_regression(parameters, x, y, degree):
    """Return the root mean squared error of a fitted polynomial on (x, y).

    ``parameters`` are the coefficients for a polynomial of the given
    ``degree``; predictions are formed from the matrix returned by
    ``getPolynomialDataMatrix(x, degree)`` and compared against ``y``.
    """
    basis = getPolynomialDataMatrix(x, degree)

    if degree != 0:
        #regular case: matrix-vector product gives one prediction per sample
        predicted = basis.dot(parameters)
    else:
        #degree 0: the helper hands back np.ones(x.shape) rather than a
        #column matrix, so scale it element-wise instead of using .dot
        predicted = basis * (parameters)

    #root of the mean of the squared residuals
    squared_errors = np.square(np.subtract(predicted, y))
    return np.sqrt(np.mean(squared_errors))

#used to plot the points taking in the weights, label and degrees
def plot(w, text, degree):
    """Draw the data points and the fitted polynomial curve, then show it.

    Reads the module-level ``testX``/``testY``, ``trainX``/``trainY`` and
    ``ls`` (the x values the curve is sampled at). ``w`` holds the fitted
    coefficients, ``text`` is the legend label for the curve and ``degree``
    is the polynomial degree that ``w`` belongs to.

    After showing the figure a new empty figure is opened, so whatever is
    plotted next lands on a fresh canvas.
    """
    #scatter the test points (cyan) and the training points (magenta)
    plt.plot(testX, testY, 'co')
    plt.plot(trainX, trainY, 'mo')

    #sample the fitted curve; degree 0 needs an element-wise product
    #because the helper returns a plain vector of ones in that case
    basis = getPolynomialDataMatrix(ls, degree)
    if degree == 0:
        curve = basis * w
    else:
        curve = basis.dot(w)
    plt.plot(ls, curve, 'm')

    #labels follow plotting order: test points, training points, curve
    plt.legend(('Ground truth', 'Training data', text), loc = 'lower right')
    plt.show()
    plt.figure()
    
#evaluate the error of the test data and plot    
def evaluate(x, y, col):
    """Plot the RMSE on (x, y) against polynomial degree.

    Uses the module-level fitted weights ``w0``, ``w1``, ``w2``, ``w3``,
    ``w5`` and ``w10`` and draws one line in the matplotlib format ``col``
    on the current figure.

    The legend entries are assigned in plotting order, so the first call on
    a figure is labelled 'Test data' and the second 'Training data'.
    """
    #pair every fitted weight vector with the degree it was trained for
    fitted = ((w0, 0), (w1, 1), (w2, 2), (w3, 3), (w5, 5), (w10, 10))

    degrees = []
    errors = []
    for weights, degree in fitted:
        degrees.append(degree)
        errors.append(eval_pol_regression(weights, x, y, degree))

    plt.plot(degrees, errors, col)
    plt.legend(('Test data', 'Training data'), loc = 'upper right')

#import dataset (expects columns named 'x' and 'y')
csv = pd.read_csv('dataset_pol_regression.csv')

#split data into x and y arrays
#.values is used instead of .as_matrix(): as_matrix was deprecated in
#pandas 0.23 and removed in 1.0, while .values returns the same ndarray
csvX = csv['x'].values
csvY = csv['y'].values

#split data into training and test 70:30 (first 14 rows train, next 6 test)
trainX = csvX[0:14]
trainY = csvY[0:14]
testX = csvX[14:20]
testY = csvY[14:20]

#x values the fitted curves are sampled at: -5 to 5
ls = np.linspace(-5,5)

#perform the regression for each degree of interest
#(plot() and evaluate() read these module-level names directly)
w0 = pol_regression(trainX,trainY,0)
w1 = pol_regression(trainX,trainY,1)
w2 = pol_regression(trainX,trainY,2)
w3 = pol_regression(trainX,trainY,3)
w5 = pol_regression(trainX,trainY,5)
w10 = pol_regression(trainX,trainY,10)

#plot the regression charts, one figure per degree
plot(w0, '$x^{0}$', 0)
plot(w1, '$x^{1}$', 1)
plot(w2, '$x^{2}$', 2)
plot(w3, '$x^{3}$', 3)
plot(w5, '$x^{5}$', 5)
plot(w10, '$x^{10}$', 10)

#limit the y axis to +/-250 on the fresh figure left open by the last plot()
axes = plt.gca()
axes.set_ylim([-250,250])

#replot degree 10 regression on the limited axis
plot(w10, '$x^{10}$', 10)

#evaluate the test and train models
#(test first: evaluate() labels the lines in plotting order)
evaluate(testX, testY, 'c')
evaluate(trainX, trainY, 'm')

#plot the evaluation
plt.show()
plt.figure()



<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 0 Axes>