In [1]:
import numpy as np
import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter

import sklearn.linear_model as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.utils import resample, shuffle


from classTest import ClassTest

from numba import jit

In [2]:
# Quick check that the project-local ClassTest import works: build an instance
# with the argument 15 and print what getLayers() returns (recorded output: 15).
# NOTE(review): ClassTest is defined in classTest.py, not shown here; presumably
# the constructor argument is the layer count -- confirm against that module.
testObject = ClassTest(15)
print(testObject.getLayers())

15


In [3]:
def franke(x, y):
    """Evaluate the Franke test function at the point(s) (x, y).

    The value is the sum of three positive exponential bumps and one negative
    dip. Only arithmetic operators and np.exp are used, so x and y may be
    scalars or NumPy arrays of matching (broadcastable) shape.

    Parameters
    ----------
    x, y : float or array-like
        Coordinates at which to evaluate the function.

    Returns
    -------
    float or ndarray
        Function value(s), same shape as the broadcast of x and y.
    """
    # Every term is expressed in the stretched coordinates 9*x and 9*y.
    u = 9*x
    v = 9*y

    bump_a = 0.75*np.exp(-(0.25*(u-2)**2) - 0.25*((v-2)**2))
    bump_b = 0.75*np.exp(-((u+1)**2)/49.0 - 0.1*(v+1))
    bump_c = 0.5*np.exp(-(u-7)**2/4.0 - 0.25*((v-3)**2))
    dip = -0.2*np.exp(-(u-4)**2 - (v-7)**2)

    return bump_a + bump_b + bump_c + dip

In [4]:
def pval(order):
    """Return the number of terms in a full two-variable polynomial.

    Counts the monomials x**i * y**j with i + j <= order, which is
    (order + 1) * (order + 2) / 2.

    Parameters
    ----------
    order : int
        Maximum total degree of the polynomial.

    Returns
    -------
    int
        Number of polynomial terms (columns of the design matrix).
    """
    term_count = (order + 1) * (order + 2) / 2
    return int(term_count)

`designMatrix()` creates the design matrix by evaluating every term $x^i y^j$ with $i + j \le$ `order` of the two-dimensional polynomial at the input vectors x and y. The number of such terms (columns) is given by `pval(order)`.

In [5]:
@jit #makes the function faster
def designMatrix(x, y, order):
    """Build the polynomial design matrix for the points (x, y).

    Column k holds x**i * y**j for one exponent pair with i + j <= order.
    Pairs are enumerated with i (the power of x) in the outer loop and j (the
    power of y) in the inner loop, so column 0 is the constant term
    x**0 * y**0.

    Parameters
    ----------
    x, y : ndarray
        Coordinate arrays; one row is produced per element of x.
    order : int
        Maximum total degree of the polynomial.

    Returns
    -------
    ndarray of shape (x.size, (order + 1) * (order + 2) / 2)
        The design matrix.
    """
    n_rows = x.size
    # Same formula as pval(); written inline because the original author
    # noted that pval() cannot be called from inside the @jit function.
    n_cols = int((order + 1) * (order + 2) / 2)
    X = np.zeros((n_rows, n_cols))

    col = 0
    for i in range(order + 1):
        # j is limited so that the total degree i + j never exceeds order
        for j in range(order + 1 - i):
            X[:, col] = x**i * y**j
            col += 1
    return X

`data()` generates datapoints from the Franke function with added Gaussian noise at uniformly random points (x, y) in the unit square, and returns the design matrix and the noisy z-values. We found that n = 100 datapoints are few enough for overfitting to become visible, so we will use 100 datapoints from the Franke function for the rest of this notebook.

In [6]:
n = 100 # datapoints
def data(n, order, seed=None):
    """Sample noisy Franke-function data and build its design matrix.

    Draws n points (x, y) uniformly from [0, 1) x [0, 1), evaluates the Franke
    function there and adds standard-normal noise divided by 10.

    Parameters
    ----------
    n : int
        Number of datapoints to generate.
    order : int
        Maximum polynomial order passed on to designMatrix().
    seed : int or None, optional
        If given, the samples are drawn from a private
        np.random.RandomState(seed), so the same seed always yields the same
        dataset. If None (default), NumPy's global random state is used.

    Returns
    -------
    X : ndarray
        Design matrix returned by designMatrix(x, y, order).
    z : ndarray of shape (n,)
        Noisy function values.
    """
    # np.random and RandomState expose the same rand/randn methods, so the
    # sampling code below is identical in the seeded and unseeded cases.
    rng = np.random if seed is None else np.random.RandomState(seed)

    x = rng.rand(n) # n unordered uniform random numbers from 0 to 1
    y = rng.rand(n)
    noise = rng.randn(n) / 10

    z = franke(x, y) + noise
    X = designMatrix(x, y, order)

    return X, z

`scale()` scales training and test data according to the training data.

In [7]:
def scale(X_train, X_test):
    """Standardize training and test design matrices using training statistics.

    A StandardScaler (subtracts each feature's mean and divides by its
    standard deviation) is fitted on X_train only and then applied to both
    matrices. Column 0 of each result is afterwards overwritten with ones,
    because scaling removes the intercept terms.

    Parameters
    ----------
    X_train, X_test : ndarray
        Design matrices with the intercept in column 0.

    Returns
    -------
    tuple of ndarray
        (scaled X_train, scaled X_test).
    """
    standardizer = StandardScaler().fit(X_train)

    scaled = []
    for matrix in (X_train, X_test):
        matrix_std = standardizer.transform(matrix)
        matrix_std[:, 0] = 1 # restore the intercept column
        scaled.append(matrix_std)

    return scaled[0], scaled[1]

In [8]:
def OLS(X, z, lmda=None, p=None):
    """Ordinary least squares fit of z on the columns of X.

    Solves the least-squares problem min ||X @ beta - z||^2 with
    np.linalg.lstsq rather than forming inv(X.T @ X) explicitly. For a
    full-rank X this gives the normal-equation solution; for a rank-deficient
    X (where the explicit inverse raises LinAlgError) it returns the
    minimum-norm solution.

    Parameters
    ----------
    X : ndarray of shape (n, p)
        Design matrix.
    z : ndarray of shape (n,)
        Target values.
    lmda, p : ignored
        Accepted only so that OLS, ridge and lasso share a call signature.

    Returns
    -------
    ndarray of shape (p,)
        Fitted coefficients.
    """
    # rcond=None selects NumPy's machine-precision based singular value cutoff
    beta = np.linalg.lstsq(X, z, rcond=None)[0]
    return beta

def ridge(X, z, lmda, p=None):
    """Ridge regression fit of z on the columns of X.

    Solves (X.T @ X + lmda * I) beta = X.T @ z with np.linalg.solve instead of
    computing the matrix inverse explicitly, which is cheaper and numerically
    more accurate. The penalty is applied to every coefficient, including the
    one for an intercept column if X contains one.

    Parameters
    ----------
    X : ndarray of shape (n, p)
        Design matrix.
    z : ndarray of shape (n,)
        Target values.
    lmda : float
        Regularization strength.
    p : int, optional
        Number of columns of X. Defaults to X.shape[1] when omitted.

    Returns
    -------
    ndarray of shape (p,)
        Fitted coefficients.
    """
    if p is None:
        p = X.shape[1]
    gram_penalized = X.T @ X + lmda * np.eye(p)
    beta = np.linalg.solve(gram_penalized, X.T @ z)
    return beta

import warnings
from sklearn.exceptions import ConvergenceWarning

def lasso(X, z, lmda, p):
    """Lasso regression fit of z on the columns of X.

    ConvergenceWarning raised during the fit is suppressed with the standard
    library's warnings.catch_warnings, because the sklearn.utils.testing
    module that used to provide ignore_warnings no longer exists in current
    scikit-learn releases.

    The model is fitted with fit_intercept=False so that the returned
    coefficient vector is complete: with scikit-learn's default, a separate
    intercept_ is estimated and is not part of coef_, so X @ coef_ would not
    reproduce the model's predictions. With fit_intercept=False the intercept
    is carried by the constant column of X, as in OLS() and ridge().

    Parameters
    ----------
    X : ndarray of shape (n, p)
        Design matrix.
    z : ndarray of shape (n,)
        Target values.
    lmda : float
        Regularization strength (scikit-learn's alpha).
    p : ignored
        Accepted only so that OLS, ridge and lasso share a call signature.

    Returns
    -------
    ndarray of shape (p,)
        Fitted coefficients.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        clf = skl.Lasso(lmda, fit_intercept=False)
        clf.fit(X, z)
    return clf.coef_



In [9]:
from SGD import SGDLinReg

In [10]:
# Setup of data: generate one dataset, split it, and scale it.
order = 5 # max order of polynomials

# n is the module-level datapoint count defined next to data()
X, z = data(n, order)
# Hold out 20 % of the rows for testing. No random_state is passed, so the
# split is different on each run.
X_train, X_test, z_train, z_test = train_test_split(X, z, test_size = 0.2)
# Replace both matrices by their scaled versions (scaler fitted on the training rows only)
X_train, X_test = scale(X_train, X_test)

In [12]:
# Start from an all-zero coefficient vector with one entry per design-matrix column
beta = np.zeros(pval(order))
# Iterative fit: call SGDLinReg 10000 times, feeding each returned beta back in.
# NOTE(review): SGDLinReg comes from the project-local SGD module; presumably each
# call performs one gradient step and 0.0001 is the learning rate -- confirm there.
for i in range(10000):
    beta = SGDLinReg(X_train, beta, z_train, 0.0001)
# Closed-form alternative, kept for comparison:
#beta = OLS(X_train, z_train)
# Evaluation of model
z_mdl = X_test @ beta
# sklearn's signature is mean_squared_error(y_true, y_pred); the arguments are
# passed in the opposite order here, which does not change the plain MSE value.
print(f"Mean squared error on training data = {mean_squared_error(X_train @ beta, z_train):.4f}")
print(f"Mean squared error = {mean_squared_error(z_mdl, z_test):.4f}")

Mean squared error on training data = 0.0131
Mean squared error = 0.0277


In [None]:
Mean squared error on training data = 0.0184
Mean squared error = 0.0369

