In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from common import *
import pandas as pd
#from mpl_toolkits.mplot3d import Axes3D
import sklearn.linear_model as lm
#

# Report the working directory so relative figure/data paths can be checked.
_working_dir = os.getcwd()
print(f"Root directory: {_working_dir}")

# Global matplotlib styling: LaTeX text rendering with a 10 pt Palatino serif font.
_rc_overrides = {}
_rc_overrides["text.usetex"] = True
_rc_overrides["font.family"] = "serif"
_rc_overrides["font.serif"] = ["Palatino"]
_rc_overrides["font.size"] = 10
plt.rcParams.update(_rc_overrides)

#  Franke function 3D preview
First, we draw a 3D surface plot of the Franke function.
The plot is based on the code provided in the assignment text for plotting the Franke function.

In [None]:
# Preview plot of the Franke function
#%matplotlib
# np.random.seed() returns None, so the seed is stored first and then applied.
# This keeps SEED_VALUE usable as an actual seed wherever it is passed on.
SEED_VALUE = 4155
np.random.seed(SEED_VALUE)
n = 40
# The same sorted sample is used for both axes before building the grid.
y = x = np.sort(np.random.uniform(0,1,n))
x, y = np.meshgrid(x,y)
z = FrankeFunction(x, y)
z_noisy = z + noise_factor(n, factor=0.2)


def plot_franke_surface(x, y, z, title):
    """Draw one 3D surface of ``z`` over the ``x``/``y`` grid in a new figure.

    Parameters
    ----------
    x, y : grid coordinates as produced by ``np.meshgrid``.
    z : surface heights evaluated on that grid.
    title : text shown above the axes.

    Returns
    -------
    (fig, ax, surf) : the new figure, its 3D axes and the surface artist.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.title.set_text(title)
    ax.view_init(elev=30., azim=-25.0)
    ax.set_xlabel("x"); ax.set_ylabel("y"); ax.set_zlabel("z")
    surf = ax.plot_surface(x, y, z, cmap=cm.coolwarm, linewidth=0, antialiased=False)
    # Fixed z-range and tick format so the two previews are directly comparable.
    ax.set_zlim(-0.10, 1.40)
    ax.zaxis.set_major_locator(LinearLocator(10))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    return fig, ax, surf


# Franke function without noise
fig, ax1, surf1 = plot_franke_surface(x, y, z, "Plot of the Franke Function")
# plt.savefig(f"{REPORT_FIGURES}{EX1}franke_function_nonoise_preview.pdf")

# Franke function with noise
fig, ax2, surf2 = plot_franke_surface(
    x, y, z_noisy, "Plot of the Franke Function\n(0.2*Gaussian Noise added)"
)
# plt.savefig(f"{REPORT_FIGURES}{EX1}franke_function_noise_preview.pdf")
plt.show()

# 1 - Ordinary Least Squares (OLS)

## 1.1 Data
Defining and creating the data


In [None]:
# np.random.seed() returns None; keep the integer seed in SEED_VALUE so that the
# value handed to prepare_data later is a real, reproducible seed.
SEED_VALUE = 4155
np.random.seed(SEED_VALUE)
n = 100 # The number of points in each direction for the Franke function
x = np.sort(np.random.uniform(0, 1, n))
y = np.sort(np.random.uniform(0, 1, n))
x, y = np.meshgrid(x,y)
# Noisy observations of the Franke function on the grid
z = FrankeFunction(x, y) + noise_factor(n,factor=0.2)

## 1.2 Plot of fit for all degrees before evaluation
We plot the fits for polynomial degrees 1 through 6 to get an intuition for the curvature of the fitted models.

In [None]:
# One 3D scatter panel per polynomial degree, showing the OLS prediction
# over the full grid.
fig = plt.figure(figsize=(8,8))
degrees = 6

# TODO: Must fix so that training and test data are used.
# Must evaluate the model using MSE from training and test
for degree in range(1, degrees + 1):
    X = create_X(x, y, degree) # Design matrix
    X_train, X_test, z_train, z_test = prepare_data(X, z.ravel(), SEED_VALUE, scale_X=True, skip_intercept=True)
    model = OLS() # The model
    model.fit(X_train, z_train) # Fitting the model on the training split
    # NOTE(review): the model is fitted on data prepared with scale_X=True, but
    # the prediction below uses the raw design matrix (first column dropped).
    # Confirm whether X should be scaled the same way before predicting.
    z_hat = model.predict(X[:,1:]) # predict on the whole grid

    # Plot
    ax = fig.add_subplot(3,2, degree, projection='3d')
    ax.view_init(elev=30., azim=-25.0)
    ax.title.set_text(f"OLS/Linear fit of degree{degree}")
    ax.set_xlabel("x"); ax.set_ylabel("y"); ax.set_zlabel("z")
    # Pass the coordinates in (x, y) order so the points match the axis labels
    # and the order used to build the design matrix.
    ax.scatter3D(x, y, z_hat, c=z_hat, marker='.', cmap=cm.coolwarm)
fig.suptitle("OLS fit to the Franke Function")
plt.tight_layout()
# plt.savefig(f"{REPORT_FIGURES}{EX1}franke_function_OLS_fit.pdf")
plt.show()

## 1.3 - Finding degree/model complexity for the optimal OLS fit
We approximate the Franke function using ordinary least squares.
We estimate the Franke function using polynomials up to 5th degree (the value of `degrees` in the code below). We then look at the MSE scores to check for overfitting. We use the MSE values from the test data, together with the shape of the evaluation plot, to determine where overfitting sets in.

### Confidence interval
$$CI_{0.95}(\hat\beta_i) = [\hat\beta_i-1.96 SE(\hat\beta_i), \hat\beta_i+1.96 SE(\hat\beta_i)] =\hat\beta_i \pm 1.96\hat \sigma(\hat\beta_i)$$ 
The variance of the $i$-th coefficient estimate is: $$\sigma^2 (\hat\beta_i ) = \sigma^2 [(X^{T} X)^{-1}]_{ii}$$
However, $\sigma$ is unknown and can generally be estimated as follows:
$$\hat\sigma^2 = \frac{\sum_{i=0}^{N-1}(y_i - \hat y_i)^2}{N}$$
For simplicity, we use $N$ instead of $N-p-1$ in the denominator.<br>
To get the variance estimate of each $\hat\beta$ component, one scales $\hat\sigma^2$ by the corresponding diagonal element of $(X^TX)^{-1}$. The estimated standard error of a coefficient is the square root of its estimated variance, where the estimated covariance matrix of $\hat\beta$ is:
$$\widehat{\mathrm{Var}}(\hat\beta) = \hat\sigma^2 (X^TX)^{-1} = \frac{\sum_{i=0}^{N-1}(y_i - \hat y_i)^2}{N}(X^TX)^{-1}$$
Where y is the true value, and $\hat y$ being the predicted value. <br>
The variance estimate of each coefficient $\hat\beta_i$ can be written as:
$$\hat\sigma_{\hat\beta_i}^2 = \hat\sigma^2\left[(X^TX)^{-1}\right]_{ii}$$



In [None]:
# np.random.seed() returns None; store the integer seed so that the
# random_state passed to prepare_data below is an actual, fixed seed.
SEED_VALUE = 4155
np.random.seed(SEED_VALUE)
n = 20 # The number of points in each direction for the Franke function
x = np.sort(np.random.uniform(0, 1, n))
y = np.sort(np.random.uniform(0, 1, n))
x, y = np.meshgrid(x,y)
noise = 0.05
# Noisy observations of the Franke function on the grid
z = FrankeFunction(x, y) + noise_factor(n,factor=noise)

In [None]:
degrees = 5

# Observed targets, one column per polynomial degree
z_train_OLS = pd.DataFrame()
z_test_OLS = pd.DataFrame()

# Predictions from our own OLS implementation, one column per degree
z_hat_train_OLS = pd.DataFrame()
z_hat_test_OLS = pd.DataFrame()

# NOTE(review): no SVD-based fit is performed in this cell, so these frames stay
# empty and the SVD scores computed after the loop carry no information
# (expected to be all-NaN). They are kept only as placeholders.
z_hat_train_SVD = pd.DataFrame()
z_hat_test_SVD = pd.DataFrame()

# Predictions from sklearn's LinearRegression, one column per degree
z_hat_train_SK = pd.DataFrame()
z_hat_test_SK = pd.DataFrame()

# Per-degree coefficient tables; concatenated once after the loop instead of
# growing a DataFrame on every iteration.
coeff_frames = []

for degree in range(1, degrees+1):
    X = create_X(x, y, degree) # Design matrix

    # Scaling data and splitting it into training and test sets
    X_train, X_test, z_train, z_test = prepare_data(X, z.ravel(), test_size=0.2, shuffle=True, scale_X=True, scale_t=False, skip_intercept=True, random_state=SEED_VALUE)

    # Our OLS model: fit returns the predictions on the training data
    model = OLS()
    z_hat_train = model.fit(X_train, z_train)
    z_hat_test = model.predict(X_test)

    # Reference fit and predictions using sklearn
    model_sk = lm.LinearRegression(fit_intercept=False)
    model_sk.fit(X_train, z_train)
    z_hat_train_sk = model_sk.predict(X_train)
    z_hat_test_sk = model_sk.predict(X_test)

    # Coefficients, their estimated standard errors and 95% confidence bounds
    SE_betas = model.SE
    betas = model.get_all_betas
    CI_lower_all_betas = betas - (1.96 * SE_betas)
    CI_upper_all_betas = betas + (1.96 * SE_betas)

    # Coefficient table for this degree
    degs = np.full(betas.shape[0], degree, dtype=float)
    df = pd.DataFrame.from_dict({"degree" :degs,
                                 "coeff_name": [f"b{i}" for i in range(1,betas.shape[0]+1)],
                                 "coeff value": np.round(betas, decimals=4),
                                 "std error": np.round(SE_betas, decimals=4),
                                 "CI_lower":np.round(CI_lower_all_betas, decimals=4),
                                 "CI_upper":np.round(CI_upper_all_betas, decimals=4)},
                                 orient='index').T
    coeff_frames.append(df)

    # Store observations and predictions for the evaluation below
    z_train_OLS[degree] = z_train.flatten()
    z_hat_train_OLS[degree] = z_hat_train.flatten()
    z_test_OLS[degree] = z_test.flatten()
    z_hat_test_OLS[degree] = z_hat_test.flatten()

    z_hat_train_SK[degree] = z_hat_train_sk.flatten()
    z_hat_test_SK[degree] = z_hat_test_sk.flatten()

coeffs_df = pd.concat(coeff_frames, axis=0)

# MSE per polynomial degree (column-wise mean of the squared residuals)
mse_scores_train = ((z_train_OLS - z_hat_train_OLS) ** 2).mean()
mse_scores_test = ((z_test_OLS - z_hat_test_OLS) ** 2).mean()

# See the NOTE(review) on the SVD frames above: these are placeholders only.
mse_scores_train_svd = ((z_train_OLS - z_hat_train_SVD) ** 2).mean()
mse_scores_test_svd = ((z_test_OLS - z_hat_test_SVD) ** 2).mean()

mse_scores_train_sk = ((z_train_OLS - z_hat_train_SK) ** 2).mean()
mse_scores_test_sk = ((z_test_OLS - z_hat_test_SK) ** 2).mean()

# R2 per polynomial degree
R2_scores_train = 1 - ((z_train_OLS - z_hat_train_OLS) ** 2).sum() / ((z_train_OLS - z_train_OLS.mean())**2).sum()
R2_scores_test = 1 - ((z_test_OLS - z_hat_test_OLS) ** 2).sum() / ((z_test_OLS - z_test_OLS.mean())**2).sum()

In [None]:
# Plots
plt.figure(figsize=(12,8))
plt.plot(np.arange(1,degrees+1), mse_scores_train, c="c", label="Training data")
plt.plot(np.arange(1,degrees+1), mse_scores_test, c="m", label="Test data")
#plt.plot(np.arange(1,degrees+1), mse_scores_train_svd, "b--", label="Training data SVD")
#plt.plot(np.arange(1,degrees+1), mse_scores_test_svd, "g--", label="Test data SVD")
plt.plot(np.arange(1,degrees+1), mse_scores_train_sk, "r--", label="Training data sklearn")
plt.plot(np.arange(1,degrees+1), mse_scores_test_sk, "y--", label="Test data sklearn")
plt.xlabel("Model complexity / Polynomial Degree")
plt.ylabel("Prediction Error - MSE")
plt.title(f"Training evaluation on OLS regression fit\n(noise factor {noise})")
plt.legend()
plt.grid(True)
plt.tight_layout()
# plt.savefig(f"{REPORT_FIGURES}{EX1}franke_function_OLS_evaluate_fit_1.pdf")
plt.show()

## Looking at $\beta$ values

In [None]:
# Show the coefficient table and the beta error plot for every fitted degree.
# Iterate over `degrees` (the configured maximum) rather than the variable
# leaked from the fitting loop, and label each figure with its own degree `i`.
for i in range(1,degrees+1):
    degree_coeffs = coeffs_df[coeffs_df['degree'] == i]
    display(degree_coeffs)
    fig = plot_beta_errors(degree_coeffs, i)
    plt.title(f"Beta error OLS - degree{i}\n(noise factor {noise})")
    # fig.savefig(f"{REPORT_FIGURES}{EX1}OLS_beta_error_degree{i}.pdf")

Regarding the subpar figure labels: we have not been able to determine when they were introduced. We refer to the report PDF for an earlier, correct rendition of the plot.

We repeat the previous exercise, this time with four times the amount of added noise (0.05 -> 0.2).

In [None]:
# np.random.seed() returns None; store the integer seed so that the value
# handed to prepare_data below is an actual, fixed seed.
SEED_VALUE = 4155
np.random.seed(SEED_VALUE)
degrees = 5
n = 20 # The number of points in each direction for the Franke function
x = np.sort(np.random.uniform(0, 1, n))
y = np.sort(np.random.uniform(0, 1, n))
x, y = np.meshgrid(x,y)
noise = 0.2
# Noisy observations of the Franke function on the grid
z = FrankeFunction(x, y) + noise_factor(n,factor=noise)

In [None]:
# Observed targets, one column per polynomial degree
z_train_OLS = pd.DataFrame()
z_test_OLS = pd.DataFrame()

# Predictions from our own OLS implementation, one column per degree
z_hat_train_OLS = pd.DataFrame()
z_hat_test_OLS = pd.DataFrame()

# Predictions from sklearn's LinearRegression, one column per degree
z_hat_train_SK = pd.DataFrame()
z_hat_test_SK = pd.DataFrame()

# Per-degree coefficient tables; concatenated once after the loop instead of
# growing a DataFrame on every iteration.
coeff_frames = []

for degree in range(1, degrees+1):
    X = create_X(x, y, degree) # Design matrix

    # Scaling data and splitting it into training and test sets
    X_train, X_test, z_train, z_test = prepare_data(X, z.ravel(), SEED_VALUE, test_size=0.2, shuffle=True, scale_X=True, skip_intercept=True)

    # Our OLS model: fit returns the predictions on the training data
    model = OLS()
    z_hat_train = model.fit(X_train, z_train)
    z_hat_test = model.predict(X_test)

    # Reference fit and predictions using sklearn
    model_sk = lm.LinearRegression(fit_intercept=False)
    model_sk.fit(X_train, z_train)
    z_hat_train_sk = model_sk.predict(X_train)
    z_hat_test_sk = model_sk.predict(X_test)

    # Coefficients, their estimated standard errors and 95% confidence bounds
    SE_betas = model.SE
    betas = model.get_all_betas
    CI_lower_all_betas = betas - (1.96 * SE_betas)
    CI_upper_all_betas = betas + (1.96 * SE_betas)

    # Coefficient table for this degree
    degs = np.full(betas.shape[0], degree, dtype=float)
    df = pd.DataFrame.from_dict({"degree" :degs,
                                 "coeff_name": [f"b{i}" for i in range(1,betas.shape[0]+1)],
                                 "coeff value": np.round(betas, decimals=4),
                                 "std error": np.round(SE_betas, decimals=4),
                                 "CI_lower":np.round(CI_lower_all_betas, decimals=4),
                                 "CI_upper":np.round(CI_upper_all_betas, decimals=4)},
                                 orient='index').T
    coeff_frames.append(df)

    # Store observations and predictions for the evaluation below
    z_train_OLS[degree] = z_train.flatten()
    z_hat_train_OLS[degree] = z_hat_train.flatten()
    z_test_OLS[degree] = z_test.flatten()
    z_hat_test_OLS[degree] = z_hat_test.flatten()
    z_hat_train_SK[degree] = z_hat_train_sk.flatten()
    z_hat_test_SK[degree] = z_hat_test_sk.flatten()

coeffs_df = pd.concat(coeff_frames, axis=0)

# MSE per polynomial degree (column-wise mean of the squared residuals)
mse_scores_train = ((z_train_OLS - z_hat_train_OLS) ** 2).mean()
mse_scores_test = ((z_test_OLS - z_hat_test_OLS) ** 2).mean()

mse_scores_train_sk = ((z_train_OLS - z_hat_train_SK) ** 2).mean()
mse_scores_test_sk = ((z_test_OLS - z_hat_test_SK) ** 2).mean()
# R2 per polynomial degree
R2_scores_train = 1 - ((z_train_OLS - z_hat_train_OLS) ** 2).sum() / ((z_train_OLS - z_train_OLS.mean())**2).sum()
R2_scores_test = 1 - ((z_test_OLS - z_hat_test_OLS) ** 2).sum() / ((z_test_OLS - z_test_OLS.mean())**2).sum()

# R2 as computed by the helper from `common`, applied to the full frames
Morten_r2_train = R2(z_train_OLS, z_hat_train_OLS)
Morten_r2_test = R2(z_test_OLS, z_hat_test_OLS)

In [None]:
# Plots
plt.figure(figsize=(12,8))
plt.plot(np.arange(1,degrees+1), mse_scores_train, c="c", label="Training data")
plt.plot(np.arange(1,degrees+1), mse_scores_test, c="m", label="Test data")
plt.plot(np.arange(1,degrees+1), mse_scores_train_sk, "r--", label="Training data sklearn")
plt.plot(np.arange(1,degrees+1), mse_scores_test_sk, "y--", label="Test data sklearn")
plt.xlabel("Model complexity / Polynomial Degree")
plt.ylabel("Prediction Error - MSE")
plt.title(f"Training evaluation on OLS regression fit\n(noise factor {noise})")
plt.legend()
plt.grid(True)
plt.tight_layout()
# plt.savefig(f"{REPORT_FIGURES}{EX1}franke_function_OLS_evaluate_fit_dn.pdf")
plt.show()

In [None]:
# R2 score against polynomial degree for the test and training splits
r2_axis = np.arange(1,degrees+1)
for r2_series, line_fmt, legend_label in (
    (R2_scores_test, "r--", "Test data"),
    (R2_scores_train, "y--", "Training data"),
):
    plt.plot(r2_axis, r2_series, line_fmt, label=legend_label)
plt.xlabel("Model complexity / Polynomial Degree")
plt.ylabel("R2 score")
plt.title(f"R2 evaluation on OLS regression fit\n(noise factor {noise})")
plt.legend()
plt.grid(True)
plt.tight_layout()
# plt.savefig(f"{REPORT_FIGURES}{EX1}franke_function_R2.pdf")
plt.show()

In [None]:
# Show the coefficient table and the beta error plot for every fitted degree.
# Iterate over `degrees` (the configured maximum) rather than the variable
# leaked from the fitting loop, and label each figure with its own degree `i`.
for i in range(1,degrees+1):
    degree_coeffs = coeffs_df[coeffs_df['degree'] == i]
    display(degree_coeffs)
    fig = plot_beta_errors(degree_coeffs, i)
    plt.title(f"Beta error OLS - degree{i}\n(noise factor {noise})")
    # fig.savefig(f"{REPORT_FIGURES}{EX1}OLS_beta_error_degree{i}_mn.pdf")