# Exercise 6: Analysis of real data  (score 30 points)


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from imageio import imread
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import seaborn as sns
import os
from common import *
import cv2
#from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.preprocessing import MinMaxScaler


# Show the working directory of the notebook process.
print(f"Root directory: {os.getcwd()}")

# Figure styling: LaTeX text rendering with a 10 pt Palatino serif font.
report_rc = {}
report_rc["text.usetex"] = True
report_rc["font.family"] = "serif"
report_rc["font.serif"] = ["Palatino"]
report_rc["font.size"] = 10
plt.rcParams.update(report_rc)

#%matplotlib inline 

# Global variables

In [None]:
# Seed NumPy's global random generator.
# NOTE(review): SEED_VALUE is not defined in this notebook; presumably it is
# provided by `from common import *` — confirm.
np.random.seed(SEED_VALUE) # Random seed to guarantee reproducibility
# Paths
# NOTE(review): the settings below are commented out here, yet INPUT_DATA and
# REPORT_FIGURES are used by later cells; presumably they come from common — confirm.
#INPUT_DATA = "data/input_data/"  # Path for input data
#REPORT_DATA = "data/report_data/" # Path for data meant for the report
#REPORT_FIGURES = "figures/" # Path for figures meant for the report
# Setting for range of degrees
#from_degree = 0
#to_degree = 14
#degrees = np.arange(from_degree,to_degree)
#degree = 5
# Setting for logspace range of lambdas
#from_lambda = -7 #
#to_lambda = 3 #
#nLambdas = 10
#lambdas = np.logspace(from_lambda, to_lambda, nLambdas)
# Rescale settings
# Factor multiplied with the terrain height/width to get the resized image size.
rescale_factor = 0.2

# 0. Reading and plotting terrain data

In [None]:
# Read the two SRTM terrain files from the input-data folder
terrain1_file = "SRTM_data_Norway_1.tif"
terrain2_file = "SRTM_data_Norway_2.tif"
terrain1, terrain2 = (
    imread(f'{INPUT_DATA}{tif_name}') for tif_name in (terrain1_file, terrain2_file)
)

# Show both terrains side by side as grayscale images
fig, (ax1, ax2) = plt.subplots(1, 2)
shown = []
for panel, caption, heights in (
    (ax1, "Terrain over Norway 1", terrain1),
    (ax2, "Terrain over Norway 2", terrain2),
):
    panel.title.set_text(caption)
    panel.set_xlabel("X")
    panel.set_ylabel("Y")
    shown.append(panel.imshow(heights, cmap='gray'))
surf1, surf2 = shown

# Store the figure for the report, then display it
plt.savefig(f"{REPORT_FIGURES}terrain_data.pdf")
plt.show()
# Print the top-left value of terrain 1
print(terrain1[0,0])

## Thoughts on the topographical data
Before we proceed with this exercise, we want to briefly discuss the nature of this terrain data compared to the generated Franke function. We regard the Franke function as generic because it is known, and its shape can be generated for any other values of x and y. Thus, it makes sense to create a model that generalizes the function to unseen data points, even with added noise. However, we are uncertain whether this idea of generalization transfers to topographical terrain data in the same sense. The given terrain data is unique, and its shape cannot be generalized to unknown data points. If the purpose is to create the absolute best fit for this specific terrain, one could simply overfit to it by using an extremely high polynomial degree when fitting a model, and that is not really ML. One could probably argue that a model should tolerate added noise on the terrain data and still be able to represent the shapes and contours in the image. However, images are what they are, and in this case, the topographical terrain "is what it is." With that being said, we proceed with this exercise in the same spirit as for exercises 1-5, even though we question this use case when working on this kind of real data.

# 3D plot of the whole Terrain image

In [None]:
# Terrain 1: height matrix, a min-max scaled copy, and pixel-index coordinates
z1 = np.array(terrain1)
scaler = MinMaxScaler().fit(z1)
z1_scaled = scaler.transform(z1)
#y1 = np.linspace(0,1,z1.shape[0])
#x1 = np.linspace(0,1,z1.shape[1])
y1 = np.arange(z1.shape[0])
x1 = np.arange(z1.shape[1])
x1_m, y1_m = np.meshgrid(x1, y1)

# Terrain 2: same preparation (note that `scaler` now refers to the terrain-2 scaler)
z2 = np.array(terrain2)
scaler = MinMaxScaler().fit(z2)
z2_scaled = scaler.transform(z2)
#y2 = np.linspace(0,1,z2.shape[0])
#x2 = np.linspace(0,1,z2.shape[1])
y2 = np.arange(z2.shape[0])
x2 = np.arange(z2.shape[1])
x2_m, y2_m = np.meshgrid(x2, y2)

#%matplotlib
# One 3D surface per terrain, drawn from the unscaled heights
fig = plt.figure()
surface_axes = []
for slot, caption, (grid_x, grid_y, heights) in (
    (1, "Terrain1 plot", (x1_m, y1_m, z1)),
    (2, "Terrain2 plot", (x2_m, y2_m, z2)),
):
    axis3d = fig.add_subplot(1, 2, slot, projection='3d')
    axis3d.title.set_text(caption)
    axis3d.set_xlabel("x")
    axis3d.set_ylabel("y")
    axis3d.set_zlabel("z")
    axis3d.plot_surface(grid_x, grid_y, heights, cmap=cm.coolwarm, linewidth = 0, antialiased=False)
    surface_axes.append(axis3d)
ax1, ax2 = surface_axes
plt.show()

# 1. Preprocessing and transformation of terrain data
Least squares regression is not designed to work on images directly. Thus, we must first transform the terrain data by slicing it into several smaller patches.

# Resizing the terrain image
For computational purposes, we resize the terrain image to get a reasonable number of data points for our least squares models.

In [None]:
# Target size in pixels: the shape of terrain1 multiplied by rescale_factor.
# Both terrains are resized to this same size.
ySize = int(terrain1.shape[0] * rescale_factor); print(ySize)
xSize = int(terrain1.shape[1] * rescale_factor); print(xSize)
# cv2.resize takes the target size as (width, height)
terrain1Resized = cv2.resize(terrain1, (xSize, ySize))
terrain2Resized = cv2.resize(terrain2, (xSize, ySize))

# Plotting the resized terrains side by side
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.title.set_text("Terrain over Norway 1 (Resized)")
ax1.set_xlabel("X"); ax1.set_ylabel("Y")
surf1 = ax1.imshow(terrain1Resized, cmap='gray')
ax2.title.set_text("Terrain over Norway 2 (Resized)")
ax2.set_xlabel("X"); ax2.set_ylabel("Y")
surf2 = ax2.imshow(terrain2Resized, cmap='gray')
# Saved under its own file name: the earlier cell already writes
# "terrain_data.pdf" for the original-resolution figure, and reusing that name
# here silently overwrote it.
plt.savefig(f"{REPORT_FIGURES}terrain_data_resized.pdf")
plt.show()
# NOTE(review): this prints a value of the original terrain, not the resized one — confirm intent
print(terrain1[0,0])

# Creating image patches and Terrain data selection

## Methods

In [None]:
def create_img_patches(img, ySteps, xSteps):
    """Cut a 2D image into patches, scanning row by row.

    Parameters
    ----------
    img : array with at least two dimensions
        Image to slice; patches are slices of ``img``.
    ySteps, xSteps : int
        Patch height and width in pixels.

    Returns
    -------
    list
        Patches ordered left to right, then top to bottom. When the image
        size is not a multiple of the step, the patches at the right/bottom
        edge are smaller.
    """
    return [
        img[top:top + ySteps, left:left + xSteps]
        for top in range(0, img.shape[0], ySteps)
        for left in range(0, img.shape[1], xSteps)
    ]

def patches_to_img(patches, ySteps, xSteps, nYpatches, nXpatches, plotImage=False):
    """Assemble patches (ordered row by row) back into one image.

    Parameters
    ----------
    patches : sequence of 2D arrays
        Patches, left to right and then top to bottom.
    ySteps, xSteps : int
        Height and width of one patch in pixels.
    nYpatches, nXpatches : int
        Number of patches along the vertical and horizontal direction.
    plotImage : bool, optional
        When True, the assembled image is also shown in grayscale.

    Returns
    -------
    numpy.ndarray
        Float image of shape ``(ySteps*nYpatches, xSteps*nXpatches)``.
    """
    canvas = np.zeros((ySteps * nYpatches, xSteps * nXpatches))

    # Top-left corner of every patch slot, in the same order as `patches`
    corners = [
        (top, left)
        for top in range(0, canvas.shape[0], ySteps)
        for left in range(0, canvas.shape[1], xSteps)
    ]
    for index, (top, left) in enumerate(corners):
        canvas[top:top + ySteps, left:left + xSteps] = patches[index]

    if not plotImage:
        return canvas

    plt.imshow(canvas, cmap='gray')
    plt.title("Reconstructed img")
    plt.show()
    return canvas

def plotTerrainPatches(patches, nYpatches, nXpatches, plotTitle="Terrain patches"):
    """Show terrain patches in a grid of grayscale subplots.

    Parameters
    ----------
    patches : sequence of 2D arrays
        Patches ordered row by row; ``nYpatches * nXpatches`` of them are drawn.
    nYpatches, nXpatches : int
        Number of subplot rows and columns.
    plotTitle : str, optional
        Title placed above the whole figure.
    """
    # squeeze=False keeps `ax` two-dimensional, so ax[y, x] also works for a
    # grid with a single row or a single column.
    fig, ax = plt.subplots(nYpatches, nXpatches, figsize=(4,8), squeeze=False)
    i = 0
    for y in range(nYpatches):
        for x in range(nXpatches):
            panel = ax[y, x]
            panel.title.set_text(f"Patch{i}")
            # Label the current subplot; the y label used to be set on a
            # global `ax1` instead of on this panel.
            panel.set_xlabel("X")
            panel.set_ylabel("Y")
            panel.imshow(patches[i], cmap='gray')
            i += 1

    fig.suptitle(f"{plotTitle}")
    plt.tight_layout()
    plt.show()

def createTerrainData(terrain, includeMeshgrid=True):
    """Build pixel-index coordinates for a terrain height matrix.

    Parameters
    ----------
    terrain : array-like with at least two dimensions
        Height values; copied into a NumPy array.
    includeMeshgrid : bool, optional
        When True (default) x and y are 2D meshgrids; otherwise they are the
        1D column and row index vectors.

    Returns
    -------
    tuple
        ``(x, y, z)`` where z is the height array.
    """
    z = np.array(terrain)
    column_idx = np.arange(z.shape[1])
    row_idx = np.arange(z.shape[0])
    if not includeMeshgrid:
        return column_idx, row_idx, z
    grid_x, grid_y = np.meshgrid(column_idx, row_idx)
    return grid_x, grid_y, z


In [None]:
# Patch grid: 4 rows by 2 columns
nYpatches = 4
nXpatches = 2
# Patch size in pixels, derived from terrain2Resized and used for both terrains
ySteps = terrain2Resized.shape[0] // nYpatches
xSteps = terrain2Resized.shape[1] // nXpatches

# Slice both resized terrains, then show each set of patches
patches_1 = create_img_patches(terrain1Resized, ySteps, xSteps)
patches_2 = create_img_patches(terrain2Resized, ySteps, xSteps)
for patch_set, heading in (
    (patches_1, "Terrain1 patches"),
    (patches_2, "Terrain2 patches"),
):
    plotTerrainPatches(patch_set, nYpatches, nXpatches, plotTitle=heading)

# test
#img_reconstructed = patches_to_img(patches, ySteps, xSteps, nYpatches, nXpatches, plotImage=True)


# Choosing of terrain patch and data creation
We look at the terrain data patches and choose which to create a fit for

In [None]:
# Pick one patch from each terrain (patch 1 of terrain1, patch 3 of terrain2)
img1, img2 = patches_1[1], patches_2[3]
# Meshgrid coordinates and heights for the two chosen patches
(x1, y1, z1), (x2, y2, z2) = createTerrainData(img1), createTerrainData(img2)

In [None]:
# 2D view of the two selected patches; each title reports the patch mean and variance
fig, (ax1, ax2) = plt.subplots(1, 2)
drawn = []
for panel, origin, patch_img in ((ax1, "terrain1", img1), (ax2, "terrain2", img2)):
    patch_mean = np.round(np.mean(patch_img), decimals=1)
    patch_var = np.round(np.var(patch_img), decimals=1)
    panel.title.set_text(
        f"Terrain patch from {origin}\nMean:{patch_mean}\nVariance: {patch_var}"
    )
    panel.set_xlabel("X")
    panel.set_ylabel("Y")
    drawn.append(panel.imshow(patch_img, cmap='gray'))
surf1, surf2 = drawn
plt.show()

# 3D surface view of the same patches, each with its own camera angle
fig = plt.figure()
patch_axes = []
for slot, caption, camera, (grid_x, grid_y, heights) in (
    (1, "3D plot of terrain1 patch", dict(elev=-75., azim=-91), (x1, y1, z1)),
    (2, "3D plot of terrain2 patch", dict(elev=-45., azim=-85.0), (x2, y2, z2)),
):
    axis3d = fig.add_subplot(1, 2, slot, projection='3d')
    axis3d.title.set_text(caption)
    axis3d.set_xlabel("x")
    axis3d.set_ylabel("y")
    axis3d.set_zlabel("z")
    axis3d.view_init(**camera)
    axis3d.plot_surface(grid_x, grid_y, heights, cmap=cm.coolwarm, linewidth = 0, antialiased=False)
    patch_axes.append(axis3d)
ax1, ax2 = patch_axes
plt.show()


# 2. Base input data for least square regression
We construct the data for least squares regression from the preprocessed data. We also set up variables that will be used throughout the exercise.<br>
The terrain patch from terrain 1 is used as input for our models and our tests.

In [None]:
# Use the terrain1 patch (x1, y1, z1) as the working data set x, y, z.
x, y, z = x1, y1, z1

# 3. OLS on data (Exercise1)

## Running OLS fit on the data as done in EX1 
Note that we exclude the calculation of the confidence intervals (CI) for the betas here, since it is embedded within the model itself. See common.py for that code.

In [None]:
# Highest polynomial degree to fit; degrees 1..degrees are tried
degrees = 8
# Heights as a single column vector
z_flat = z.reshape(-1, 1)
# One column per degree: targets and predictions for the train and test sets
z_train_OLS, z_hat_train_OLS, z_test_OLS, z_hat_test_OLS = (
    pd.DataFrame() for _ in range(4)
)

# One result dictionary per degree, in increasing degree order
OLSrun = []
for degree in range(1, degrees+1):
    print(f"Running OLS fitting on degree{degree}")
    X = create_X(x, y, degree) # Design Matrix

    # Scale and split into training and test sets (fixed random_state for every degree)
    X_train, X_test, z_train, z_test = prepare_data(
        X, z_flat, test_size=0.2, shuffle=True, scale_X=True, scale_t=True, random_state=4155
    )

    # Fit on the training set (fit returns the training predictions), then predict the test set
    model = OLS(degree=degree)
    z_hat_train = model.fit(X_train, z_train, SVDfit=False)
    z_hat_test = model.predict(X_test)

    # Per-degree evaluation metrics
    MSE_score_train = MSE(z_train, z_hat_train)
    R2_score_train = R2(z_train, z_hat_train)
    MSE_score_test = MSE(z_test, z_hat_test)
    R2_score_test = R2(z_test, z_hat_test)

    # Collect targets and predictions column-wise, keyed by degree
    z_train_OLS[degree] = z_train.flatten()
    z_hat_train_OLS[degree] = z_hat_train.flatten()
    z_test_OLS[degree] = z_test.flatten()
    z_hat_test_OLS[degree] = z_hat_test.flatten()

    # Keep everything produced for this degree
    results = dict(
        X_train=X_train, X_test=X_test, z_train=z_train, z_test=z_test,
        z_hat_train=z_hat_train, z_hat_test=z_hat_test, model=model, summary=model.summary(),
    )
    OLSrun.append(results)


# Squared residuals per degree (one column each)
train_sq_err = (z_train_OLS - z_hat_train_OLS) ** 2
test_sq_err = (z_test_OLS - z_hat_test_OLS) ** 2
# MSE for all degrees
mse_scores_train = train_sq_err.mean()
mse_scores_test = test_sq_err.mean()
# R2 for all degrees: 1 - residual sum of squares / total sum of squares
R2_scores_train = 1 - train_sq_err.sum() / ((z_train_OLS - z_train_OLS.mean())**2).sum()
R2_scores_test = 1 - test_sq_err.sum() / ((z_test_OLS - z_test_OLS.mean())**2).sum()

## Plotting performance of OLS for different degrees

In [None]:
# x axis: the fitted polynomial degrees 1..degrees
degree_axis = np.arange(1, degrees+1)
for curve, line_style, legend_text in (
    (mse_scores_test, "m", 'MSE on test'),
    (mse_scores_train, "c", 'MSE on train'),
):
    plt.plot(degree_axis, curve, line_style, label=legend_text)
#plt.plot(np.arange(1,degrees+1), R2_scores_test, label='R2 on test')
#plt.plot(np.arange(1,degrees+1), R2_scores_train, label='R2 on train')
plt.xlabel("Model complexity / Polynomial Degree")
plt.ylabel("Prediction Error - MSE")

plt.grid(True)
plt.legend()
#plt.savefig(f"{REPORT_FIGURES}franke_function_OLS_evaluate_fit.pdf")
plt.show()

## Looking at $\beta$ values from degree 4

In [None]:
# Polynomial degree whose fit is inspected
optimal_degree = 4
# OLSrun holds one result dictionary per degree starting at degree 1, hence the -1 offset
OLSrunOptimal = OLSrun[optimal_degree-1]
OLS_summary = OLSrunOptimal["summary"]
# NOTE(review): `display` is not defined in this file; presumably the
# IPython/Jupyter display helper — confirm
display(OLS_summary)

## Plotting the fitted terrain image using the most optimal degree

In [None]:
# Design matrix for the selected degree. It was previously built with a
# hard-coded degree of 50, which contradicted both the heading and the model
# created with `optimal_degree` below.
X = create_X(x1, y1, optimal_degree) # Design Matrix
# NOTE(review): standard_scaling is unpacked into (values, scaler) for z but not
# for X; its return type is not visible in this file — confirm against common.py
X_scaled = standard_scaling(X)
z1_scaled, scaler = standard_scaling(z1.ravel().reshape(-1,1))

# Model construction, fitting, and predictions
model = OLS(degree=optimal_degree) # The model
z_hat_train = model.fit(X_scaled, z1_scaled, SVDfit=False, keep_intercept=True) # Fitting the model and predict on training data

# Back to the original height scale, then back to the 2D shape of the patch.
# Using z1.shape avoids relying on the ySteps/xSteps globals matching the patch.
z_hat = scaler.inverse_transform(z_hat_train)
z_hat = z_hat.reshape(z1.shape)

# 2D plot: terrain patch next to the OLS prediction, with mean/variance in the titles
fig, (ax1, ax2) = plt.subplots(1, 2)
ax1.title.set_text(F"Terrain to predict\nMean:\
{np.round(np.mean(img1),decimals=1)}\nVariance: {np.round(np.var(img1),decimals=1)}")
ax1.set_xlabel("X"); ax1.set_ylabel("Y")
surf1 = ax1.imshow(img1, cmap='gray')
ax2.title.set_text(F"Predicted terrain using OLS\nMean:\
{np.round(np.mean(z_hat),decimals=1)}\nVariance: {np.round(np.var(z_hat),decimals=1)}")
ax2.set_xlabel("X"); ax2.set_ylabel("Y")
surf2 = ax2.imshow(z_hat, cmap='gray')
plt.show()

# 3D plot: true surface next to the predicted surface, same camera angle for both
fig = plt.figure()
ax1 = fig.add_subplot(1,2,1, projection='3d')
ax1.title.set_text("3D plot of Terrain to predict")
ax1.set_xlabel("x"); ax1.set_ylabel("y"); ax1.set_zlabel("z")
ax1.view_init(elev=60., azim=-120.0-70)
ax1.plot_surface(x1, y1, z1, cmap=cm.coolwarm, linewidth = 0, antialiased=False)
ax2 = fig.add_subplot(1,2,2, projection='3d')
ax2.title.set_text("3D plot of predicted terrain using OLS")
ax2.set_xlabel("x"); ax2.set_ylabel("y"); ax2.set_zlabel("z")
ax2.view_init(elev=60., azim=-120.0-70)
ax2.plot_surface(x1, y1, z_hat, cmap=cm.coolwarm, linewidth = 0, antialiased=False)
plt.show()

### Comments on the OLS fit to terrain data:
We scale the data since x, y, and z are not between 0 and 1. A degree of 4 seems to yield the best performance when fitting to the chosen terrain data.

## Trying to predict all patches using degree 4
Using patch 1 as a reference, we try to predict all other patches using the same degree. 

In [None]:
# Polynomial degree used for every patch
testDegree = 4
patches_1_preds = []
# All patches share the same pixel grid, so one design matrix serves them all
X = create_X(x1, y1, testDegree) # Design Matrix
X_scaled = standard_scaling(X)

# Fit one OLS model per patch and keep its (scaled) prediction as a 2D patch
for patch in tqdm(patches_1):
    z_scaled = standard_scaling(patch.reshape(-1, 1))
    model = OLS(degree=testDegree)
    z_hat_train = model.fit(X_scaled, z_scaled, SVDfit=False, keep_intercept=True)
    z_hat = z_hat_train.reshape((ySteps, xSteps))
    patches_1_preds.append(z_hat)

# Stitch the predicted patches back into one image
terrain1_predicted = patches_to_img(patches_1_preds, ySteps, xSteps, nYpatches, nXpatches, plotImage=False)

# Resized terrain next to the image assembled from the predicted patches
fig, (ax1, ax2) = plt.subplots(1, 2)
compared = []
for panel, caption, picture in (
    (ax1, "Terrain1 (resized)", terrain1Resized),
    (ax2, f"Terrain1 - OLS predicted\nDegrees used:{testDegree}", terrain1_predicted),
):
    panel.title.set_text(caption)
    panel.set_xlabel("X")
    panel.set_ylabel("Y")
    compared.append(panel.imshow(picture, cmap='gray'))
surf1, surf2 = compared
plt.savefig(f"{REPORT_FIGURES}terrain_OLSpredicted_degree{testDegree}.pdf")
plt.show()

A degree of 4 or 5 seems to give a smooth surface for all the predicted patches. We find that the distortion and noise increase in the predicted image when the degree increases above 4-5 considering all patches. At higher degrees, some artifacts within the predicted patches also appear. In the predicted image with all patches, one can see some of the contours of the topographic structures in the image we try to approximate. However, the predicted image that is reconstructed from all the predicted patches is not very accurate. The task of this kind of problem is too complex for an OLS to manage. It may be that having smaller patches would increase the accuracy in reproducing the details incorporated in the input image. 

# 4. Bias-variance trade-off and resampling techniques on terrain data (Exercise2)

## 4.1 Setting up variables and data

In [None]:
# Fixed seed so that the splits and bootstrap draws are reproducible
np.random.seed(4155)
# Number of polynomial degrees to test (0 .. maxdegree-1) and bootstrap rounds per degree
maxdegree = 10
n_bootstraps = 10
# One zero-filled slot per degree for each tracked quantity
MSE_test, MSE_train, polydegree, bias, variance = (
    np.zeros(maxdegree) for _ in range(5)
)

## 4.2 Testing out different degrees

In [None]:
# Bootstrap estimate of test/train MSE, bias and variance for each polynomial degree.
# The progress-bar text reports `maxdegree`; it previously referenced `n`, which
# is not defined at this point in the notebook.
for degree in tqdm(range(maxdegree), desc = f"Looping through polynomials up to {maxdegree} degrees with {n_bootstraps} bootstraps: "):
    X = create_X(x, y, n=degree)
    # Targets as a column vector so they broadcast against the
    # (n_samples, n_bootstraps) prediction matrices below
    X_train, X_test, y_train, y_test = train_test_split(X, z.reshape(-1,1), test_size=0.2)

    #TODO: why scale?!?
    # The scaler is fitted on the training part only and applied to both parts
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    # One column of predictions per bootstrap round
    y_pred = np.empty((y_test.shape[0], n_bootstraps))
    y_fit = np.empty((y_train.shape[0], n_bootstraps))

    for i in range(n_bootstraps):
        # Bootstrap: draw a resampled training set
        x_, y_ = resample(X_train_scaled, y_train)
        # Fit the model to the resampled data
        clf = LinearRegression().fit(x_, y_)
        # Predict on the test data
        y_pred[:, i] = clf.predict(X_test_scaled).ravel()
        # Predict on the un-resampled training data so that the rows line up
        # with y_train in the MSE below. Predicting on the resampled x_
        # compared shuffled/duplicated rows against y_train.
        y_fit[:, i] = clf.predict(X_train_scaled).ravel()

    polydegree[degree] = degree
    MSE_test[degree] = np.mean( np.mean((y_test - y_pred)**2, axis=1, keepdims=True ))
    MSE_train[degree] = np.mean( np.mean((y_train - y_fit)**2, axis=1, keepdims=True ))
    # Squared difference between the targets and the mean prediction over the bootstraps
    bias[degree] = np.mean( (y_test - np.mean(y_pred, axis=1, keepdims=True))**2 )
    # Variance of the predictions over the bootstraps, averaged over the test points
    variance[degree] = np.mean( np.var(y_pred, axis=1, keepdims=True))


plt.plot(polydegree, MSE_test,"m", label='MSE_test')
plt.plot(polydegree, MSE_train,"c", label='MSE_train')

plt.plot(polydegree, bias,"b--", label='bias')
plt.plot(polydegree, variance,"r--", label='Variance')
#plt.plot(polydegree, bias+variance,"g--", label='bias+variance')

plt.xlabel("Model complexity / Polynomial Degree")
plt.ylabel("Prediction Error")

plt.grid(True)
plt.legend()
plt.show()

## 4.3 Studying the bias-variance tradeoff as a function of the number of datapoints

In [None]:
# This cell refers to `common.<name>`; the notebook only does
# `from common import *`, which does not bind the module name itself.
import common

# Grid sizes to test: each n gives an n x n grid of Franke-function samples
n_list = [10, 20, 30, 40, 50]
n_bootstraps = 20
maxdegree = 12
polydegree = np.arange(maxdegree)

# NOTE: x, y and z are rebound here to generated Franke data, replacing the
# terrain patch data used by the earlier cells.
for n in n_list:
    x = np.sort(np.random.uniform(0,1,n))
    y = np.sort(np.random.uniform(0,1,n))
    x,y = np.meshgrid(x,y)
    # Independent noise for every grid point. With size=n a single noise vector
    # was broadcast over the grid, giving every row the same noise.
    z = common.FrankeFunction(x, y) + 0.2*np.random.normal(0, size = x.shape)

    MSE_test_n = np.zeros(maxdegree)
    MSE_train_n = np.zeros(maxdegree)
    variance_n = np.zeros(maxdegree)
    bias_n = np.zeros(maxdegree)

    for degree in tqdm(range(maxdegree), desc = f"Looping through polynomials up to {maxdegree} degrees with {n_bootstraps} bootstraps: "):
        model = common.LinearRegression()
        #model= make_pipeline(PolynomialFeatures(degree=degree), LinearRegression(fit_intercept=False))
        X = common.create_X(x, y, n=degree)
        X_train, X_test, y_train, y_test = train_test_split(X, z.reshape(-1,1), test_size=0.2)

        # Column vectors, so the targets broadcast against the
        # (n_samples, n_bootstraps) prediction matrices
        y_test_ = np.reshape(y_test, (y_test.shape[0],1))
        y_train_ = np.reshape(y_train, (y_train.shape[0],1))

        #TODO: why scale?!?
        # The scaler is fitted on the training part only and applied to both parts
        scaler = StandardScaler()
        scaler.fit(X_train)
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        # One column of predictions per bootstrap round
        y_pred = np.empty((y_test.shape[0], n_bootstraps))
        y_fit = np.empty((y_train.shape[0], n_bootstraps))

        for i in range(n_bootstraps):
            # Bootstrap: draw a resampled training set
            x_, y_ = resample(X_train_scaled, y_train)
            # Fit the model to the resampled data
            model.fit(x_, y_, SVDfit=False)
            # Predict on the test data
            y_pred[:, i] = model.predict(X_test_scaled).ravel()
            # Predict on the (un-resampled) training data
            y_fit[:,i] = model.predict(X_train_scaled).ravel()

        MSE_test_n[degree] = np.mean( np.mean((y_test_ - y_pred)**2, axis=1, keepdims=True) )
        MSE_train_n[degree] = np.mean( np.mean((y_train_ - y_fit)**2, axis=1, keepdims=True) )
        # Squared difference between the targets and the mean prediction over the bootstraps
        bias_n[degree] = np.mean( (y_test - np.mean(y_pred, axis=1, keepdims=True))**2 )
        # Variance of the predictions over the bootstraps, averaged over the test points
        variance_n[degree] = np.mean( np.var(y_pred, axis=1, keepdims=True))

    # One figure per grid size
    plt.plot(polydegree, MSE_test_n,"m", label='MSE_test')
    plt.plot(polydegree, MSE_train_n,"c", label='MSE_train')

    plt.plot(polydegree, bias_n,"g--", label='bias')
    plt.plot(polydegree, variance_n,"r--", label='Variance')

    plt.title(f"Bias-Variance tradeoff for n={n} datapoints")
    plt.xlabel("Model complexity / Polynomial Degree")
    plt.ylabel("Prediction Error")

    plt.grid(True)
    plt.legend()
    plt.show()

# Tests of dimensions image reduction and patches

In [None]:
# List every divisor i >= 2 of `value` together with the quotient value / i
value = 720
for i in range(2, value + 1):
    if value % i == 0:
        print(f"value:{value / i} at i:{i}")

In [None]:
# List every divisor i >= 2 of `value` together with the quotient value / i
value = 360
for i in range(2, value + 1):
    if value % i == 0:
        print(f"value:{value / i} at i:{i}")