In [1]:
#2i) Momentum GD with a fixed learning rate (simple tuning) for OLS

In [None]:
# Importing various packages
import sys
import numpy as np
from matplotlib import cm
from random import random, seed
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from matplotlib.ticker import LinearLocator, FormatStrFormatter

def MSE(y_data, y_model):
    """Return the mean squared error between observations and predictions.

    Parameters
    ----------
    y_data : array_like
        Observed target values.
    y_model : array_like
        Model predictions; its element count (``np.size``) is the divisor.

    Returns
    -------
    float
        Sum of the squared element-wise differences divided by the number
        of elements in ``y_model``.
    """
    residuals = y_data - y_model
    squared_error_total = np.sum(residuals**2)
    return squared_error_total / np.size(y_model)

# Momentum gradient descent for OLS on a noisy polynomial-regression problem,
# scanned over a range of fixed learning rates γ.

# Start the wall-clock timer BEFORE any work is done, so that the duration
# printed at the end of the cell covers the whole cell (data generation,
# gradient descent and plotting).
import time
start_time = time.time()

# Dedicated seeded generator: the noise and the starting point of the descent
# are the only random ingredients, and seeding them makes every re-run of the
# cell give the same numbers.
SEED_OLS = 2023
rng_OLS = np.random.default_rng(SEED_OLS)

#grid size
n = 200

# Make data set: n points between 0 and π/2, stored as a column vector.
x = np.linspace(0, (np.pi)/2, n).reshape(-1, 1)

#Northern H. parameters
s0 = 1
s2 = -0.473
a0 = 0.675
a2 = -0.192
i2 = -0.165

#flux function (eqn. (14) from Stone_1978)
y = 0.5*(s0*a2+s2*a0+(2/7)*s2*a2-i2)*((np.sin(x))**3-np.sin(x))

#noisy flux function: Gaussian noise whose standard deviation is 5% of |y|
y_noisy = rng_OLS.normal(y, abs(y*0.05))

#polynomial fit: design matrix with columns 1, x, ..., x**degree
degree = 6
poly = PolynomialFeatures(degree=degree)
X = poly.fit_transform(x)

# Hessian matrix of the OLS cost function
H = (2.0/n) * X.T @ X

# Get the eigenvalues. H is symmetric by construction, so eigh is used: it
# always returns real eigenvalues (np.linalg.eig may return a complex dtype).
EigValues, EigVectors = np.linalg.eigh(H)
#print(EigValues)

# One common starting point for every learning rate, so that the MSE-vs-γ
# curve compares the learning rates and not the amount of accumulated training.
beta_OLS_init = rng_OLS.standard_normal((degree+1, 1))
beta_OLS_m = beta_OLS_init.copy()

Niterations = 1000000

ngammas = 10

# Learning rates from 1/λ_max(H) up to 0.03
gammas = np.linspace(1.0/np.max(EigValues), 0.03, ngammas)

MSE_OLS_GD_m = np.zeros(ngammas)

# Momentum parameter (the same for every run)
delta_momentum = 0.3

# Loop-invariant factor of the gradient, computed once instead of once per
# iteration; `(2.0/n)*X.T @ v` evaluates as `((2.0/n)*X.T) @ v`, so the
# result is unchanged.
scaled_XT = (2.0/n)*X.T

for g in range(ngammas):
    gamma = gammas[g]

    # Restart from the common initial guess. The copy matters because the
    # update below modifies beta_OLS_m in place.
    beta_OLS_m = beta_OLS_init.copy()

    # Now improve with momentum gradient descent
    change_OLS = 0.0

    for _ in range(Niterations):

        # calculate gradient
        gradient_OLS_m = scaled_XT @ (X @ beta_OLS_m-y_noisy)

        # calculate update
        new_change_OLS = gamma*gradient_OLS_m+delta_momentum*change_OLS

        # take a step
        beta_OLS_m -= new_change_OLS

        # save the change
        change_OLS = new_change_OLS

    y_OLS_GD_m = X @ beta_OLS_m

    MSE_OLS_GD_m[g] = MSE(y_noisy, y_OLS_GD_m)


print('method MSE=', MSE_OLS_GD_m)
# beta_OLS_m now holds the coefficients obtained with the last (largest) γ
print('β=', beta_OLS_m)

plt.figure()
plt.plot(gammas, MSE_OLS_GD_m, label='MSE OLS GD')
plt.xlabel('γ')
plt.ylabel('MSE')
plt.legend()
plt.show()

#for the last value of gamma
ypredict_OLS_GD_m = X @ beta_OLS_m

plt.figure()
plt.plot(x*180/np.pi, y_noisy, 'ro', label='data')
# The label is derived from the learning rate actually used instead of a hard-coded value
plt.plot(x*180/np.pi, ypredict_OLS_GD_m, label=f'GD-m OLS, γ={gammas[-1]:g}')
plt.xlabel('latitude [degrees]')
plt.ylabel('f')
plt.legend()
plt.show()

#Reporting the processing time of the whole cell
print("execution time=", (time.time() - start_time), 's')

In [None]:
#2ii) Momentum GD with a fixed learning rate (simple tuning) for Ridge

In [2]:
# Momentum gradient descent for Ridge regression, scanned over a grid of
# penalties λ and fixed learning rates γ. Relies on names defined by the OLS
# cell above (X, n, x, degree, y_noisy, EigValues, MSE, ypredict_OLS_GD_m).
import time

import pandas as pd
import seaborn as sns

# Start the wall-clock timer BEFORE any work is done, so that the duration
# printed at the end of the cell covers the whole cell.
start_time = time.time()

# Dedicated seeded generator so the starting point of the descent (the only
# random ingredient of this cell) is reproducible.
SEED_RIDGE = 2023
rng_Ridge = np.random.default_rng(SEED_RIDGE)

#Ridge parameter
nlambdas = 12
lambdas = np.logspace(-5, 0, nlambdas)

#learning rate constants
Niterations = 100000
ngammas = 10
gammas = np.linspace(1.0/np.max(EigValues), 0.03, ngammas)

# Predictions for every (λ, γ) pair; object dtype so that each cell of the
# grid can hold a full prediction vector.
y_Ridge_GD_m_array = np.zeros((nlambdas, ngammas), dtype=object)
MSE_Ridge_GD_m = np.zeros((nlambdas, ngammas))

# One common starting point for every (λ, γ) pair, so that the grid compares
# hyper-parameters and not the amount of accumulated training.
beta_Ridge_init = rng_Ridge.standard_normal((degree+1, 1))

# Momentum parameter (the same for every run)
delta_momentum = 0.3

# Loop-invariant factor of the gradient, computed once instead of once per
# iteration; `(2.0/n)*X.T @ v` evaluates as `((2.0/n)*X.T) @ v`, so the
# result is unchanged.
scaled_XT = (2.0/n)*X.T

for l in range(nlambdas):
    lmbda = lambdas[l]

    for g in range(ngammas):
        gamma = gammas[g]

        # Restart from the common initial guess. The copy matters because
        # the update below modifies beta_Ridge_m in place.
        beta_Ridge_m = beta_Ridge_init.copy()

        # Now improve with momentum gradient descent
        change_Ridge = 0.0

        for _ in range(Niterations):
            # calculate gradient (OLS part plus the Ridge penalty term)
            gradient_Ridge_m = scaled_XT @ (X @ (beta_Ridge_m)-y_noisy)+2*lmbda*beta_Ridge_m

            # calculate update
            new_change_Ridge = gamma*gradient_Ridge_m+delta_momentum*change_Ridge

            # take a step
            beta_Ridge_m -= new_change_Ridge

            # save the change
            change_Ridge = new_change_Ridge

        y_Ridge_GD_m = X@beta_Ridge_m

        y_Ridge_GD_m_array[l, g] = y_Ridge_GD_m

        MSE_Ridge_GD_m[l, g] = MSE(y_noisy, y_Ridge_GD_m)

#finding the minimum value of the MSE (NaN entries, e.g. from a diverged run, are ignored)
MSE_Ridge_GD_m_optimal = np.nanmin(MSE_Ridge_GD_m)
#the (l, g) for which we have the minimum MSE; argmin + unravel_index always
#yields exactly one index pair and avoids comparing floats for equality
result = np.unravel_index(np.nanargmin(MSE_Ridge_GD_m), MSE_Ridge_GD_m.shape)
print(result)
print('optimal λ=', lambdas[result[0]], 'optimal γ=', gammas[result[1]])

#saving the y that gives the optimal MSE
y_Ridge_GD_m_optimal = y_Ridge_GD_m_array[result]
#print(y_Ridge_GD_optimal)

print('method MSE=', MSE_Ridge_GD_m_optimal)

mse_data_ridge = pd.DataFrame(MSE_Ridge_GD_m)

plt.figure(figsize=(15,10))
# Tick labels show the actual γ / λ values instead of the bare grid indices
sns.heatmap(
    data=mse_data_ridge,
    annot=True,
    fmt=".1e",
    cmap="crest",
    xticklabels=[f"{gamma_value:.4f}" for gamma_value in gammas],
    yticklabels=[f"{lambda_value:.1e}" for lambda_value in lambdas],
)
plt.xlabel("γ")
plt.ylabel("λ")
plt.show()

plt.figure()
plt.plot(x*180/np.pi, y_noisy, 'o', label='data')
plt.plot(x*180/np.pi, y_Ridge_GD_m_optimal, label='GD-m Ridge optimal')
plt.plot(x*180/np.pi, ypredict_OLS_GD_m, label='GD-m OLS, γ=0.03')
plt.xlabel('latitude [degrees]')
plt.ylabel('f')
plt.legend()
plt.show()

#Reporting the processing time of the whole cell
print("execution time=", (time.time() - start_time), 's')

NameError: name 'np' is not defined