In [None]:
#3ai) SGD with an exponentially decaying learning rate for OLS

In [None]:
# Importing various packages
from random import random, seed
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import sys
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
import pandas as pd
import seaborn as sns
from math import exp, sqrt
from random import random, seed

# Seed NumPy's global random generator so that the synthetic noise below
# (and every later np.random draw, e.g. in the SGD loops) is reproducible
# when the notebook is restarted and run from the top.
SEED = 42
np.random.seed(SEED)

#grid size (number of sample points)
n = 200

# Make data set: n evenly spaced points on [0, pi/2], stored as an (n, 1) column.
x = np.linspace(0,(np.pi)/2,n).reshape(-1, 1)

#Northern H. parameters
s0 = 1
s2 = -0.473
a0 = 0.675
a2 = -0.192
i2 = -0.165

#flux function (eqn. (14) from Stone_1978)
y = 0.5*(s0*a2+s2*a0+(2/7)*s2*a2-i2)*((np.sin(x))**3-np.sin(x))

#noisy flux function: Gaussian noise centred on y with a per-point
#standard deviation of 5% of |y| (so points where y == 0 stay noise-free)
y_noisy = np.random.normal(y, abs(y*0.05))

def MSE(y_data,y_model):
    """Return the mean squared error between y_data and y_model.

    The squared element-wise differences are summed and divided by the
    number of elements in y_model.
    """
    residuals = y_data - y_model
    total_squared_error = np.sum(residuals**2)
    return total_squared_error/np.size(y_model)

#polynomial fit: build the design matrix X from polynomial features of x
#up to the given degree (the SGD loops size beta as degree+1 rows to match X)
degree=6
poly = PolynomialFeatures(degree=degree)
X = poly.fit_transform(x)

# Hessian matrix of the mean-squared-error cost with respect to beta,
# and its pseudo-inverse (invH is not used in the cells visible here)
H = (2.0/n)* X.T @ X
invH = np.linalg.pinv(H)

# Get the eigenvalues (and eigenvectors) of the Hessian
# NOTE(review): not used in the cells visible here — presumably kept for
# choosing/inspecting the learning rate; confirm before removing.
EigValues, EigVectors = np.linalg.eig(H)

# number of passes over the data for the OLS SGD run below
n_epochs = 100000

import time

#exponential learning rate scheduler (defined once, not on every pass of the
#batch-size loop)
def learning_schedule(epoch, gamma_0):
    """Return the learning rate gamma_0*exp(-epoch*gamma_0/100) for an epoch."""
    return gamma_0*np.exp(-epoch*gamma_0/100)

#Start the clock BEFORE training so the reported time covers the SGD runs
#(starting it afterwards would always report ~0 s).
start_time = time.perf_counter()

for M in [1, 2, 4, 32]: #M is the size of each minibatch

    #number of minibatches; when M does not divide n the trailing n % M
    #points are never sampled
    m = n // M

    #random initial coefficients, one per column of the design matrix
    beta_OLS = np.random.randn(degree+1,1)

    for epoch in range(n_epochs):
        #the learning rate depends only on the epoch, so compute it once here
        gamma_OLS = learning_schedule(epoch, 0.005)

        for i in range(m):
            #pick one of the m contiguous minibatches at random
            random_index = M*np.random.randint(m)
            xi = X[random_index:random_index+M]
            yi = y_noisy[random_index:random_index+M]

            #gradient of the minibatch mean squared error, then one SGD step
            gradient_OLS = (2.0/M)* xi.T @ ((xi @ beta_OLS)-yi)
            beta_OLS = beta_OLS - gamma_OLS*gradient_OLS

    #prediction on the full grid and its error against the noisy data
    y_OLS_SGD = X @ beta_OLS

    MSE_OLS_SGD = MSE(y_noisy, y_OLS_SGD)
    print('method MSE=', MSE_OLS_SGD)
    print('β=', beta_OLS)

    plt.plot(x*180/np.pi, y_noisy, 'ro', label='data')
    plt.plot(x*180/np.pi, y_OLS_SGD, label='SGD OLS')
    plt.xlabel('latitude [degrees]')
    plt.ylabel('f')
    plt.title(f'SGD OLS, minibatch size M={M}')
    plt.legend()
    plt.show()

#Total processing time of all OLS SGD runs above
print("execution time=", (time.perf_counter() - start_time), 's')

In [None]:
#3aii) SGD with an exponentially decaying learning rate for Ridge

In [None]:
#Ridge penalties: nlambdas values, logarithmically spaced from 1e-4 to 1e-2
nlambdas = 4
lambdas = np.logspace(-4, -2, num=nlambdas)

#learning rate constants
gamma_0 = 0.005
n_epochs = 10000

#learning schedule
def learning_schedule(epoch, gamma_0):
    """Return the exponentially decayed learning rate for the given epoch."""
    decay = np.exp(-epoch*gamma_0/100)
    return gamma_0*decay

import time

#Start the clock BEFORE training so the reported time covers the SGD runs
#(starting it afterwards would always report ~0 s).
start_time = time.perf_counter()

for M in [1, 2, 8, 32]: #we vary the size of each minibatch

    #number of minibatches; when M does not divide n the trailing n % M
    #points are never sampled
    m = n // M

    #per-lambda results for this minibatch size
    y_Ridge_SGD_array = np.zeros(nlambdas, dtype=object)
    MSE_Ridge_SGD = np.zeros(nlambdas)

    for l in range(nlambdas):
        lmbda = lambdas[l]
        #fresh random start for every lambda so the runs are independent
        beta_Ridge = np.random.randn(degree+1,1)

        for epoch in range(n_epochs):
            #the learning rate depends only on the epoch, so compute it once here
            gamma_Ridge = learning_schedule(epoch, gamma_0)

            for i in range(m):
                #pick one of the m contiguous minibatches at random
                random_index = M*np.random.randint(m)
                xi = X[random_index:random_index+M]
                yi = y_noisy[random_index:random_index+M]

                gradient_Ridge = (2.0/M)*xi.T @ (xi @ (beta_Ridge)-yi)+2*lmbda*beta_Ridge

                #take the SGD step for EVERY minibatch; updating only once per
                #epoch would discard all but the last minibatch gradient
                beta_Ridge = beta_Ridge - gamma_Ridge*gradient_Ridge

        y_Ridge_SGD = X @ beta_Ridge

        y_Ridge_SGD_array[l] = y_Ridge_SGD

        MSE_Ridge_SGD[l] = MSE(y_noisy, y_Ridge_SGD)

    #index of the lambda that gives the minimum MSE for this minibatch size
    best_l = int(np.argmin(MSE_Ridge_SGD))
    MSE_Ridge_SGD_optimal = MSE_Ridge_SGD[best_l]
    print('optimal λ index=', best_l, 'λ=', lambdas[best_l])

    #saving the y that gives the optimal MSE
    y_Ridge_SGD_optimal = y_Ridge_SGD_array[best_l]

    print('method MSE=', MSE_Ridge_SGD_optimal)

    plt.plot(x*180/np.pi, y_noisy, 'ro', label='data')
    plt.plot(x*180/np.pi, y_Ridge_SGD_optimal, label = 'optimal Ridge SGD')
    plt.xlabel('latitude [degrees]')
    plt.ylabel('f')
    plt.title(f'SGD Ridge, minibatch size M={M}')
    plt.legend()
    plt.show()

    #the x-axis shows log10 of lambda, so label it accordingly
    plt.plot(np.log10(lambdas), MSE_Ridge_SGD, label ='MSE Ridge SGD')
    plt.xlabel('log10(λ)')
    plt.ylabel('MSE')
    plt.title(f'SGD Ridge, minibatch size M={M}')
    plt.legend()
    plt.show()

#Total processing time of all Ridge SGD runs above
print("execution time=", (time.perf_counter() - start_time), 's')