# Part a) SGD


In [None]:
import numpy as np
import pandas as pd
from imageio import imread
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
import os
from common import *
from models import own_LinRegGD
import cv2

# Show the kernel's working directory so relative data/figure paths can be checked.
print(f"Root directory: {os.getcwd()}")

# Figure typography: LaTeX-rendered text in a 10 pt Palatino serif face.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = ["Palatino"]
plt.rcParams["font.size"] = 10

#%matplotlib

# Global variables

In [None]:
# Switches and constants shared by the cells below.
SAVE_FIGURES = False  # when True, the plotting cells also write their figures to disk
SEED_VALUE = 70_707_070
np.random.seed(SEED_VALUE)  # seed NumPy's legacy global RNG for reproducible runs

# Reading data and resizing

In [None]:
# Read both SRTM terrain rasters from the input-data directory.
terrain1_file = "SRTM_data_Norway_1.tif"
terrain2_file = "SRTM_data_Norway_2.tif"
terrain1 = imread(f'{INPUT_DATA}{terrain1_file}')
terrain2 = imread(f'{INPUT_DATA}{terrain2_file}')

# Downscale both images to the same target size, derived from terrain1's shape.
rescale_factor = 0.1
y_size, x_size = (int(extent * rescale_factor) for extent in terrain1.shape[:2])
target_size = (x_size, y_size)  # cv2.resize expects (width, height)
terrain1Resized = cv2.resize(terrain1, target_size)
terrain2Resized = cv2.resize(terrain2, target_size)

# Show the two resized terrains side by side in grayscale.
fig, (ax1, ax2) = plt.subplots(1, 2)
panel_images = []
for panel_ax, panel_data, panel_title in (
    (ax1, terrain1Resized, "Terrain over Norway 1 (Resized)"),
    (ax2, terrain2Resized, "Terrain over Norway 2 (Resized)"),
):
    panel_ax.title.set_text(panel_title)
    panel_ax.set_xlabel("X")
    panel_ax.set_ylabel("Y")
    panel_images.append(panel_ax.imshow(panel_data, cmap='gray'))
surf1, surf2 = panel_images

# Optionally persist the figure before displaying it.
if SAVE_FIGURES:
    plt.savefig(f"{REPORT_FIGURES}{EX_A}terrain_data_resized.pdf")
plt.show()

# Creating image patches and Terrain data selection

In [None]:
# Patch grid: each step size is the resized image's row/column count divided by
# the requested number of patches along that axis, truncated to an int.
# NOTE(review): both step sizes are taken from terrain2Resized and then reused for
# terrain1Resized; the resize cell gives both images the same target size, so this
# should be equivalent -- confirm if the resize logic ever changes.
nXpatches = 3; nYpatches=6
y_steps = int(terrain2Resized.shape[0] / nYpatches); print(y_steps)
x_steps = int(terrain2Resized.shape[1] / nXpatches); print(x_steps)

# Patches are always created; the patch overview figure is only built, saved
# and shown when SAVE_FIGURES is set.
patches_1 = create_img_patches(terrain1Resized, y_steps, x_steps)
if SAVE_FIGURES:
    fig1 = plotTerrainPatches(patches_1, nYpatches, nXpatches, plotTitle="Terrain1 patches")
    plt.savefig(f"{REPORT_FIGURES}{EX_A}Terrain1_patches.pdf")
    plt.show()

patches_2 = create_img_patches(terrain2Resized, y_steps, x_steps)
if SAVE_FIGURES:
    fig2 = plotTerrainPatches(patches_2, nYpatches, nXpatches, plotTitle="Terrain2 patches")
    plt.savefig(f"{REPORT_FIGURES}{EX_A}Terrain2_patches.pdf")
    plt.show()

# Choosing two interesting terrain patches (picked by hard-coded index).
img1 = patches_1[2]
img2 = patches_2[5]
# presumably createTerrainData returns coordinate arrays plus the height values
# of the patch -- TODO confirm against the helper in `common`.
x1, y1, z1 = createTerrainData(img1)
x2, y2, z2 = createTerrainData(img2)

# Constructing the terrain data: `terrain_data` selects which patch becomes x, y, z.
# NOTE(review): any value other than 1 or 2 leaves x, y, z and z_max unassigned
# by this cell (or stale from an earlier run of the kernel).
terrain_data = 1
if terrain_data == 1: # Choosing terrain1*
    # NOTE(review): z is bound to a copy here and rebound to the original array
    # two lines below, so the copy only feeds np.max.
    x, y, z = x1, y1, z1.copy() 
    #z_min = np.min(z)
    z_max = np.max(z)  # not used again within this cell
    z = z1

elif terrain_data == 2: # Choosing terrain2
    # Same pattern as above, for the second patch.
    x, y, z = x2, y2, z2.copy() 
    #z_min = np.min(z)
    z_max = np.max(z)  # not used again within this cell
    z = z2
    
# Flatten the selected heights and reshape them into a single column (n, 1).
z_flat = z.ravel(); z_flat = z_flat.reshape(-1,1)

# SGD

In [None]:
def step_length(t, t0, t1):
    """Return the decayed learning rate ``t0 / (t + t1)`` for step counter ``t``."""
    decay_denominator = t + t1
    return t0 / decay_denominator

def new_sgd(X_train, t_train, theta, n_epoch, batch_size, eta, lr_scheduler=False, ridge=False, lmb=0):
    """Fit linear-regression coefficients with mini-batch stochastic gradient descent.

    Every epoch the rows of ``X_train`` and ``t_train`` are shuffled together
    (using NumPy's global RNG) and split into mini-batches. ``theta`` is updated
    once per mini-batch with the gradient of the summed squared error
    ``||X theta - t||^2`` of that batch. The gradient is a sum over the batch,
    it is not divided by the batch size.

    Parameters
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Design matrix.
    t_train : array-like, shape (n_samples, 1) or (n_samples,)
        Targets.
    theta : array-like, shape (n_features, 1) or (n_features,)
        Initial coefficients. The caller's array is not modified.
    n_epoch : int
        Number of passes over the training data.
    batch_size : int
        Requested mini-batch size. The number of batches is
        ``n_samples // batch_size`` (at least 1); batches may differ in size by
        one row because ``np.array_split`` is used.
    eta : float
        Constant learning rate. Ignored when ``lr_scheduler`` is True.
    lr_scheduler : bool, default False
        If True, the k-th update (k = 0, 1, ...) uses the learning rate
        ``step_length(k, t0, t1)`` with ``t0 = 1.0`` and ``t1 = 100``.
    ridge : bool, default False
        If True, add the gradient of the L2 penalty ``lmb * ||theta||^2``.
    lmb : float, default 0
        Ridge penalty strength; only used when ``ridge`` is True.

    Returns
    -------
    numpy.ndarray, shape (n_features,)
        The fitted coefficients, flattened.
    """
    X_train = np.asarray(X_train)
    # Work with column vectors so the matrix products below have fixed shapes.
    t_train = np.asarray(t_train).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)

    # np.array_split rejects 0 sections, which would happen for batch_size > n_samples.
    n_batches = max(1, int(X_train.shape[0] // batch_size))
    Xt = np.concatenate((X_train, t_train), axis=1)
    print(f"Number of minibatches: {n_batches}")

    if lr_scheduler:
        t0 = 1.0; t1 = 100
        eta = t0/t1
        print(f"Using learning rate scheduler with initial learning rate: {eta}")

    # Running count of parameter updates; drives the learning-rate decay.
    # (The previous counter, epoch*n_batches+epoch, did not advance within an epoch.)
    step = 0
    for _ in tqdm(range(n_epoch), f"Training {n_epoch} epochs"):
        # Shuffle features and targets together, then cut into mini-batches.
        shuffled = np.take(Xt, np.random.permutation(Xt.shape[0]), axis=0)

        for batch in np.array_split(shuffled, n_batches, axis=0):
            xi = batch[:, :-1]
            yi = batch[:, -1].reshape(-1, 1)

            # Gradient of ||xi @ theta - yi||^2 with respect to theta.
            gradients = 2.0 * xi.T @ ((xi @ theta) - yi)
            if ridge:
                # Gradient of lmb * ||theta||^2; scales with theta, not a constant vector.
                gradients += 2.0 * lmb * theta

            theta = theta - eta * gradients

            step += 1
            if lr_scheduler:
                # Learning rate for the next update.
                eta = step_length(step, t0, t1)

    return theta.ravel()
   


# Momentum SGD

In [None]:
def momentum_sgd(X_train, t_train, theta, n_epoch, batch_size, eta, beta, lr_scheduler=False, ridge=False, lmb=0):
    """Fit linear-regression coefficients with mini-batch SGD plus momentum.

    Every epoch the rows of ``X_train`` and ``t_train`` are shuffled together
    (using NumPy's global RNG) and split into mini-batches. For each batch the
    velocity is updated as ``v = beta * v + eta * gradient`` and the
    coefficients as ``theta = theta - v``. The gradient is that of the summed
    squared error ``||X theta - t||^2`` of the batch (not divided by the batch
    size). The velocity is carried across epoch boundaries.

    Parameters
    ----------
    X_train : array-like, shape (n_samples, n_features)
        Design matrix.
    t_train : array-like, shape (n_samples, 1) or (n_samples,)
        Targets.
    theta : array-like, shape (n_features, 1) or (n_features,)
        Initial coefficients. The caller's array is not modified.
    n_epoch : int
        Number of passes over the training data.
    batch_size : int
        Requested mini-batch size. The number of batches is
        ``n_samples // batch_size`` (at least 1).
    eta : float
        Constant learning rate. Ignored when ``lr_scheduler`` is True.
    beta : float
        Momentum coefficient applied to the previous velocity.
    lr_scheduler : bool, default False
        If True, the k-th update (k = 0, 1, ...) uses the learning rate
        ``step_length(k, t0, t1)`` with ``t0 = 1.0`` and ``t1 = 100``.
    ridge : bool, default False
        If True, add the gradient of the L2 penalty ``lmb * ||theta||^2``.
    lmb : float, default 0
        Ridge penalty strength; only used when ``ridge`` is True.

    Returns
    -------
    numpy.ndarray, shape (n_features,)
        The fitted coefficients, flattened.
    """
    X_train = np.asarray(X_train)
    # Work with column vectors so the matrix products below have fixed shapes.
    t_train = np.asarray(t_train).reshape(-1, 1)
    theta = np.asarray(theta, dtype=float).reshape(-1, 1)

    # np.array_split rejects 0 sections, which would happen for batch_size > n_samples.
    n_batches = max(1, int(X_train.shape[0] // batch_size))
    Xt = np.concatenate((X_train, t_train), axis=1)
    print(f"Number of minibatches: {n_batches}")

    if lr_scheduler:
        t0 = 1.0; t1 = 100
        eta = t0/t1
        print(f"Using learning rate scheduler with initial learning rate: {eta}")

    # Initialised once: resetting the velocity every epoch would throw away the
    # accumulated momentum at each epoch boundary.
    velocity = np.zeros_like(theta)
    # Running count of parameter updates; drives the learning-rate decay.
    # (The previous counter, epoch*n_batches+epoch, did not advance within an epoch.)
    step = 0
    for _ in tqdm(range(n_epoch), f"Training {n_epoch} epochs"):
        # Shuffle features and targets together, then cut into mini-batches.
        shuffled = np.take(Xt, np.random.permutation(Xt.shape[0]), axis=0)

        for batch in np.array_split(shuffled, n_batches, axis=0):
            xi = batch[:, :-1]
            yi = batch[:, -1].reshape(-1, 1)

            # Gradient of ||xi @ theta - yi||^2 with respect to theta.
            gradients = 2.0 * xi.T @ ((xi @ theta) - yi)
            if ridge:
                # Gradient of lmb * ||theta||^2.
                gradients += 2.0 * lmb * theta

            velocity = beta * velocity + eta * gradients
            theta = theta - velocity

            step += 1
            if lr_scheduler:
                # Learning rate for the next update.
                eta = step_length(step, t0, t1)

    return theta.ravel()

In [None]:
# Driver cell: builds a data set, fits it with new_sgd, momentum_sgd and
# scikit-learn's SGDRegressor, and prints the three coefficient vectors.
from sklearn.linear_model import SGDRegressor

# Created test data (synthetic).
# The empty string literal on the next line is a no-op; the terrain variant
# further down is disabled by being wrapped in a triple-quoted string.
""""""
# NOTE(review): SEED_VALUE is re-assigned here with the same value as in the
# globals cell; the reseed makes the draws below independent of earlier cells.
SEED_VALUE = 70707070
np.random.seed(SEED_VALUE)

# x is uniform on [0, 2); t = 4 + 3x + standard-normal noise.
# NOTE(review): this rebinds the module-level `x` that the terrain cell also
# assigns, and which the disabled terrain variant below reads.
n = 100
x = 2*np.random.rand(n,1)
t = 4+3*x+np.random.randn(n,1)
# Build [1, x], append x**2, then drop the leading ones column -> columns are x and x**2.
X = np.c_[np.ones((n,1)), x]
X = np.hstack([X, x**2])
X = X[:,1:]
X_train, X_test, t_train, t_test = train_test_split(X, t, test_size=0.2, shuffle=True)


# Terrain data (disabled: the block below is a string literal, not executed).
"""
degree = 1
X = create_X(x,y, n=degree)
X = remove_intercept(X)
X_train, X_test, t_train, t_test = train_test_split(X, z_flat, test_size=0.2, shuffle=True)
"""

# presumably standard_scaling standardises both splits using training-set
# statistics -- TODO confirm against the helper in `common`.
X_train, X_test = standard_scaling(X_train, X_test)
t_train, t_test = standard_scaling(t_train, t_test)

# Random initial coefficients, one per feature column, as a column vector.
_,features_X = X_train.shape 
theta_initial_values = np.random.randn(features_X,1)
# NOTE(review): with lr_scheduler=True both new_sgd and momentum_sgd replace the
# eta passed in with their own t0/t1 start value, so this eta then only reaches
# SGDRegressor (as eta0).
eta = 0.01
lmb=0.0001

n_epochs = 1000
batch_size = 5  #size of each minibatch
lr_scheduler = True

# Plain mini-batch SGD, called with the ridge penalty enabled.
theta = new_sgd(X_train, t_train, theta_initial_values, n_epochs, batch_size, eta, lr_scheduler=lr_scheduler, ridge=True, lmb=lmb)

print(f"theta from new SGD: {theta}")

# Momentum SGD from the same starting point; `ridge`/`lmb` are not passed here,
# so this call runs with the defaults ridge=False, lmb=0. `theta` is rebound.
theta = momentum_sgd(X_train, t_train, theta_initial_values, n_epochs, batch_size, eta, beta=0.9, lr_scheduler=lr_scheduler)

print(f"theta from momentum SGD: {theta}")

# Reference fit with scikit-learn: no intercept, L2 penalty with alpha=lmb.
sgdreg = SGDRegressor(max_iter = n_epochs, fit_intercept=False, penalty='l2', eta0=eta, alpha=lmb)
# sgdreg = SGDRegressor(max_iter = n_epochs, fit_intercept=False, penalty=None, eta0=eta)
sgdreg.fit(X_train,t_train.ravel())
print(f"sgdreg from scikit: {sgdreg.coef_}")
# print(f"sgdreg from scikit: {sgdreg.intercept_}, {sgdreg.coef_}")