# Stochastic Gradient Descent (SGD) Experiments

## Setting Path

In [1]:
import sys
# Extend the module search path with two sibling directories. Both entries are
# relative, so they resolve against the current working directory at import time.
# Presumably '../Code Files' holds the project modules imported in the next
# cell -- confirm against the repository layout.
sys.path.append('../Code Files')
sys.path.append('../Data')

## Import Libraries

In [2]:
# Import project modules and libraries
from SGD_Experiments_Data_Generation import linear
from optimization import SGD1, SGD2
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np

## Global Variables

In [None]:
# Capture one timestamp for this session; it is embedded in every data file
# name that the save/load cells below build.
current_time = datetime.now()
# Use only filename-safe characters: ':' is not permitted in Windows file
# names, and spaces make the paths awkward to handle in shells.
formatted_time = current_time.strftime("%Y-%m-%d_%H-%M-%S")

## Generate Data (No Noise)

In [None]:
# Problem size and settings for the noise-free dataset.
n_train = 2_000_000  # number of training samples
n_test = 1_000_000  # number of test samples
d = 100  # number of features
noise_std = 0  # no noise
intercept = True  # whether to add a bias term to the linear model

# `linear` comes from SGD_Experiments_Data_Generation; its six return values
# are unpacked in the order shown.
X_train, X_test, Y_train, Y_test, W, b = linear(
    n_train, n_test, d, noise_std, intercept
)
print(
    X_train.shape,
    X_test.shape,
    Y_train.shape,
    Y_test.shape,
    W.shape,
    b,
)

### Save Data

In [None]:
# Write the current dataset and the true parameters to ../Data.
# The shared part of the file name is built once, so the six paths cannot
# drift apart through copy-paste edits.
run_tag = (
    f"time={formatted_time}_train={n_train}_test={n_test}"
    f"_d={d}_noise={noise_std}_intercept={intercept}"
)
for array_name, array in {
    "X_train": X_train,
    "X_test": X_test,
    "Y_train": Y_train,
    "Y_test": Y_test,
    "W": W,
    "b": b,
}.items():
    # Resulting path: ../Data/<name>_time=..._intercept=....npy
    np.save(f"../Data/{array_name}_{run_tag}.npy", array)

### Load Data

In [None]:
# Read the dataset back from ../Data using the same naming scheme as the
# save cell.
# NOTE: the file names embed `formatted_time`, which is recomputed from
# datetime.now() whenever the "Global Variables" cell runs, so these paths
# only resolve to files saved with the same `formatted_time` value.
run_tag = (
    f"time={formatted_time}_train={n_train}_test={n_test}"
    f"_d={d}_noise={noise_std}_intercept={intercept}"
)
X_train, X_test, Y_train, Y_test, W, b = (
    np.load(f"../Data/{array_name}_{run_tag}.npy")
    for array_name in ("X_train", "X_test", "Y_train", "Y_test", "W", "b")
)

### SGD, Least Squares, Fixed Learning Rate of 0.005, Epochs = 500, Batch Size = 1

In [None]:
# Baseline run: fixed learning rate, batch size 1. Only the arguments listed
# are passed; every other SGD1 option keeps its default (not visible here).
lr, epochs, batch_size = 0.005, 500, 1
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, Epochs = 500, Batch Size = 64

In [None]:
# Same settings as the baseline run, but with a batch size of 64.
lr, epochs, batch_size = 0.005, 500, 64
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, Epochs = 500, Batch Size = 128

In [None]:
# Same settings as the baseline run, but with a batch size of 128.
lr, epochs, batch_size = 0.005, 500, 128
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, Epochs = 500, Batch Size = 256

In [None]:
# Same settings as the baseline run, but with a batch size of 256.
lr, epochs, batch_size = 0.005, 500, 256
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005 w/Momentum of 0.85, Epochs = 500, Batch Size = 1

In [None]:
# Batch size 1 with a momentum coefficient of 0.85 passed to SGD1.
lr, momentum, epochs, batch_size = 0.005, 0.85, 500, 1
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    momentum=momentum,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005 w/Momentum of 0.85 and Nesterov Momentum Calculation, Epochs = 500, Batch Size = 1

In [None]:
# Batch size 1, momentum 0.85, with the `nesterov` flag switched on.
lr, momentum, nesterov = 0.005, 0.85, True
epochs, batch_size = 500, 1
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    momentum=momentum,
    nesterov=nesterov,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, Adagrad Optimization, Epochs = 500, Batch Size = 1

In [None]:
# Batch size 1 with optimizer_code = 1; per this section's heading that code
# selects Adagrad -- confirm against SGD1.
lr, optimizer_code, epochs, batch_size = 0.005, 1, 500, 1
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    optimizer_code=optimizer_code,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, RMSProp Optimization, Momentum = 0.85, Decay Factor = 0.9, Epochs = 500, Batch Size = 1

In [None]:
# Batch size 1 with optimizer_code = 2 (RMSProp per this section's heading --
# confirm against SGD1), momentum 0.85 and decay factor 0.9.
lr, epochs, batch_size = 0.005, 500, 1
momentum, decay_factor, optimizer_code = 0.85, 0.9, 2
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    momentum=momentum,
    decay_factor=decay_factor,
    optimizer_code=optimizer_code,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

## Generate Data (Gaussian Noise, Standard Deviation = 0.10)

In [None]:
# Problem size and settings for the noisy dataset.
n_train = 2_000_000  # number of training samples
n_test = 1_000_000  # number of test samples
d = 100  # number of features
noise_std = 0.10  # std of the Gaussian noise added to the response variable
intercept = True  # whether to add a bias term to the linear model

# `linear` comes from SGD_Experiments_Data_Generation; its six return values
# are unpacked in the order shown.
X_train, X_test, Y_train, Y_test, W, b = linear(
    n_train, n_test, d, noise_std, intercept
)
print(
    X_train.shape,
    X_test.shape,
    Y_train.shape,
    Y_test.shape,
    W.shape,
    b,
)

### Save Data

In [None]:
# Write the current dataset and the true parameters to ../Data.
# The shared part of the file name is built once, so the six paths cannot
# drift apart through copy-paste edits.
run_tag = (
    f"time={formatted_time}_train={n_train}_test={n_test}"
    f"_d={d}_noise={noise_std}_intercept={intercept}"
)
for array_name, array in {
    "X_train": X_train,
    "X_test": X_test,
    "Y_train": Y_train,
    "Y_test": Y_test,
    "W": W,
    "b": b,
}.items():
    # Resulting path: ../Data/<name>_time=..._intercept=....npy
    np.save(f"../Data/{array_name}_{run_tag}.npy", array)

### Load Data

In [None]:
# Read the dataset back from ../Data using the same naming scheme as the
# save cell.
# NOTE: the file names embed `formatted_time`, which is recomputed from
# datetime.now() whenever the "Global Variables" cell runs, so these paths
# only resolve to files saved with the same `formatted_time` value.
run_tag = (
    f"time={formatted_time}_train={n_train}_test={n_test}"
    f"_d={d}_noise={noise_std}_intercept={intercept}"
)
X_train, X_test, Y_train, Y_test, W, b = (
    np.load(f"../Data/{array_name}_{run_tag}.npy")
    for array_name in ("X_train", "X_test", "Y_train", "Y_test", "W", "b")
)

### SGD, Least Squares, Fixed Learning Rate of 0.005, Epochs = 500, Batch Size = 1

In [None]:
# Baseline run: fixed learning rate, batch size 1. Only the arguments listed
# are passed; every other SGD1 option keeps its default (not visible here).
lr, epochs, batch_size = 0.005, 500, 1
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, Epochs = 500, Batch Size = 64

In [None]:
# Same settings as the baseline run, but with a batch size of 64.
lr, epochs, batch_size = 0.005, 500, 64
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, Epochs = 500, Batch Size = 128

In [None]:
# Same settings as the baseline run, but with a batch size of 128.
lr, epochs, batch_size = 0.005, 500, 128
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, Epochs = 500, Batch Size = 256

In [None]:
# Same settings as the baseline run, but with a batch size of 256.
lr, epochs, batch_size = 0.005, 500, 256
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005 w/Momentum of 0.85, Epochs = 500, Batch Size = 1

In [None]:
# Batch size 1 with a momentum coefficient of 0.85 passed to SGD1.
lr, momentum, epochs, batch_size = 0.005, 0.85, 500, 1
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    momentum=momentum,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005 w/Momentum of 0.85 and Nesterov Momentum Calculation, Epochs = 500, Batch Size = 1

In [None]:
# Batch size 1, momentum 0.85, with the `nesterov` flag switched on.
lr, momentum, nesterov = 0.005, 0.85, True
epochs, batch_size = 500, 1
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    momentum=momentum,
    nesterov=nesterov,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, Adagrad Optimization, Epochs = 500, Batch Size = 1

In [None]:
# Batch size 1 with optimizer_code = 1; per this section's heading that code
# selects Adagrad -- confirm against SGD1.
lr, optimizer_code, epochs, batch_size = 0.005, 1, 500, 1
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    optimizer_code=optimizer_code,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

### SGD, Least Squares, Fixed Learning Rate of 0.005, RMSProp Optimization, Momentum = 0.85, Decay Factor = 0.9, Epochs = 500, Batch Size = 1

In [None]:
# Batch size 1 with optimizer_code = 2 (RMSProp per this section's heading --
# confirm against SGD1), momentum 0.85 and decay factor 0.9.
lr, epochs, batch_size = 0.005, 500, 1
momentum, decay_factor, optimizer_code = 0.85, 0.9, 2
W_estimated, b_estimated, losses = SGD1(
    X=X_train,
    Y=Y_train,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
    momentum=momentum,
    decay_factor=decay_factor,
    optimizer_code=optimizer_code,
    bias=intercept,
)

In [None]:
# Draw the recorded `losses` against a 1-based index on the current axes.
# The x-axis is labelled "Epoch"; assumes one loss per epoch -- TODO confirm.
steps = range(1, len(losses) + 1)
ax = plt.gca()
ax.plot(steps, losses, label='Loss')
ax.set_title('Loss as a function of Epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()