In [3]:
import numpy as np
import timeit

np.random.seed()  # for repeatability

# Parameters
N = 200  # Number of data points
d = 100  # Dimension of each data point (adjust as per failure dimension in previous experiment)
lambda_reg = 0.001
eps = np.random.randn(N, 1)  # Random noise

# Create data matrix A and label vector y
A = np.random.randn(N, d)

# Normalize the columns
for j in range(A.shape[1]):
    A[:, j] = A[:, j] / np.linalg.norm(A[:, j])

# True underlying parameter vector xorig
xorig = np.ones((d, 1))

# Label vector y = Axorig + noise
y = np.dot(A, xorig) + eps

# Initialize optimization variable x
x = np.zeros((d, 1))

# Parameters for optimization
epochs = 1e4  # Number of epochs
t = 1  # Initial learning rate

# Index array for shuffling
arr = np.arange(N)

# Start the timer
start = timeit.default_timer()

# Stochastic gradient descent (SGD) loop
for epoch in range(int(epochs)):
    np.random.shuffle(arr)  # Shuffle the indices every epoch
    for i in np.nditer(arr):  # Pass through data points
        # Compute the gradient gi(x) for the current data point
        # For example, assume gi(x) = A[i, :]^T * (A[i, :] * x - y[i])
        gi = A[i, :].reshape(-1, 1) * (np.dot(A[i, :], x) - y[i])

        # Update x using the gradient descent step
        x -= 1/t * gi

        # Update the learning rate (optional)
        t += 1
        if t > 1e4:
            t = 1

# End the timer
alglab6time = timeit.default_timer() - start

# Compute the desired results
Ax_alglab6 = np.dot(A, x)
grad_norm = np.linalg.norm(gi)  # ∇fλ(x∗)
Ax_diff = np.linalg.norm(Ax_alglab6 - y)  # ||Ax∗ - y||²
x_diff = np.linalg.norm(x - xorig)  # ||x∗ - xorig||²

print(f"Time taken: {alglab6time} seconds")
print(f"||∇fλ(x∗)||: {grad_norm}")
print(f"||Ax∗ - y||²: {Ax_diff}")
print(f"||x∗ - xorig||²: {x_diff}")


Time taken: 45.49084089999087 seconds
||∇fλ(x∗)||: 0.2848006037559653
||Ax∗ - y||²: 11.03848539933142
||x∗ - xorig||²: 11.687490029344573
