In [27]:
import numpy as np
# Import the OpenML loader to fetch the same data used in Homework 1
from sklearn.datasets import fetch_openml


In [28]:
# Fetch the mammography dataset (as_frame=True, so pandas objects come back),
# then pull out the feature table and the target column separately
data = fetch_openml(name='mammography', as_frame=True)
X = data.data
y = data.target


In [29]:
# Rebuild the arrays from `data` rather than from X / y, so this cell gives the
# same result every time it is run (X is overwritten with an ndarray here and
# gains an intercept column later, which would break or distort a re-run).
X = data.data.to_numpy()

# Converting the target to a numeric NumPy column vector of shape (n, 1)
y = data.target.astype(float).to_numpy().reshape(-1, 1)

# Standardizing the features (mean=0, std=1).
# A constant column has std 0; divide it by 1 instead so it becomes all zeros
# rather than NaN from 0/0.
feature_std = np.std(X, axis=0)
feature_std[feature_std == 0] = 1.0
X = (X - np.mean(X, axis=0)) / feature_std

In [30]:
# Prepend a constant-1 column so the first coefficient acts as the intercept.
# NOTE: X is reassigned from itself, so re-running this cell adds another ones column.
X = np.concatenate((np.ones((len(X), 1)), X), axis=1)

# Defining the linear regression loss (sum of squared errors) for a given beta
def linear_regression(beta, X, y):
    """Return the sum of squared errors of the prediction ``X.dot(beta)`` against ``y``.

    Despite the name, nothing is fitted here: the function only scores the
    coefficients it is given.

    Parameters
    ----------
    beta : coefficient array; X.dot(beta) must be subtractable from y
        (assumed (p, 1) with y of shape (n, 1), as used in this notebook).
    X : design matrix.
    y : observed targets.

    Returns
    -------
    The scalar sum of squared residuals.
    """
    residuals = X.dot(beta) - y
    return np.sum(np.square(residuals))

In [31]:
# Defining the gradient descent function
def gradient_descent(X, y, learning_rate, num_iterations):
    """Estimate linear-regression coefficients by batch gradient descent.

    Starting from beta = 0, each iteration moves beta against the gradient of
    the mean squared error, ``2 * X.T.dot(X.dot(beta) - y) / n``, and then
    scores the updated beta with ``linear_regression`` (sum of squared errors).
    The coefficients with the lowest loss seen so far are remembered.

    Parameters
    ----------
    X : array-like of shape (n, p)
        Design matrix. Add a column of ones beforehand if an intercept is wanted.
    y : array-like of shape (n,) or (n, 1)
        Targets; reshaped to a column so a 1-D y cannot broadcast the
        residual into an (n, n) matrix.
    learning_rate : float
        Step size applied to the gradient.
    num_iterations : int
        Number of update steps to run.

    Returns
    -------
    dict
        'beta': (p, 1) array holding the lowest-loss coefficients encountered.
        'loss': that lowest loss. If no iteration produced a loss below
        infinity (e.g. num_iterations == 0), 'beta' is all zeros and 'loss' is inf.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    n = X.shape[0]
    p = X.shape[1]

    # Initializing beta to 0; nothing has been scored yet, so the best loss is inf.
    beta = np.zeros((p, 1))
    best_loss = float('inf')
    best_beta = beta.copy()

    for i in range(num_iterations):
        # Computing the gradient of the mean squared error at beta
        y_pred = X.dot(beta)
        gradient = 2 * X.T.dot(y_pred - y) / n

        # Updating beta (in place: the same array object is reused every iteration)
        beta -= learning_rate * gradient

        # Keeping track of the best loss and parameters seen so far.
        # A copy is required: storing `beta` itself would alias the array that
        # the in-place update keeps mutating, so "best" would silently become
        # whatever the final iterate is.
        current_loss = linear_regression(beta, X, y)
        if current_loss < best_loss:
            best_loss = current_loss
            best_beta = beta.copy()

        # Printing progress for the first 10 and the last 9 iterations only
        if i < 10 or i > num_iterations - 10:
            print(f"Iteration: {i}, Beta Values: {beta.flatten()}")
            print(f"Best Loss: {best_loss}\n")

    # Returning a dictionary of final results
    return {'beta': best_beta, 'loss': best_loss}


In [32]:
# Gradient-descent hyperparameters: step size and number of update steps
learning_rate = 1e-4
num_iterations = 30_000


In [13]:
# Fit the coefficients; `result` is the dict returned by gradient_descent
result = gradient_descent(
    X=X,
    y=y,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
)


Iteration: 0, Beta Values: [-1.90700170e-04  7.20606774e-06 -1.09054622e-06 -2.84963172e-06
  1.55252558e-05  2.85046864e-05  1.01845843e-05]
Best Loss: 11178.797292819947

Iteration: 1, Beta Values: [-3.81362200e-04  1.44087788e-05 -2.18266811e-06 -5.70000168e-06
  3.10430768e-05  5.70014193e-05  2.03620397e-05]
Best Loss: 11174.59630933835

Iteration: 2, Beta Values: [-5.71986097e-04  2.16081349e-05 -3.27636446e-06 -8.55110899e-06
  4.65534668e-05  8.54902017e-05  3.05323699e-05]
Best Loss: 11170.397048822611

Iteration: 3, Beta Values: [-7.62571870e-04  2.88041377e-05 -4.37163407e-06 -1.14029527e-05
  6.20564295e-05  1.13971036e-04  4.06955783e-05]
Best Loss: 11166.199510540471

Iteration: 4, Beta Values: [-9.53119525e-04  3.59967889e-05 -5.46847571e-06 -1.42555321e-05
  7.75519684e-05  1.42443926e-04  5.08516685e-05]
Best Loss: 11162.003693760002

Iteration: 5, Beta Values: [-1.14362907e-03  4.31860904e-05 -6.56688817e-06 -1.71088460e-05
  9.30400874e-05  1.70908874e-04  6.10006442

In [33]:
# Show the coefficients (flattened to 1-D) and the loss returned by gradient descent
print("Optimal Theta (Coefficients):", result['beta'].ravel())
print("Best Loss:", result['loss'])

Optimal Theta (Coefficients): [-0.95113878  0.02121199 -0.02517584 -0.01669569  0.0614523   0.13308893
 -0.04567986]
Best Loss: 755.1140608180835


In [34]:
# To compare the Ordinary Least Squares (OLS) solution from Homework 1 with the gradient descent estimate,
# we calculate the absolute difference between the coefficients obtained by the two methods.

# OLS estimates carried over from Homework 1 (using 4 features)
ols_coefficients = np.asarray((1.172653, -0.235387, -0.036558, 0.137994))



In [35]:
# Take the first 4 gradient-descent coefficients to line up with the 4 OLS values.
# NOTE(review): beta here is an intercept followed by standardized features; confirm
# the Homework 1 OLS fit used the same columns and scaling, otherwise the paired
# entries do not describe the same quantities.
gradient_descent_coefficients = result['beta'].reshape(-1)[:4]

# Element-wise absolute gap between the two sets of estimates
differences = np.absolute(gradient_descent_coefficients - ols_coefficients)


In [36]:
# Report the absolute difference for each compared coefficient
print("Coefficient Differences:")
for i, gap in enumerate(differences[:len(ols_coefficients)]):
    print(f"Beta_{i}: {gap}")

Coefficient Differences:
Beta_0: 2.123791775056586
Beta_1: 0.25659898559630245
Beta_2: 0.011382159761906742
Beta_3: 0.15468968616197254
