## Answer 1 – Paola Valdes-Sueiras

In [None]:
import numpy as np
import pandas as pd

# Load training and test data from CSV files (no header row; every line is data)
X_train = pd.read_csv('train_data_set.csv', header=None).values # Training data as a 2-D array
y_train = pd.read_csv('train_label_set.csv', header=None).values.flatten() # Training labels flattened to 1-D
X_test = pd.read_csv('test_data_set.csv', header=None).values # Test data as a 2-D array
y_test = pd.read_csv('test_label_set.csv', header=None).values.flatten() # Test labels flattened to 1-D

# Parameters
m = 20 # Number of linear units (rows of W)
lambda_ = 0.01 # Regularization parameter (weight of the L1 penalty)
step_size = 0.001 # Gradient descent step size
num_epochs = 1000 # Number of training epochs

# Initialize weights randomly
d = X_train.shape[1] # Number of features (columns of X_train)
W = np.random.randn(m, d) # One weight row per unit; unseeded, so runs are not reproducible

# Network output with linear (identity) activations
def F_ANN(X, W):
    """Return one output per row of X: the sum of its dot products with every row of W.

    X is indexed as (samples, features) and W as (units, features).
    """
    unit_outputs = X @ W.T
    return unit_outputs.sum(axis=1)

# Training loop: batch (sub)gradient descent on
#   (1 / (2N)) * sum((F_ANN(X, W) - y)^2) + lambda_ * sum(|W|)
n_train = len(y_train)
for epoch in range(num_epochs):
    # Predictions and residuals at the current weights
    preds = F_ANN(X_train, W)
    error = preds - y_train

    # Objective at the current weights. It is evaluated BEFORE the update so
    # that the data term and the penalty term both refer to the same W.
    loss = (1 / (2 * n_train)) * np.sum(error ** 2) + lambda_ * np.sum(np.abs(W))
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss: .4f}")

    # Gradient of the data term. The output is a plain sum over the rows of W,
    # so every row has the same gradient; the 1-D result broadcasts over rows.
    loss_grad = (1 / n_train) * np.dot(error, X_train)
    # Subgradient of the L1 penalty (np.sign returns 0 at exactly 0)
    reg_grad = lambda_ * np.sign(W)
    grad = loss_grad + reg_grad

    # Gradient descent update (in place)
    W -= step_size * grad

# Held-out evaluation: half mean squared error on the test split (no penalty term)
test_preds = F_ANN(X_test, W)
test_residuals = test_preds - y_test
test_loss = (1 / (2 * len(y_test))) * np.sum(test_residuals ** 2)
print(f"Test Loss: {test_loss:.4f}")

## Answer 2 - Andres

In [None]:
from handle_data import load_data
import ml as ml
import numpy as np

# Load the four data splits through the project helper.
# NOTE(review): 'data' is presumably the directory holding the CSV files — confirm in handle_data.
X_train, y_train, X_test, y_test = load_data('data')

# Width of the input layer = number of columns of X_train
input_size = X_train.shape[1]

# Layer sizes [input, 100, 100, 1] with one activation name per non-input layer.
# NOTE(review): the output layer is also 'relu'; if ml.NeuralNetwork applies it as
# named, predictions can never be negative — confirm this is intended for the labels.
model = ml.NeuralNetwork([input_size, 100, 100, 1], ['relu', 'relu', 'relu'])
# Features are passed transposed and labels reshaped to a single row.
# NOTE(review): reg_lambda=0 presumably switches off the penalty of 'l1_reg_mse',
# leaving a plain MSE fit — confirm against ml.NeuralNetwork.train.
model.train(X_train.T, y_train.reshape(1,-1), epochs=100, cost_fn='l1_reg_mse',reg_lambda=0)

## Answer 3 – Andrea Riquezes Gete

In [None]:
import numpy as np
import pandas as pd

# Load data from comma-separated CSV files; labels are reshaped into column vectors
train_data = np.loadtxt("train_data_set.csv", delimiter=",")
train_labels = np.loadtxt("train_label_set.csv", delimiter=",").reshape(-1, 1)
test_data = np.loadtxt("test_data_set.csv", delimiter=",")
test_labels = np.loadtxt("test_label_set.csv", delimiter=",").reshape(-1, 1)

# Dimensions
N, d = train_data.shape  # Number of training samples and number of features
m = 20  # Number of neurons (as per problem statement); W has one column per neuron


# Hyperparameters
lambda_reg = 0.3  # Weight of the squared (L2) penalty on W
tol = 1e-6        # Gradient-norm threshold below which the loop stops
max_iters = 100   # Maximum iterations for Newton's method


# Initialize weights W randomly, uniform in [0, 1); unseeded, so runs are not reproducible
W = np.random.rand(d, m)

# Define helper functions
def compute_loss(X, Y, W, lambda_reg):
    """
    Compute the objective function value

        (1 / (2n)) * sum((X @ W @ 1 - Y)**2) + lambda_reg * sum(W**2)

    where n is the number of rows of X and 1 is a column of ones with one
    entry per column of W.

    Sizes are taken from X and W themselves rather than from module-level
    globals, so the function is also correct for data sets whose size
    differs from the training set.

    Parameters:
        X: feature matrix, one sample per row.
        Y: targets as a column vector with one row per sample.
        W: weight matrix, one column per neuron.
        lambda_reg: weight of the squared penalty.

    Returns:
        The scalar objective value.
    """
    n_samples = X.shape[0]
    ones_col = np.ones((W.shape[1], 1))
    predictions = X @ W @ ones_col  # F_ANN(x, W): sum of all neuron outputs
    residual = predictions - Y
    data_term = (1 / (2 * n_samples)) * np.sum(residual**2)
    penalty = lambda_reg * np.sum(W**2)
    return data_term + penalty

def compute_gradient(X, Y, W, lambda_reg):
    """
    Compute the gradient of the loss function with respect to W.

    The loss is (1 / (2n)) * sum((X @ W @ 1 - Y)**2) + lambda_reg * sum(W**2).
    Because the prediction is the plain sum of all neuron outputs, every
    column of W receives the same data-term gradient X.T @ residual / n.

    Sizes are taken from X and W rather than from module-level globals.

    Parameters:
        X: feature matrix, one sample per row.
        Y: targets as a column vector with one row per sample.
        W: weight matrix, one column per neuron.
        lambda_reg: weight of the squared penalty.

    Returns:
        An array with the same shape as W.
    """
    n_samples = X.shape[0]
    ones_col = np.ones((W.shape[1], 1))
    residual = X @ W @ ones_col - Y
    # residual @ ones_col.T copies the residual column once per neuron
    data_grad = (1 / n_samples) * (X.T @ (residual @ ones_col.T))
    return data_grad + 2 * lambda_reg * W

def compute_hessian(X, lambda_reg):
    """
    Compute the (features x features) curvature matrix

        (1 / n) * X.T @ X + 2 * lambda_reg * I

    where n is the number of rows of X. Sizes are taken from X rather than
    from module-level globals.

    NOTE(review): this is the second derivative of the loss with respect to
    a single column of W; coupling between different columns is not
    represented, so a step built from it is not a full Newton step.

    Parameters:
        X: feature matrix, one sample per row.
        lambda_reg: weight of the squared penalty.

    Returns:
        A square array with one row/column per feature.
    """
    n_samples, n_features = X.shape
    return (1 / n_samples) * (X.T @ X) + 2 * lambda_reg * np.eye(n_features)

alpha = 0.01  # Learning rate multiplier for Newton's step
damping_factor = 1e-3  # Damping factor for Hessian

# The Hessian depends only on the data and lambda_reg, never on W, so it is
# built and damped once instead of on every iteration.
hessian = compute_hessian(train_data, lambda_reg)
hessian += damping_factor * np.eye(hessian.shape[0])

for iteration in range(max_iters):
    # Loss and gradient at the current weights
    loss = compute_loss(train_data, train_labels, W, lambda_reg)
    grad = compute_gradient(train_data, train_labels, W, lambda_reg)

    # Check gradient norm
    grad_norm = np.linalg.norm(grad)
    print(f"Iteration {iteration}: Loss = {loss}, Gradient Norm = {grad_norm}")

    # Solve hessian @ step = grad for the Newton step; solving the linear
    # system is cheaper and numerically safer than forming the inverse.
    try:
        step = np.linalg.solve(hessian, grad)
    except np.linalg.LinAlgError:
        print("Hessian is singular; stopping optimization.")
        break

    # Update weights with scaled Newton step
    W -= alpha * step

    # Monitor dynamics at the UPDATED weights; the total is recomputed from
    # the two terms so all three printed numbers describe the same W.
    mse_term = (1 / (2 * N)) * np.sum((train_data @ W @ np.ones((m, 1)) - train_labels) ** 2)
    reg_term = lambda_reg * np.sum(W**2)
    print(f"Iteration {iteration}: MSE = {mse_term}, Regularization = {reg_term}, Total Loss = {mse_term + reg_term}")

    # Convergence test uses the gradient norm measured before this update
    if grad_norm < tol:
        print(f"Converged in {iteration + 1} iterations.")
        break

# Final diagnostics. `grad` is the gradient computed at the start of the last
# executed iteration, i.e. before the final weight update.
print(f"Iteration {iteration}: Gradient Norm = {np.linalg.norm(grad)}")
# Condition number of the (damped) Hessian left over from the loop
cond_number = np.linalg.cond(hessian)
print(f"Hessian Condition Number = {cond_number}")

# Compute final predictions on the training set
final_predictions = train_data @ W @ np.ones((m, 1))

# Compute Mean Squared Error term (half the mean squared residual, as in the objective)
mse_term = (1 / (2 * len(train_labels))) * np.sum((final_predictions - train_labels) ** 2)

# Compute Regularization term
reg_term = lambda_reg * np.sum(W**2)

# Compute Total Loss
final_loss = mse_term + reg_term

# Output the final loss
print(f"Final Loss: {final_loss}")
print(f"Mean Squared Error Term: {mse_term}")
print(f"Regularization Term: {reg_term}")


# Evaluate on test set.
# NOTE(review): this is the plain mean of squared residuals (no 1/2 factor),
# so it is not on the same scale as mse_term above.
predictions = test_data @ W @ np.ones((m, 1))
mse_test = np.mean((predictions - test_labels)**2)
print("Test Set Mean Squared Error:", mse_test)

# `predictions` is overwritten with training-set predictions (same expression
# as final_predictions) to show the first five next to their labels.
predictions = train_data @ W @ np.ones((m, 1))
print("Sample Predictions:", predictions[:5].flatten())
print("Sample Labels:", train_labels[:5].flatten())

## Answer 4 - Lindsay Carrillo

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

# load data
# All four files are read from the working directory (the original leading
# '/' on the training file pointed at the filesystem root).
# header=None keeps the first line as data: pandas would otherwise consume it
# as column names and silently drop one sample from every split.
# NOTE(review): assumes the CSVs have no header row — confirm against the files.
train_data = pd.read_csv('train_data_set.csv', header=None)
test_data = pd.read_csv('test_data_set.csv', header=None)
test_labels = pd.read_csv('test_label_set.csv', header=None)
train_labels = pd.read_csv('train_label_set.csv', header=None)


# Plain numpy arrays; labels are flattened to 1-D
X_train = train_data.values
y_train = train_labels.values.ravel()
X_test = test_data.values
y_test = test_labels.values.ravel()

# grid search for hyperparameter tuning
param_grid = [0.001, 0.01, 0.1, 1, 10, 100]  # possible lambdas
# 'alpha' is used in place of 'lambda' ('lambda' is a Python keyword)
best_alpha = None
best_mse = float('inf')
best_w = None

# Hold out a fixed random 20% of the training rows for model selection.
# Training error can only grow as the ridge penalty grows, so choosing lambda
# by training MSE would always return the smallest value in the grid.
n_rows, n_features = X_train.shape
holdout_order = np.random.default_rng(0).permutation(n_rows)
n_val = max(1, n_rows // 5)
val_idx, fit_idx = holdout_order[:n_val], holdout_order[n_val:]
X_fit, y_fit = X_train[fit_idx], y_train[fit_idx]
X_val, y_val = X_train[val_idx], y_train[val_idx]

# Pieces of the normal equations that do not depend on lambda
XTX = X_fit.T @ X_fit
XTy = X_fit.T @ y_fit
identity = np.eye(n_features)

for alpha in param_grid:
    # ridge solution of (X^T X + alpha * I) w = X^T y; solving the system
    # avoids forming an explicit inverse
    w = np.linalg.solve(XTX + alpha * identity, XTy)

    # score on the held-out rows only
    mse = mean_squared_error(y_val, X_val @ w)
    print(f'Lambda: {alpha}, Validation Mean Squared Error: {mse}')

    # keep the lambda with the lowest validation error
    if mse < best_mse:
        best_mse = mse
        best_alpha = alpha

if best_alpha is None:
    raise ValueError('no lambda in param_grid produced a finite validation error')

# refit on all training rows with the selected lambda
best_w = np.linalg.solve(X_train.T @ X_train + best_alpha * identity, X_train.T @ y_train)

# make predictions on test set using best model
y_pred_test = X_test @ best_w

# calc mse on test set
test_mse = mean_squared_error(y_test, y_pred_test)

print(f'Best lambda: {best_alpha}')
print(f'Test Mean Squared Error: {test_mse}')

## Answer 5 – Yousef Bani Ahmad

In [None]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Read the training split from the MATLAB file
training_data = sio.loadmat('training_data.mat')
train_data = training_data['train_data_set']
# Labels are flattened and then stored as a single column
train_label = training_data['train_label_set'].ravel().reshape(-1, 1)

# Problem size and hyperparameters
m = 20
d = train_data.shape[1]
lambda_reg, learning_rate = 0.01, 0.001
num_epochs = 50

# Small random starting weights, one row per unit
W = 0.01 * np.random.randn(m, d)

def activation(x):
    """Identity (linear) activation: return the input unchanged."""
    return x


def model(x, W):
    """Return the network output for x: the sum over all units of activation(w_j . x).

    W holds one weight row per unit. The original multiplied by W.T, which
    does not match that layout and never summed over the units; this version
    agrees with the prediction formula used by the training loop and by
    compute_loss.

    Parameters:
        x: a feature vector of length d, or an array with d rows whose
           columns are individual samples.
        W: weight matrix with one row per unit and d columns.

    Returns:
        A scalar for a 1-D x, otherwise one value per column of x.
    """
    unit_outputs = activation(np.dot(W, x))
    return unit_outputs.sum(axis=0)

def compute_loss(x, y, W, reg_strength=None):
    """Return the L1-regularized half mean squared error.

        (1 / (2n)) * sum((pred - y)**2) + reg_strength * sum(|W|)

    where pred is, for every row of x, the sum of its dot products with all
    rows of W, and n is len(y).

    Parameters:
        x: feature matrix, one sample per row.
        y: labels; squeezed before use, so a column vector is accepted.
        W: weight matrix, one row per unit.
        reg_strength: weight of the L1 penalty. When omitted, the
            module-level lambda_reg is used, which is what the original
            three-argument call did.

    Returns:
        The scalar loss value.
    """
    if reg_strength is None:
        reg_strength = lambda_reg
    predictions = np.dot(x, W.T).sum(axis=1)
    errors = predictions - y.squeeze()
    loss = (1 / (2 * len(y))) * np.sum(errors ** 2)
    reg_term = reg_strength * np.sum(np.abs(W))
    return loss + reg_term

# Stochastic Gradient Descent on the objective evaluated by compute_loss:
#   (1 / (2N)) * sum(err**2) + lambda_reg * sum(|W|)
n_samples = len(train_data)
for epoch in range(num_epochs):
    for x_i, y_i in zip(train_data, train_label):
        # Network output for this sample: sum over the units of (w_j . x_i)
        prediction = np.dot(W, x_i).sum()
        residual = prediction - y_i

        # The output is a plain sum over the rows of W, so every row has the
        # same data gradient; it broadcasts over rows instead of being copied
        # row by row in a Python loop.
        grad_row = residual * x_i

        # Both the data gradient and the L1 subgradient are scaled by 1/N, so
        # the updates of one epoch add up to (approximately) one full-batch
        # step on the objective above. Previously only the data term carried
        # the 1/N factor, which made the penalty N times stronger than the
        # lambda_reg used in compute_loss.
        W -= (learning_rate / n_samples) * (grad_row + lambda_reg * np.sign(W))

    # Print loss for every 5 epochs
    if (epoch + 1) % 5 == 0:
        current_loss = compute_loss(train_data, train_label, W)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {current_loss}')

# Read the test split from the MATLAB file
testing_data = sio.loadmat('testing_data.mat')
test_data = testing_data['test_data_set']
test_label = testing_data['test_label_set'].ravel()

# One prediction per test row: sum of the outputs of all units
test_predictions = (test_data @ W.T).sum(axis=1)

# Regression metrics on the test split
mse = mean_squared_error(test_label, test_predictions)
mae = mean_absolute_error(test_label, test_predictions)
r2 = r2_score(test_label, test_predictions)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Mean Absolute Error (MAE): {mae}')
print(f'R-Squared (R^2): {r2}')

# Inspect a single test example: its features, its label and its prediction
index = 100
data_point = test_data[index]

actual_label = test_label[index]
predicted_label = test_predictions[index]

print(f"Actual Label at index {index}: {actual_label}")
print(f"Predicted Label at index {index}: {predicted_label}")

# Draw the example as an image when it has 28 * 28 = 784 features,
# otherwise as a line plot of the raw feature values
if len(data_point) == 784:
    image = data_point.reshape(28, 28)
    plt.imshow(image, cmap='gray')
else:
    print("Data point is not an image, visualizing as a line plot.")
    plt.plot(data_point)

# Title and display are the same for both kinds of plot
plt.title(f'Actual Label: {actual_label}, Predicted Label: {predicted_label}')
plt.show()