In [1]:
import numpy as np

In [34]:
def linear_regression_normal_equation(X, y):
    """Closed-form least-squares fit for linear regression.

    Finds the theta minimising ||X @ theta - y||^2, i.e. a solution of the
    normal equation (X^T X) theta = X^T y. The system is solved with
    ``np.linalg.lstsq`` rather than by explicitly inverting X^T X: forming
    and inverting X^T X amplifies round-off error and raises ``LinAlgError``
    when X^T X is singular (for example duplicated or linearly dependent
    feature columns). In that rank-deficient case ``lstsq`` returns the
    minimum-norm least-squares solution instead of failing.

    Args:
        X: m x n matrix of historical feature data (array-like). Include a
            column of ones if an intercept term is wanted.
        y: length-m vector of ground-truth targets (array-like).

    Returns:
        theta: numpy array of fitted parameters, rounded to 4 decimal places.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    # rcond=None selects the machine-precision-based cutoff for small
    # singular values; only the solution vector is needed here.
    theta = np.linalg.lstsq(X, y, rcond=None)[0]
    return np.round(theta, 4)


In [None]:
import numpy as np

def compute_cost(X, y, theta):
    """
    Computes the halved mean squared error cost for linear regression:
    J(theta) = 1 / (2 * m) * sum((X . theta - y) ** 2).

    Predictions and targets are flattened before they are subtracted, so a
    1-D ``y`` of shape (m,) combined with a column ``theta`` of shape (n, 1)
    (or the reverse) gives the same result as matching shapes. Without the
    flattening, NumPy broadcasting turns the (m, 1) - (m,) difference into an
    (m, m) matrix and silently inflates the cost.

    Args:
        X: Feature matrix (m, n), where m is the number of samples and n is the number of features.
        y: Target vector, shape (m,) or (m, 1).
        theta: Parameter vector, shape (n,) or (n, 1).

    Returns:
        cost: Scalar value of the halved MSE loss.

    Raises:
        ZeroDivisionError: If ``y`` is empty (m == 0).
    """
    m = len(y)  # Number of training examples
    predictions = np.ravel(np.dot(X, theta))  # Flattened to shape (m,)
    targets = np.ravel(y)  # Flattened to shape (m,)
    residuals = predictions - targets  # Element-wise, no accidental broadcasting
    cost = (1 / (2 * m)) * np.sum(residuals ** 2)
    return cost

def linear_regression_gradient_descent(X, y, alpha, iterations, _lambda=0, epsilon=1e-6):
    """
    Fits linear-regression parameters with batch gradient descent.

    Theta starts as an all-zero column vector. Every step first shrinks theta
    by the factor (1 - alpha * _lambda / m) (the L2 term, applied to every
    component of theta) and then moves against the data gradient
    X^T (X theta - y) / m scaled by alpha. After each step the cost reported
    by ``compute_cost`` is compared with the cost of the previous step; once
    the absolute change drops below ``epsilon`` the iteration index is printed
    and the loop ends early.

    Args:
        X: Feature matrix (m, n): m samples, n features.
        y: Target values, m entries; reshaped internally to a column (m, 1).
        alpha: Learning rate (step size).
        iterations: Upper bound on the number of update steps.
        _lambda: L2 regularization strength (default 0, no regularization).
        epsilon: Early-stop threshold on the change in cost (default 1e-6).

    Returns:
        theta: 1-D array of the n fitted parameters, rounded to 4 decimals.
    """
    features = np.array(X)
    targets = np.array(y).reshape(-1, 1)  # column vector (m, 1)

    m, n = features.shape  # sample count, feature count
    theta = np.zeros((n, 1))
    last_cost = float('inf')  # guarantees the first comparison never stops the loop

    for i in range(iterations):
        # Prediction error for the current parameters, shape (m, 1).
        residual = features.dot(theta) - targets

        # Derivative of the data term of the cost w.r.t. theta, shape (n, 1).
        gradient = (1 / m) * features.T.dot(residual)

        # Weight decay from the L2 penalty; equals 1 when _lambda is 0.
        decay = 1 - alpha * _lambda / m
        theta = theta * decay - alpha * gradient

        # Cost after the update, used only for the stopping test.
        cost = compute_cost(features, targets, theta)
        converged = abs(last_cost - cost) < epsilon
        last_cost = cost

        if converged:
            print(f'Converged at iteration {i}')
            break

    # Flatten to (n,) and round for readability.
    return np.round(theta.flatten(), 4)


In [51]:
# Toy data set: the first column is a constant 1 (bias term) and the target
# equals the second column, so the exact least-squares fit is theta = [0, 1].
X = [
    [1, 1],
    [1, 2],
    [1, 3],
]
y = [1, 2, 3]

# Closed-form solution.
print(linear_regression_normal_equation(X, y))

# Iterative solution: learning rate 0.1, at most 10000 steps, L2 strength 0.1.
print(linear_regression_gradient_descent(X, y, alpha=0.1, iterations=10000, _lambda=0.1))

[-0.  1.]
Converged at iteration 259
[0.083  0.9575]
