### Question 1


#### L1 Regularization


In [1]:
import numpy as np

def soft_thresholding(x, alpha):
    """Shrink every entry of ``x`` toward zero by ``alpha``.

    Entries whose magnitude does not exceed ``alpha`` become zero; all other
    entries keep their sign and lose ``alpha`` in magnitude.
    """
    shrunk_magnitude = np.maximum(np.abs(x) - alpha, 0)
    return np.sign(x) * shrunk_magnitude

def proximal_gradient_method(initial_x, max_iterations=1000, alpha_type="fixed", r=1, tol=1e-6):
    """Minimise 0.5*||x - 2||^2 + 0.5*||x||_1 with the proximal gradient method.

    Each iteration takes a gradient step on the smooth quadratic term and then
    applies soft-thresholding (the proximal step for the L1 term).

    Parameters
    ----------
    initial_x : array-like
        Starting point. Any length is accepted; the target value 2 is applied
        to every coordinate.
    max_iterations : int
        Upper bound on the number of iterations.
    alpha_type : {"fixed", "variable"}
        "fixed" uses the step size 1/2; "variable" uses 1 / (r + k) at
        iteration k (k starts at 0).
    r : number
        Offset in the variable step-size rule. Must be positive when
        ``alpha_type == "variable"`` so that the step is finite and positive.
    tol : float
        Iteration stops once the Euclidean distance between two successive
        iterates drops below this value.

    Returns
    -------
    x : numpy.ndarray
        Last accepted iterate.
    history : list of numpy.ndarray
        Accepted iterates, beginning with the initial point. The iterate that
        triggers the stopping test is not appended.

    Raises
    ------
    ValueError
        If ``alpha_type`` is unknown, or ``r`` is not positive for the
        variable rule.
    """
    # Validate once up front: the check does not depend on the iteration and
    # must also fire when max_iterations is 0.
    if alpha_type not in ("fixed", "variable"):
        raise ValueError("alpha_type must be 'fixed' or 'variable'")
    if alpha_type == "variable" and r <= 0:
        # r + k would be zero (division by zero) or negative (ascent step).
        raise ValueError("r must be positive when alpha_type is 'variable'")

    x = np.array(initial_x, dtype=float)
    history = [x.copy()]

    for k in range(max_iterations):
        alpha_k = 0.5 if alpha_type == "fixed" else 1 / (r + k)

        # Gradient of the smooth part 0.5*||x - 2||^2, valid for any dimension.
        gradient_g = x - 2

        # Gradient step
        x_gradient_step = x - alpha_k * gradient_g

        # Proximal step (soft-thresholding with threshold alpha_k * 0.5)
        x_next = soft_thresholding(x_gradient_step, alpha_k * 0.5)

        # Check for convergence
        if np.linalg.norm(x_next - x) < tol:
            break

        # Update x
        x = x_next
        history.append(x.copy())

    return x, history

# Problem setup: start from the origin; r feeds the variable step-size rule.
initial_x = [0.0, 0.0]
r = 8  # Example last digit of roll number

# Run the solver once per step-size rule, capped at 100 iterations each.
solution_fixed, history_fixed = proximal_gradient_method(
    initial_x, 100, "fixed", r
)
solution_variable, history_variable = proximal_gradient_method(
    initial_x, 100, "variable", r
)

# Report both final iterates.
print("Solution with fixed step size:", solution_fixed)
print("Solution with variable step size:", solution_variable)


Solution with fixed step size: [1.49999928 1.49999928]
Solution with variable step size: [1.40186916 1.40186916]


#### L2 Regularization


In [2]:
import numpy as np

def l2_proximal_operator(x, alpha):
    """
    Proximal operator of the ridge (squared L2) penalty.

    Returns x / (1 + alpha), which is the minimiser over z of
    (alpha / 2) * ||z||^2 + 0.5 * ||z - x||^2.
    """
    shrink_divisor = 1 + alpha
    return x / shrink_divisor

def proximal_gradient_method_l2(initial_x, max_iterations=1000, alpha_type="fixed", r=1, tol=1e-6):
    """Proximal gradient method for 0.5*||x - 2||^2 plus a ridge penalty.

    Each iteration takes a gradient step on the quadratic loss and then applies
    ``l2_proximal_operator`` with parameter ``alpha_k * 0.5``.

    Parameters
    ----------
    initial_x : array-like
        Starting point. Any length is accepted; the target value 2 is applied
        to every coordinate.
    max_iterations : int
        Upper bound on the number of iterations.
    alpha_type : {"fixed", "variable"}
        "fixed" uses the step size 1/2; "variable" uses 1 / (r + k) at
        iteration k (k starts at 0).
    r : number
        Offset in the variable step-size rule. Must be positive when
        ``alpha_type == "variable"`` so that the step is finite and positive.
    tol : float
        Iteration stops once the Euclidean distance between two successive
        iterates drops below this value.

    Returns
    -------
    x : numpy.ndarray
        Last accepted iterate.
    history : list of numpy.ndarray
        Accepted iterates, beginning with the initial point. The iterate that
        triggers the stopping test is not appended.

    Raises
    ------
    ValueError
        If ``alpha_type`` is unknown, or ``r`` is not positive for the
        variable rule.
    """
    # Validate once up front: the check does not depend on the iteration and
    # must also fire when max_iterations is 0.
    if alpha_type not in ("fixed", "variable"):
        raise ValueError("alpha_type must be 'fixed' or 'variable'")
    if alpha_type == "variable" and r <= 0:
        # r + k would be zero (division by zero) or negative (ascent step).
        raise ValueError("r must be positive when alpha_type is 'variable'")

    x = np.array(initial_x, dtype=float)
    history = [x.copy()]

    for k in range(max_iterations):
        alpha_k = 0.5 if alpha_type == "fixed" else 1 / (r + k)

        # Gradient of the smooth quadratic loss, valid for any dimension.
        gradient_g = x - 2

        # Gradient step
        x_gradient_step = x - alpha_k * gradient_g

        # Proximal step; 0.5 is the regularization weight.
        x_next = l2_proximal_operator(x_gradient_step, alpha_k * 0.5)

        # Check for convergence
        if np.linalg.norm(x_next - x) < tol:
            break

        # Update x
        x = x_next
        history.append(x.copy())

    return x, history

# Problem setup: start from the origin; r feeds the variable step-size rule.
initial_x = [0.0, 0.0]
r = 8  # Example last digit of roll number

# Run the ridge variant once per step-size rule, capped at 100 iterations each.
solution_fixed, history_fixed = proximal_gradient_method_l2(
    initial_x, 100, "fixed", r
)
solution_variable, history_variable = proximal_gradient_method_l2(
    initial_x, 100, "variable", r
)

# Report both final iterates.
print("Solution with fixed step size:", solution_fixed)
print("Solution with variable step size:", solution_variable)

Solution with fixed step size: [1.33333276 1.33333276]
Solution with variable step size: [1.30993379 1.30993379]


### Question 2


#### L1 Regularization


In [3]:
import numpy as np
import pandas as pd

def soft_thresholding(x, alpha):
    """Soft-thresholding: reduce |x| by alpha, floor at zero, keep the sign."""
    signs = np.sign(x)
    reduced = np.abs(x) - alpha
    return signs * np.maximum(reduced, 0)

def compute_step_size(A):
    """Derive the constant step size from the spectrum of ``A.T @ A``.

    Returns ``1 / (lambda_max / 2 + 0.5)``, where ``lambda_max`` is the largest
    eigenvalue of the symmetric matrix ``A.T @ A``.
    """
    gram = A.T @ A
    largest = np.max(np.linalg.eigvalsh(gram))
    return 1 / (largest / 2 + 0.5)

def proximal_gradient_method(A, y, lambda_val, max_iterations=1000, tol=1e-6):
    """Proximal gradient iteration for least squares with an L1 penalty.

    Starts from the zero vector. Each iteration takes a gradient step on the
    least-squares term using ``A.T @ (A @ x - y)`` and then soft-thresholds
    with threshold ``step * lambda_val``. The step comes from
    ``compute_step_size(A)`` and stays constant.

    Returns the last accepted iterate and the list of accepted iterates
    (initial point first). The iterate that triggers the ``tol`` stopping
    test is not stored.
    """
    current = np.zeros(A.shape[1])
    history = [current.copy()]

    step = compute_step_size(A)
    threshold = step * lambda_val

    for _ in range(max_iterations):
        residual = A @ current - y
        forward_point = current - step * (A.T @ residual)
        candidate = soft_thresholding(forward_point, threshold)

        # Stop as soon as successive iterates are closer than tol.
        if np.linalg.norm(candidate - current) < tol:
            break

        current = candidate
        history.append(current.copy())

    return current, history


# Load the training table.
data = pd.read_csv('./train_a.csv')

# Design matrix: the two feature columns followed by a column of ones so the
# last coefficient plays the role of the intercept.
A = np.hstack([
    data[['Value1','Value2']].values,
    np.ones((len(data), 1)),
])
y = data['Result'].values

# Regularization weight derived from the roll number digit.
R = 8  # last digit of roll number
lambda_val = abs(R / 10 - 5)

# Solve the optimization problem
solution, history = proximal_gradient_method(A, y, lambda_val)

# Split the solution into feature coefficients and intercept.
a, beta = solution[:2], solution[2]

# Output the results
print("Coefficients a:", a)
print("Intercept beta:", beta)
print("Solution is:", solution)



Coefficients a: [-0.20587412 -0.45864919]
Intercept beta: 4.7441788010967956e-07
Solution is: [-2.05874121e-01 -4.58649186e-01  4.74417880e-07]


#### L2 Regularization


In [4]:
import numpy as np
import pandas as pd

def compute_step_size(A):
    """Return 1 / (lambda_max / 2 + 0.5) for the largest eigenvalue of A^T A."""
    spectrum = np.linalg.eigvalsh(A.T @ A)
    top_eigenvalue = spectrum.max()
    denominator = top_eigenvalue / 2 + 0.5
    return 1 / denominator

def proximal_gradient_method_l2(A, y, lambda_val, max_iterations=1000, tol=1e-6):
    """Proximal gradient method for ridge regression.

    Minimises 0.5*||A x - y||^2 + (lambda_val / 2)*||x||^2, starting from the
    zero vector.

    The ridge term is handled by its proximal operator (a uniform scaling by
    1 / (1 + alpha * lambda_val)) instead of being added to the gradient.
    Both formulations share the same minimiser, but the step size returned by
    ``compute_step_size`` is derived from ``A.T @ A`` alone. With
    ``lambda_val * x`` folded into the gradient, that step exceeds the
    stability limit 2 / (lambda_max + lambda_val) whenever ``lambda_val > 1``
    and the iteration can diverge. With the proximal form the same step is
    stable for every ``lambda_val >= 0``.

    Parameters
    ----------
    A : numpy.ndarray, 2-D
        Design matrix.
    y : numpy.ndarray
        Target vector.
    lambda_val : float
        Ridge weight (non-negative).
    max_iterations : int
        Upper bound on the number of iterations.
    tol : float
        Iteration stops once two successive iterates are closer than this in
        Euclidean norm.

    Returns
    -------
    x : numpy.ndarray
        Last accepted iterate.
    history : list of numpy.ndarray
        Accepted iterates, beginning with the zero vector. The iterate that
        triggers the stopping test is not appended.
    """
    # Initialize x
    x = np.zeros(A.shape[1])
    history = [x.copy()]

    # Compute the step size
    alpha_k = compute_step_size(A)

    # Proximal operator of alpha_k * (lambda_val / 2) * ||.||^2 is a scaling;
    # the factor is constant because the step size is constant.
    shrink = 1.0 / (1.0 + alpha_k * lambda_val)

    for k in range(max_iterations):
        # Gradient of the smooth least-squares term only
        gradient = A.T @ (A @ x - y)

        # Gradient step followed by the ridge proximal step
        x_next = shrink * (x - alpha_k * gradient)

        # Check for convergence
        if np.linalg.norm(x_next - x) < tol:
            break

        # Update x
        x = x_next
        history.append(x.copy())

    return x, history

# Read the training table from disk.
data = pd.read_csv('./train_a.csv')

# Design matrix: the two feature columns followed by a column of ones so the
# last coefficient plays the role of the intercept.
A = np.hstack([
    data[['Value1','Value2']].values,
    np.ones((len(data), 1)),
])
y = data['Result'].values

# Regularization weight derived from the roll number digit.
R = 8  # last digit of roll number
lambda_val = abs(R / 10 - 5)

# Run the ridge solver.
solution, history = proximal_gradient_method_l2(A, y, lambda_val)

# Split the solution into feature coefficients and intercept.
a, beta = solution[:2], solution[2]

# Output the results
print("Coefficients a:", a)
print("Intercept beta:", beta)
print("Solution is:", solution)

Coefficients a: [-0.20591411 -0.45863131]
Intercept beta: -4.741042437662977e-06
Solution is: [-2.05914109e-01 -4.58631309e-01 -4.74104244e-06]


### Question 3


#### L1 Regularization


In [5]:
import numpy as np
import pandas as pd

def soft_thresholding(x, alpha):
    """Return sign(x) * max(|x| - alpha, 0), evaluated element-wise."""
    excess = np.abs(x) - alpha
    clipped = np.maximum(excess, 0)
    return np.sign(x) * clipped

def compute_step_size(A):
    """Step size 1 / (lambda_max / 2 + 0.5), lambda_max being the top eigenvalue of A^T A."""
    # eigvalsh is used because A.T @ A is symmetric.
    lambda_max = np.max(np.linalg.eigvalsh(A.T @ A))
    half_plus_offset = lambda_max / 2 + 0.5
    return 1 / half_plus_offset

def proximal_gradient_method(A, y, lambda_val, max_iterations=1000, tol=1e-6):
    """L1-regularized least squares solved by proximal gradient descent.

    The iterate starts at zero. Every pass moves against the least-squares
    gradient ``A.T @ (A @ x - y)`` with the constant step from
    ``compute_step_size(A)`` and then applies ``soft_thresholding`` with
    threshold ``step * lambda_val``.

    Returns ``(x, history)``: the last accepted iterate and all accepted
    iterates in order (initial point included). The iterate that satisfies
    the ``tol`` stopping test is not recorded.
    """
    weights = np.zeros(A.shape[1])
    history = [weights.copy()]

    step_size = compute_step_size(A)

    for _iteration in range(max_iterations):
        # Forward (gradient) half-step on the smooth loss.
        loss_gradient = A.T @ (A @ weights - y)
        after_gradient = weights - step_size * loss_gradient

        # Backward (proximal) half-step for the L1 term.
        proposal = soft_thresholding(after_gradient, step_size * lambda_val)

        change = np.linalg.norm(proposal - weights)
        if change < tol:
            break

        weights = proposal
        history.append(weights.copy())

    return weights, history


# Load the housing table.
data = pd.read_csv('./4 columns.csv')

# Design matrix: the three feature columns followed by a column of ones so the
# last coefficient plays the role of the intercept.
A = np.hstack([
    data[['area','bedrooms','bathrooms']].values,
    np.ones((len(data), 1)),
])
y = data['price'].values

# Regularization weight derived from the roll number digit.
R = 8  # last digit of roll number
lambda_val = abs(R / 10 - 5)

# Solve the optimization problem
solution, history = proximal_gradient_method(A, y, lambda_val)

# Split the solution into feature coefficients and intercept.
a, beta = solution[:3], solution[3]

# Output the results
print("Coefficients a:", a)
print("Intercept beta:", beta)
print("Solution is:", solution)



Coefficients a: [-6.07354283e-02  9.54632440e+01  5.63261542e+01]
Intercept beta: 23.18466936095755
Solution is: [-6.07354283e-02  9.54632440e+01  5.63261542e+01  2.31846694e+01]


#### L2 Regularization


In [6]:
import numpy as np
import pandas as pd

def compute_step_size(A):
    """Compute the fixed step 1 / (lambda_max / 2 + 0.5) from the eigenvalues of A^T A."""
    gram_matrix = A.T @ A
    eigenvalues = np.linalg.eigvalsh(gram_matrix)
    return 1 / (eigenvalues.max() / 2 + 0.5)

def proximal_gradient_method_l2(A, y, lambda_val, max_iterations=1000, tol=1e-6):
    """Proximal gradient method for ridge regression.

    Minimises 0.5*||A x - y||^2 + (lambda_val / 2)*||x||^2, starting from the
    zero vector.

    The ridge term is applied through its proximal operator (scaling by
    1 / (1 + alpha * lambda_val)) rather than by adding ``lambda_val * x`` to
    the gradient. The minimiser is the same, but the step size returned by
    ``compute_step_size`` only accounts for ``A.T @ A``. Plain gradient descent
    on the full objective with that step is unstable whenever
    ``lambda_val > 1``, because the step is then larger than
    2 / (lambda_max + lambda_val). The proximal form stays stable for any
    ``lambda_val >= 0``.

    Parameters
    ----------
    A : numpy.ndarray, 2-D
        Design matrix.
    y : numpy.ndarray
        Target vector.
    lambda_val : float
        Ridge weight (non-negative).
    max_iterations : int
        Upper bound on the number of iterations.
    tol : float
        Iteration stops once two successive iterates are closer than this in
        Euclidean norm.

    Returns
    -------
    x : numpy.ndarray
        Last accepted iterate.
    history : list of numpy.ndarray
        Accepted iterates, beginning with the zero vector. The iterate that
        triggers the stopping test is not appended.
    """
    # Initialize x
    x = np.zeros(A.shape[1])
    history = [x.copy()]

    # Compute the step size
    alpha_k = compute_step_size(A)

    # Constant scaling factor of the ridge proximal operator.
    shrink = 1.0 / (1.0 + alpha_k * lambda_val)

    for k in range(max_iterations):
        # Gradient of the least-squares loss only
        gradient = A.T @ (A @ x - y)

        # Gradient step followed by the ridge proximal step
        x_next = shrink * (x - alpha_k * gradient)

        # Check for convergence
        if np.linalg.norm(x_next - x) < tol:
            break

        # Update x
        x = x_next
        history.append(x.copy())

    return x, history

# Read the housing table from disk.
data = pd.read_csv('./4 columns.csv')

# Design matrix: the three feature columns followed by a column of ones so the
# last coefficient plays the role of the intercept.
A = np.hstack([
    data[['area', 'bedrooms', 'bathrooms']].values,
    np.ones((len(data), 1)),
])
y = data['price'].values

# Regularization weight derived from the roll number digit.
R = 8  # last digit of roll number
lambda_val = abs(R / 10 - 5)

# Run the ridge solver.
solution, history = proximal_gradient_method_l2(A, y, lambda_val)

# First three entries are the feature coefficients, the fourth the intercept.
a, beta = solution[:3], solution[3]

# Output the results
print("Coefficients [area, bedrooms, bathrooms]:", a)
print("Intercept:", beta)
print("Complete solution:", solution)

# Optional: Add prediction functionality
def predict_price(area, bedrooms, bathrooms):
    """Evaluate the fitted linear model stored in the module-level ``solution``.

    The inputs are placed in the order [area, bedrooms, bathrooms, 1]; the
    trailing 1 multiplies the last entry of ``solution`` (the intercept).
    """
    row = np.array([area, bedrooms, bathrooms, 1])
    return np.dot(solution, row)

# Demonstrate the predictor on one hand-picked house.
print("\nExample prediction:")
example_house = [1500, 3, 2]  # area, bedrooms, bathrooms
predicted_price = predict_price(example_house[0], example_house[1], example_house[2])
print(f"Predicted price for house with {example_house[0]} sqft, {example_house[1]} bedrooms, {example_house[2]} bathrooms:")
print(f"${predicted_price:,.2f}")

Coefficients [area, bedrooms, bathrooms]: [-6.30923131e-02  9.54631200e+01  5.63260822e+01]
Intercept: 23.184641120164265
Complete solution: [-6.30923131e-02  9.54631200e+01  5.63260822e+01  2.31846411e+01]

Example prediction:
Predicted price for house with 1500 sqft, 3 bedrooms, 2 bathrooms:
$327.59


### Question 4


#### L1 Regularization


In [7]:
import numpy as np
import pandas as pd

def model_function(x, theta):
    """
    Evaluate y = exp(theta[0]*x) * (cos(theta[1]*x) + sin(theta[2]*x)).

    The first parameter sets the exponential envelope; the second and third
    are the frequencies of the cosine and sine terms.
    """
    envelope = np.exp(theta[0]*x)
    oscillation = np.cos(theta[1]*x) + np.sin(theta[2]*x)
    return envelope * oscillation

def compute_gradient(x, y, theta):
    """
    Gradient, with respect to theta, of the mean over samples of
    0.5 * (model_function(x, theta) - y)**2.

    Returns a length-3 array: one partial derivative per parameter.
    """
    sample_count = len(x)
    y_pred = model_function(x, theta)
    error = y_pred - y
    envelope = np.exp(theta[0]*x)

    # d(model)/d(theta[0]) = x * model, since theta[0] only appears in exp().
    d_theta0 = np.sum(error * x * y_pred)
    # d(model)/d(theta[1]) = exp(theta[0]*x) * (-x * sin(theta[1]*x)).
    d_theta1 = np.sum(error * envelope * (-x*np.sin(theta[1]*x)))
    # d(model)/d(theta[2]) = exp(theta[0]*x) * (x * cos(theta[2]*x)).
    d_theta2 = np.sum(error * envelope * (x*np.cos(theta[2]*x)))

    return np.array([d_theta0, d_theta1, d_theta2], dtype=float) / sample_count

def soft_thresholding(x, alpha):
    """
    Soft-thresholding operator used as the L1 proximal step: magnitudes are
    reduced by alpha and clipped at zero, signs are preserved.
    """
    remaining = np.maximum(np.abs(x) - alpha, 0)
    return np.sign(x) * remaining

def proximal_gradient_method(x, y, lambda_val, max_iterations=1000, learning_rate=0.01, tol=1e-6):
    """Fit the three model parameters by proximal gradient descent with an L1 penalty.

    Starts from theta = 0. Each iteration moves against ``compute_gradient``
    scaled by ``learning_rate`` and then soft-thresholds with threshold
    ``learning_rate * lambda_val``. Iteration stops early once the Euclidean
    change in theta falls below ``tol``.

    Returns ``(theta, history)``. ``theta`` is the most recent iterate.
    ``history`` starts with the zero vector and holds every iterate except
    the one that triggered the early stop.
    """
    theta = np.zeros(3)
    history = [theta.copy()]
    threshold = learning_rate * lambda_val

    for _ in range(max_iterations):
        # soft_thresholding returns a new array, so the old iterate stays intact.
        previous = theta

        descent_point = previous - learning_rate * compute_gradient(x, y, previous)
        theta = soft_thresholding(descent_point, threshold)

        if np.linalg.norm(theta - previous) < tol:
            break

        history.append(theta.copy())

    return theta, history

# Read the customer table and pull out the predictor and the response.
data = pd.read_csv('./Customer Purchasing Behaviors (1).csv')
y = data['purchase_frequency'].values
x = data['loyalty_score'].values

# Regularization weight: R/10 + 1.
R = 98  # Replace with your last two digits of roll number
lambda_val = 1 + R/10

# Standardise the predictor for better convergence.
# Note: this overwrites x, so from here on x holds the standardised values.
x = (x - np.mean(x)) / np.std(x)

# Fit the model parameters.
theta_optimal, history = proximal_gradient_method(x, y, lambda_val)

print("Optimal parameters (θ₁, θ₂, θ₃):", theta_optimal)

# Function to estimate purchase frequency for a given loyalty score
def estimate_purchase_frequency(loyalty_score):
    """Estimate the purchase frequency for a raw (unstandardised) loyalty score.

    The model was fitted on the standardised loyalty score, so the query is
    standardised with the mean and standard deviation of the raw training
    column before it is passed to ``model_function`` with ``theta_optimal``.

    The statistics are read from ``data['loyalty_score']`` rather than from
    the module-level ``x``: ``x`` has already been standardised in place, so
    its own mean and standard deviation are approximately 0 and 1 and would
    leave the query effectively unscaled.
    """
    raw_scores = data['loyalty_score'].values
    x_normalized = (loyalty_score - raw_scores.mean()) / raw_scores.std()
    return model_function(x_normalized, theta_optimal)

# Evaluate the fitted model at the loyalty score R/10 + 1 and report it.
test_value = 1 + R/10
predicted_frequency = estimate_purchase_frequency(test_value)
print(f"\nEstimated purchase frequency for loyalty score {test_value:.2f}:", predicted_frequency)

Optimal parameters (θ₁, θ₂, θ₃): [0. 0. 0.]

Estimated purchase frequency for loyalty score 10.80: 1.0


#### L2 Regularization
