In [76]:
import numpy as np
import sympy as sp

# Fixed 4x3 test matrix, hard-coded so the run is repeatable.
# (A random alternative would be: np.random.randint(1, 10, size=(4, 3)).)
C = np.array(
    [
        [2, -11, 3],
        [6, -2, 3],
        [5, 18, -4],
        [7, 25, 1],
    ],
    dtype=np.float64,
)
print("Matrix C:", C, sep="\n")

# Gram matrix A1 = C^T C (3x3, symmetric by construction).
A1 = C.T @ C
print("\nMatrix A1 = C^T * C:", A1, sep="\n")

# Characteristic polynomial of A1, computed symbolically in the variable x.
lambda_symbol = sp.symbols('x')
A1_sym = sp.Matrix(A1)
char_eq = A1_sym.charpoly(lambda_symbol).as_expr()
print("\nCharacteristic equation of A1:", char_eq, sep="\n")

# Numerical eigen-decomposition; column j of `eigenvectors` pairs with eigenvalues[j].
eigenvalues, eigenvectors = np.linalg.eig(A1)
print("\nEigenvalues of A1:", eigenvalues, sep="\n")
print(
    "\nEigenvectors of A1 (each column corresponds to an eigenvalue):",
    eigenvectors,
    sep="\n",
)

Matrix C:
[[  2. -11.   3.]
 [  6.  -2.   3.]
 [  5.  18.  -4.]
 [  7.  25.   1.]]

Matrix A1 = C^T * C:
[[ 114.  231.   11.]
 [ 231. 1074.  -86.]
 [  11.  -86.   35.]]

Characteristic equation of A1:
1.0*x**3 - 1223.0*x**2 + 103138.0*x - 1007475.0

Eigenvalues of A1:
[1132.73283645   79.01012846   11.25703509]

Eigenvectors of A1 (each column corresponds to an eigenvalue):
[[-0.21977197 -0.84673264 -0.48450399]
 [-0.97274019  0.15252645  0.17467743]
 [ 0.07400541 -0.50968571  0.85717191]]


In [77]:
import numpy as np

def power_method(C, num_iterations=100, tol=1e-6, seed=None):
    """Estimate the dominant eigenpair of a square matrix by power iteration.

    Each step multiplies the current unit vector by ``C``, renormalizes it and
    records the Rayleigh quotient ``b^T C b`` as the eigenvalue estimate.

    Parameters
    ----------
    C : array_like, shape (n, n)
        Square matrix to analyse.
    num_iterations : int, optional
        Maximum number of iterations; must be at least 1.
    tol : float, optional
        Iteration stops once two consecutive eigenvalue estimates differ by
        less than ``tol`` in absolute value.
    seed : int or None, optional
        When given, the start vector is drawn from
        ``np.random.default_rng(seed)`` so the run is reproducible. When
        ``None`` the global NumPy random state is used (original behaviour).

    Returns
    -------
    lambda_k : float
        Last eigenvalue estimate.
    b_k : ndarray, shape (n,)
        Unit-norm vector belonging to that estimate.
    iterates : list of float
        Every eigenvalue estimate produced, in order.

    Raises
    ------
    ValueError
        If ``C`` is not a square 2-D array or ``num_iterations < 1``.
    """
    C = np.asarray(C)
    if C.ndim != 2 or C.shape[0] != C.shape[1]:
        raise ValueError("power_method expects a square 2-D matrix")
    if num_iterations < 1:
        # Without at least one pass there is no eigenvalue estimate to return.
        raise ValueError("num_iterations must be at least 1")

    n = C.shape[0]
    # Start with a random vector
    if seed is None:
        b_k = np.random.rand(n)
    else:
        b_k = np.random.default_rng(seed).random(n)

    iterates = []
    # C @ b_k is needed both for the Rayleigh quotient and for the next
    # iterate, so it is computed once per step and carried over.
    product = np.dot(C, b_k)

    for _ in range(num_iterations):
        product_norm = np.linalg.norm(product)
        if product_norm == 0:
            # C maps the current vector to zero, so that vector is an
            # eigenvector for eigenvalue 0 and normalizing the product would
            # divide by zero. Report that pair instead of propagating NaNs.
            b_k = b_k / np.linalg.norm(b_k)
            lambda_k = 0.0
            iterates.append(lambda_k)
            break

        # Re-normalize the vector
        b_k = product / product_norm

        # Rayleigh quotient as the eigenvalue estimate
        product = np.dot(C, b_k)
        lambda_k = np.dot(b_k, product)
        iterates.append(lambda_k)

        # Check convergence on successive eigenvalue estimates
        if len(iterates) > 1 and np.abs(iterates[-1] - iterates[-2]) < tol:
            break

    return lambda_k, b_k, iterates

# Dominant eigenpair of A1 from the power method.
lambda_1, x_1, iterates_1 = power_method(A1, 100)
x_1_hat = x_1 / np.linalg.norm(x_1)

# Rank-one matrix x̂1 x̂1ᵀ, reused by the deflation cells that build A2 and A3.
x_1_hat_outer_product = np.outer(x_1_hat, x_1_hat)

print(
    "\nFirst 10 iterates_1 of eigenvalue generated by the algorithm:",
    iterates_1[:10],
    sep="\n",
)
print("\nFinal largest eigenvalue (λ1):", lambda_1, sep="\n")
print("\nFinal corresponding eigenvector (x1), normalized (x̂1):", x_1_hat, sep="\n")

# Reference values from NumPy's eigen-solver for comparison.
# An eigenvector is only defined up to sign, so the two vectors may differ by -1.
eigenvalues, eigenvectors = np.linalg.eig(A1)
index_max = np.argmax(eigenvalues)
largest_eigenvalue1 = eigenvalues[index_max]
dominant_column = eigenvectors[:, index_max]
normalized_largest_eigenvector1 = dominant_column / np.linalg.norm(dominant_column)

print("\nComparison with NumPy's computation:")
print("Largest eigenvalue (λ1) via NumPy:", largest_eigenvalue1)
print("Normalized largest eigenvector (x̂1) via NumPy:", normalized_largest_eigenvector1)


First 10 iterates_1 of eigenvalue generated by the algorithm:
[1123.1482989640858, 1132.686221720402, 1132.7326096837915, 1132.7328353418934, 1132.7328364397865, 1132.7328364451278]

Final largest eigenvalue (λ1):
1132.7328364451278

Final corresponding eigenvector (x1), normalized (x̂1):
[ 0.21977211  0.97274017 -0.07400533]

Comparison with NumPy's computation:
Largest eigenvalue (λ1) via NumPy: 1132.732836445155
Normalized largest eigenvector (x̂1) via NumPy: [-0.21977197 -0.97274019  0.07400541]


In [74]:
# Deflation step: subtract (x̂1 x̂1ᵀ) A1 from A1 so that the power method,
# applied to the result, is no longer dominated by the λ1 direction.
x_1_hat_outer_product = np.outer(x_1_hat, x_1_hat)
A2 = A1 - x_1_hat_outer_product @ A1

print("Matrix A2:\n",A2)

# Dominant eigenpair of the deflated matrix.
lambda_2, x_2, iterates_2 = power_method(A2, 100)
x_2_hat = x_2 / np.linalg.norm(x_2)

print("\nFirst 10 iterates_2 of eigenvalue generated by the algorithm:")
print(iterates_2[:10])

print("\nLargest eigenvalue (λ2) of A2:")
print(lambda_2)
# The backslash is doubled: a bare "\h" is an invalid escape sequence in a
# normal string literal and triggers a warning on recent Python versions.
print("\nCorresponding eigenvector (x2), normalized (\\hat{x}_2):")
print(x_2_hat)

Matrix A2:
 [[ 59.28926226 -11.15703684  29.42313898]
 [-11.15680009   2.18164073  -4.45680451]
 [ 29.42309073  -4.45693834  28.79626057]]

First 10 iterates_2 of eigenvalue generated by the algorithm:
[78.75797960005164, 79.00499127110662, 79.01002417442609, 79.01012634692341, 79.0101284209637, 79.0101284630654]

Largest eigenvalue (λ2) of A2:
79.0101284630654

Corresponding eigenvector (x2), normalized (\hat{x}_2):
[ 0.84673084 -0.15252607  0.5096888 ]


In [80]:
# Second deflation step: on top of the λ1 deflation, subtract (x̂2 x̂2ᵀ) A2,
# i.e. A3 = A2 - (x̂2 x̂2ᵀ) A2 written out in terms of A1.
x_2_hat_outer_product = np.outer(x_2_hat, x_2_hat)
A3 = A1 - x_1_hat_outer_product @ A1 - x_2_hat_outer_product @ A2

print("Matrix A3:\n",A3)

# Dominant eigenpair of the twice-deflated matrix.
lambda_3, x_3, iterates_3 = power_method(A3, 100)
x_3_hat = x_3 / np.linalg.norm(x_3)

# Label fixed: this cell prints iterates_3, not iterates_2.
print("\nFirst 10 iterates_3 of eigenvalue generated by the algorithm:")
print(iterates_3[:10])

print("\nLargest eigenvalue (λ3) of A3:")
print(lambda_3)
# The backslash is doubled: a bare "\h" is an invalid escape sequence in a
# normal string literal and triggers a warning on recent Python versions.
print("\nCorresponding eigenvector (x3), normalized (\\hat{x}_3):")
print(x_3_hat)

Matrix A3:
 [[ 2.64267924 -0.9526339  -4.67504862]
 [-0.95274646  0.34346675  1.68548539]
 [-4.67526583  1.68559227  8.2708891 ]]

First 10 iterates_2 of eigenvalue generated by the algorithm:
[11.25703490747088, 11.25703509185307]

Largest eigenvalue (λ3) of A3:
11.25703509185307

Corresponding eigenvector (x3), normalized (\hat{x}_3):
[ 0.48450703 -0.17467797 -0.85717008]


For λ1 and x_1_hat
The largest eigenvalue (λ1) is significantly larger than the others.
The corresponding eigenvector x_1_hat points in the direction of greatest variance.

First 10 Iterates for λ2 and x_2_hat
The iterates converge more slowly than for λ1, reflecting the smaller relative gap between the largest and the second-largest eigenvalues of A2.
The direction of x_2_hat is orthogonal to x_1_hat, since A2 is constructed to remove the influence of x_1_hat.

First 10 Iterates for λ3 and x_3_hat
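Convergence here is actually the fastest of the three runs (only two iterates were needed): after λ1 and λ2 have been deflated, the remaining eigenvalues of A3 are numerically zero, so λ3 has essentially no competing eigenvalue.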
The eigenvector x_3_hat is orthogonal to x_1_hat and x_2_hat, which matches what the construction of A3 leads us to expect.

In [81]:
import numpy as np

def f(x, y):
    """Evaluate 10x^4 - 20x^2*y + x^2 + 10y^2 - 2x + 1 at the point (x, y)."""
    quartic_term = 10*x**4
    coupling_term = 20*x**2*y
    return quartic_term - coupling_term + x**2 + 10*y**2 - 2*x + 1

def grad_f(x, y):
    """Return the gradient [df/dx, df/dy] of the objective at (x, y) as an array."""
    return np.array([
        40*x**3 - 40*x*y + 2*x - 2,  # df/dx
        -20*x**2 + 20*y,             # df/dy
    ])

def armijo_rule(x, grad, alpha=1, beta=0.5, sigma=0.1):
    """Backtrack from `alpha` until the sufficient-decrease test passes.

    The trial point is ``x - alpha * grad``. While the module-level objective
    `f` at the trial point exceeds ``f(x) - sigma * alpha * (grad . grad)``,
    the step is multiplied by `beta`. Returns the first step that is accepted.
    """
    while True:
        trial_value = f(x[0] - alpha * grad[0], x[1] - alpha * grad[1])
        threshold = f(x[0], x[1]) - sigma * alpha * np.dot(grad, grad)
        # Written as "not >" so a NaN comparison ends the loop.
        if not trial_value > threshold:
            return alpha
        alpha *= beta

def gradient_descent_with_armijo(init_point, max_iter=1000, epsilon=1e-6):
    """Run steepest descent with Armijo backtracking from `init_point`.

    Stops as soon as the gradient norm falls below `epsilon`, or after
    `max_iter` updates, and returns the current point as an array.
    """
    point = np.array(init_point)
    for _ in range(max_iter):
        gradient = grad_f(point[0], point[1])
        if np.linalg.norm(gradient) < epsilon:
            return point
        step = armijo_rule(point, gradient)
        point = point - step * gradient
    return point

# Starting guess handed to the descent routine (an example choice).
init_point = [0.1, 0.1]

# Run the descent and report where it stopped.
stationary_point = gradient_descent_with_armijo(init_point)

print(f"Stationary point found at: {stationary_point}")


Stationary point found at: [1.00005111 1.00010296]


In [83]:
import numpy as np

class ObjectiveFunction:
    """Objective 10x^4 - 20x^2*y + x^2 + 10y^2 - 2x + 1 with its gradient and Hessian."""

    def eval(self, x, y):
        """Return the objective value at (x, y)."""
        x_squared = x ** 2
        return 10 * (x ** 4) - 20 * x_squared * y + x_squared + 10 * (y ** 2) - 2 * x  + 1

    def gradient(self, x, y):
        """Return the array [df/dx, df/dy] at (x, y)."""
        partial_x = 40 * (x ** 3) - 40 * x * y + 2 * x - 2
        partial_y = 20 * y - 20 * (x ** 2)
        return np.array([partial_x, partial_y])

    def hessian(self, x, y):
        """Return the 2x2 matrix of second derivatives at (x, y)."""
        second_xx = 120 * (x ** 2) - 40 * y + 2
        mixed_xy = -40 * x
        second_yy = 20
        # The mixed derivative fills both off-diagonal entries.
        return np.array([
            [second_xx, mixed_xy],
            [mixed_xy, second_yy],
        ])

In [84]:
class GradientMethod():
    """Steepest descent for a two-variable objective with Armijo backtracking.

    The objective passed to the methods must provide ``eval(x, y)`` and
    ``gradient(x, y)``; the gradient must support indexing and ``.dot``.

    ``iterations`` holds the number of update steps taken by the most recent
    ``optimize`` call.
    """

    def __init__(self):
        self.iterations = 0

    def optimize(self, x_0, y_0, func, beta, sigma, epsilon, max_iterations=None):
        """Minimize ``func`` starting from ``(x_0, y_0)``.

        Parameters:
            x_0, y_0: starting coordinates.
            func: objective exposing ``eval`` and ``gradient``.
            beta: backtracking factor; trial steps are ``beta ** i``.
            sigma: Armijo sufficient-decrease constant.
            epsilon: iteration stops once the gradient norm drops below this.
            max_iterations: optional cap on the number of update steps;
                ``None`` (the default) means no cap.

        Returns:
            Tuple ``(x, y)`` with the final point.
        """
        x = x_0
        y = y_0
        # Count per call, so reusing the optimizer does not add up old runs.
        self.iterations = 0
        while self.stopping_criteria(x, y, func, epsilon):
            if max_iterations is not None and self.iterations >= max_iterations:
                break

            descent_direction = -1 * func.gradient(x, y)

            step_size = self.step_size(x, y, func, beta, descent_direction, sigma)
            if step_size == 0:
                # The step underflowed to zero: the point can no longer move,
                # so iterating further would loop forever.
                break

            # update step
            x = x + step_size * descent_direction[0]
            y = y + step_size * descent_direction[1]
            self.iterations += 1

        return x, y

    def stopping_criteria(self, x, y, func, epsilon):
        """Return True while the gradient norm at (x, y) is at least ``epsilon``."""
        return np.linalg.norm(func.gradient(x, y)) >= epsilon

    def step_size(self, x, y, func, beta, d, sigma):
        """Return the first ``beta ** i`` (i = 0, 1, ...) passing the Armijo test.

        The test is ``f(p + t*d) <= f(p) + t * sigma * grad(p).dot(d)`` with
        ``p = (x, y)`` and ``t = beta ** i``.

        Raises:
            FloatingPointError: if the step has shrunk to zero and the test
                still fails (this happens with non-finite values, where the
                loop would otherwise never end).
        """
        # Neither quantity depends on the trial step, so evaluate them once.
        current_value = func.eval(x, y)
        directional_slope = func.gradient(x, y).dot(d)

        i = 0
        while True:
            step = np.power(beta, i)
            trial_value = func.eval(x + step * d[0], y + step * d[1])
            if trial_value <= current_value + step * sigma * directional_slope:
                return step
            if step == 0:
                raise FloatingPointError(
                    "Armijo backtracking failed: step size underflowed to zero"
                )
            i += 1
    
# Problem set-up: objective, starting point and line-search parameters.
objective = ObjectiveFunction()
starting_point = np.array([-1.2, 1])
x0, y0 = -1.2, 1
beta, sigma, epsilon = 0.5, 0.0001, 0.0001

# Run the descent; `x` receives the (x, y) tuple returned by optimize().
optimizer = GradientMethod()
x = optimizer.optimize(x0,y0, objective,beta,sigma,epsilon)

print(f'Optimal Point: {x}')
print(f'Iterations: {optimizer.iterations}')

Optimal Point: (1.0000860864244827, 1.0001742864714784)
Iterations: 641


In [89]:
import numpy as np

def f(x, y):
    """Return the objective 10x^4 - 20x^2*y + x^2 + 10y^2 - 2x + 1 at (x, y)."""
    # Terms are added in the same left-to-right order as the one-line formula.
    value = 10*x**4 - 20*x**2*y
    value = value + x**2 + 10*y**2
    value = value - 2*x + 1
    return value

def grad_f(x, y):
    """Return the array [df/dx, df/dy] of the objective at (x, y)."""
    partial_x = 40*x**3 - 40*x*y + 2*x - 2
    partial_y = -20*x**2 + 20*y
    return np.array([partial_x, partial_y])

def armijo_rule(x, y, alpha, grad, beta=0.5, sigma=0.4):
    """Shrink `alpha` by `beta` until the Armijo sufficient-decrease test holds.

    The trial point is ``(x - alpha*grad[0], y - alpha*grad[1])``. The step is
    accepted once the module-level objective `f` there no longer exceeds
    ``f(x, y) - sigma * alpha * (grad . grad)``. Returns the accepted step.
    """
    while True:
        candidate_value = f(x - alpha * grad[0], y - alpha * grad[1])
        decrease_bound = f(x, y) - sigma * alpha * np.dot(grad, grad)
        # Written as "not >" so a NaN comparison ends the loop.
        if not candidate_value > decrease_bound:
            return alpha
        alpha *= beta

# Gradient Descent with Armijo's Rule
def gradient_descent_armijo(x0, y0, max_iters=100, tol=1e-6):
    """Steepest descent from (x0, y0), with the step chosen by `armijo_rule`.

    Each iteration starts the line search at step 1. The first ten iterates
    are printed. The loop ends when the gradient norm is below `tol` or after
    `max_iters` updates.

    Returns the tuple (x, y, f(x, y)) for the last point reached.
    """
    x, y = x0, y0
    for i in range(max_iters):
        grad = grad_f(x, y)
        if np.linalg.norm(grad) < tol:
            return x, y, f(x, y)

        # Line search always restarts from a unit step.
        alpha = armijo_rule(x, y, 1, grad)
        x_new, y_new = x - alpha * grad[0], y - alpha * grad[1]

        # Report only the first ten iterates.
        if i < 10:
            print(f"Iterate {i+1}: x = {x_new:.4f}, y = {y_new:.4f}, α = {alpha:.4f}, f(x, y) = {f(x_new, y_new):.4f}")

        x, y = x_new, y_new

    return x, y, f(x, y)

# Starting point for the descent.
x0 = 0.5
y0 = 0.5

# Run with the default limits (max_iters=100, tol=1e-6) and report the last iterate.
x_star, y_star, f_star = gradient_descent_armijo(x0, y0)

print(f"\nOptimal points: x* = {x_star}, y* = {y_star}, f(x*, y*) = {f_star}")

Iterate 1: x = 0.5938, y = 0.4219, α = 0.0156, f(x, y) = 0.2131
Iterate 2: x = 0.6322, y = 0.4002, α = 0.0156, f(x, y) = 0.1353
Iterate 3: x = 0.6556, y = 0.3999, α = 0.0312, f(x, y) = 0.1276
Iterate 4: x = 0.6526, y = 0.4186, α = 0.0312, f(x, y) = 0.1212
Iterate 5: x = 0.6684, y = 0.4231, α = 0.0312, f(x, y) = 0.1155
Iterate 6: x = 0.6704, y = 0.4526, α = 0.0625, f(x, y) = 0.1087
Iterate 7: x = 0.6820, y = 0.4516, α = 0.0156, f(x, y) = 0.1029
Iterate 8: x = 0.7486, y = 0.5194, α = 0.2500, f(x, y) = 0.0800
Iterate 9: x = 0.7373, y = 0.5322, α = 0.0156, f(x, y) = 0.0703
Iterate 10: x = 0.8325, y = 0.6458, α = 0.5000, f(x, y) = 0.0504

Optimal points: x* = 0.952887534774993, y* = 0.9066781285019561, f(x*, y*) = 0.002236916771591435
