In [2]:
import cvxpy as cp
import numpy as np

# Define the problem data
n = 3  # Number of variables
m = 4  # Number of constraints

# Define the objective function coefficients
c = np.array([1, 2, 3], dtype=np.float64)

# Define the constraint matrix A and the right-hand side b
A = np.array([[1, -1, 0], [-1, 0, 1], [0, 1, -1], [1, 1, 1]], dtype=np.float64)
b = np.array([1, -2, 3, 4], dtype=np.float64)

# Create the variables
x = cp.Variable(n)

# Define the objective function
objective = cp.Minimize(cp.quad_form(x, np.eye(n)) + c @ x)

# Define the constraints
constraints = [A @ x <= b]

# Create the problem instance
problem = cp.Problem(objective, constraints)

# Solve the problem
problem.solve()

# Print the optimal solution
print("Optimal solution:")
print(x.value)

# Print the optimal objective value
print("Optimal objective value:")
print(problem.value)


Optimal solution:
[ 2.0038618e-16 -1.0000000e+00 -2.0000000e+00]
Optimal objective value:
-3.0


In [6]:
import numpy as np

def hinge_loss(w, X, y, C):
    """
    Hinge loss function for SVM.
    w: vector of coefficients
    X: matrix of feature vectors
    y: vector of labels (-1 or 1)
    C: regularization parameter
    """
    n = X.shape[0]
    scores = np.dot(X, w)
    margins = 1 - y * scores
    loss = np.maximum(0, margins)
    loss = np.mean(loss) + 0.5 * C * np.dot(w, w)
    return loss

def gradient_descent(X, y, learning_rate, C, num_iterations):
    """
    Gradient descent algorithm for finding optimal hyperplane.
    X: matrix of feature vectors
    y: vector of labels (-1 or 1)
    learning_rate: learning rate for gradient descent
    C: regularization parameter
    num_iterations: number of iterations for gradient descent
    """
    n, d = X.shape
    w = np.zeros(d)  # Initialize weights to zeros
    
    for iteration in range(num_iterations):
        # Compute the gradients
        scores = np.dot(X, w)
        margins = 1 - y * scores
        indicators = (margins > 0).astype(int)
        gradients = -np.dot(indicators * y, X) / n + C * w
        
        # Update the weights
        w -= learning_rate * gradients
    
        # Compute and print the loss
        loss = hinge_loss(w, X, y, C)
        print(f"Iteration {iteration + 1}: Loss = {loss}")
    
    return w

# Generate some synthetic data
np.random.seed(42)
X = np.random.randn(100, 2)  # Feature vectors
y = np.random.choice([-1, 1], 100)  # Labels

# Run gradient descent to find the optimal hyperplane
learning_rate = 0.01
C = 1.0
num_iterations = 100000

optimal_hyperplane = gradient_descent(X, y, learning_rate, C, num_iterations)
print("Optimal hyperplane:")
print(optimal_hyperplane)


Iteration 1: Loss = 0.9993251743441033
Iteration 2: Loss = 0.9986637777187588
Iteration 3: Loss = 0.9980155428862586
Iteration 4: Loss = 0.9973802079269254
Iteration 5: Loss = 0.9967575161332829
Iteration 6: Loss = 0.9961472159063339
Iteration 7: Loss = 0.9955490606539009
Iteration 8: Loss = 0.9949628086909914
Iteration 9: Loss = 0.9943882231421441
Iteration 10: Loss = 0.9938250718457187
Iteration 11: Loss = 0.9932731272600922
Iteration 12: Loss = 0.9927321663717197
Iteration 13: Loss = 0.9922019706050256
Iteration 14: Loss = 0.9916823257340891
Iteration 15: Loss = 0.9911730217960837
Iteration 16: Loss = 0.990673853006445
Iteration 17: Loss = 0.99018461767572
Iteration 18: Loss = 0.9897051181280763
Iteration 19: Loss = 0.9892351606214308
Iteration 20: Loss = 0.9887745552691676
Iteration 21: Loss = 0.9883231159634145
Iteration 22: Loss = 0.9878806602998458
Iteration 23: Loss = 0.9874470095039821
Iteration 24: Loss = 0.9870219883589558
Iteration 25: Loss = 0.9866054251347159
Iteration 26

In [7]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def cost_function(w, X, y, regularization_param):
    m = X.shape[0]
    h = sigmoid(np.dot(X, w))
    cost = -(1/m) * (np.dot(y, np.log(h)) + np.dot((1-y), np.log(1-h))) + (regularization_param/(2*m)) * np.dot(w[1:], w[1:])
    return cost

def gradient(w, X, y, regularization_param):
    m = X.shape[0]
    h = sigmoid(np.dot(X, w))
    grad = (1/m) * np.dot(X.T, h - y) + (regularization_param/m) * np.concatenate(([0], w[1:]))
    return grad

def hessian(w, X, y, regularization_param):
    m = X.shape[0]
    h = sigmoid(np.dot(X, w))
    diag = np.diag(h * (1-h))
    hess = (1/m) * np.dot(np.dot(X.T, diag), X) + (regularization_param/m) * np.eye(X.shape[1])
    return hess

def newton_method(X, y, regularization_param, num_iterations):
    n = X.shape[1]
    w = np.zeros(n)  # Initialize weights to zeros

    for iteration in range(num_iterations):
        grad = gradient(w, X, y, regularization_param)
        hess = hessian(w, X, y, regularization_param)
        w -= np.linalg.inv(hess).dot(grad)

        # Compute and print the cost
        cost = cost_function(w, X, y, regularization_param)
        print(f"Iteration {iteration + 1}: Cost = {cost}")

    return w

# Generate some synthetic data
np.random.seed(42)
X = np.random.randn(100, 2)  # Feature vectors
X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)  # Add bias term
y = np.random.choice([0, 1], 100)  # Labels

# Run Newton's method to find the optimal hyperplane
regularization_param = 0.1
num_iterations = 10

optimal_hyperplane = newton_method(X, y, regularization_param, num_iterations)
print("Optimal hyperplane:")
print(optimal_hyperplane)


Iteration 1: Cost = 0.6435438552024285
Iteration 2: Cost = 0.6432251100197063
Iteration 3: Cost = 0.643225000085276
Iteration 4: Cost = 0.6432250000848557
Iteration 5: Cost = 0.6432250000848557
Iteration 6: Cost = 0.6432250000848555
Iteration 7: Cost = 0.6432250000848555
Iteration 8: Cost = 0.6432250000848555
Iteration 9: Cost = 0.6432250000848555
Iteration 10: Cost = 0.6432250000848555
Optimal hyperplane:
[ 0.22456319 -0.59090302  0.34662362]


In [10]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def cost_function(w, X, y, regularization_param):
    m = X.shape[0]
    h = sigmoid(np.dot(X, w))
    cost = -(1/m) * (np.dot(y, np.log(h)) + np.dot((1-y), np.log(1-h))) + (regularization_param/(2*m)) * np.dot(w[1:], w[1:])
    return cost

def stochastic_gradient_descent(X, y, regularization_param, learning_rate, num_epochs, batch_size):
    n = X.shape[1]
    m = X.shape[0]
    w = np.zeros(n)  # Initialize weights to zeros

    for epoch in range(num_epochs):
        indices = np.random.permutation(m)
        X_shuffled = X[indices]
        y_shuffled = y[indices]

        for i in range(0, m, batch_size):
            X_batch = X_shuffled[i:i+batch_size]
            y_batch = y_shuffled[i:i+batch_size]

            grad = gradient(w, X_batch, y_batch, regularization_param)
            w -= learning_rate * grad

        # Compute and print the cost
        cost = cost_function(w, X, y, regularization_param)
        print(f"Epoch {epoch + 1}: Cost = {cost}")

    return w

# Generate some synthetic data
np.random.seed(42)
X = np.random.randn(100, 2)  # Feature vectors
X = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)  # Add bias term
y = np.random.choice([0, 1], 100)  # Labels

# Set hyperparameters
regularization_param = 0.1
learning_rate = 0.01
num_epochs = 10
batch_size = 10

# Run stochastic gradient descent to find the optimal hyperplane
optimal_hyperplane = stochastic_gradient_descent(X, y, regularization_param, learning_rate, num_epochs, batch_size)
print("Optimal hyperplane:")
print(optimal_hyperplane)


Epoch 1: Cost = 0.6910137886790889
Epoch 2: Cost = 0.6889730359248116
Epoch 3: Cost = 0.6870326519736788
Epoch 4: Cost = 0.685189373496289
Epoch 5: Cost = 0.6834285113981967
Epoch 6: Cost = 0.6817481235914344
Epoch 7: Cost = 0.6801443970350357
Epoch 8: Cost = 0.6786141697707812
Epoch 9: Cost = 0.6771529734581885
Epoch 10: Cost = 0.6757612466844649
Optimal hyperplane:
[ 0.06046944 -0.0939935   0.06897012]


In [15]:
import numpy as np

def objective_function(x):
    return x[0]**2 + 2*x[1]**2 + 3*x[2]**2  # Example objective function

def coordinate_descent(initial_solution, learning_rate, max_iterations):
    solution = initial_solution.copy().astype(float)
    num_variables = len(solution)

    for iteration in range(max_iterations):
        for i in range(num_variables):
            # Fix all other variables except the current one
            fixed_variables = np.delete(solution, i)

            # Optimize the current coordinate using gradient descent
            def optimize_coordinate(coordinate):
                updated_solution = np.insert(fixed_variables, i, coordinate)
                return gradient_descent(updated_solution, learning_rate, max_iterations)[i]

            optimized_coordinate = optimize_coordinate(solution[i])

            # Update the solution with the optimized coordinate
            solution[i] = optimized_coordinate

        # Print the current solution and objective value
        objective_value = objective_function(solution)
        print(f"Iteration {iteration + 1}: Solution = {solution}, Objective Value = {objective_value}")

    return solution

def gradient_descent(coordinate, learning_rate, max_iterations):
    for _ in range(max_iterations):
        gradient = 2 * coordinate
        coordinate -= learning_rate * gradient

    return coordinate

# Set the initial solution, learning rate, and maximum iterations
initial_solution = np.array([1, 1, 1])
learning_rate = 0.1
max_iterations = 10

# Run coordinate descent to find the optimal solution
optimal_solution = coordinate_descent(initial_solution, learning_rate, max_iterations)
print("Optimal Solution:")
print(optimal_solution)


Iteration 1: Solution = [0.10737418 0.10737418 0.10737418], Objective Value = 0.06917529027641085
Iteration 2: Solution = [0.01152922 0.01152922 0.01152922], Objective Value = 0.0007975367974709497
Iteration 3: Solution = [0.00123794 0.00123794 0.00123794], Objective Value = 9.194973245195339e-06
Iteration 4: Solution = [0.00013292 0.00013292 0.00013292], Objective Value = 1.0601082388670313e-07
Iteration 5: Solution = [1.42724769e-05 1.42724769e-05 1.42724769e-05], Objective Value = 1.2222215858006925e-09
Iteration 6: Solution = [1.53249554e-06 1.53249554e-06 1.53249554e-06], Objective Value = 1.4091255496643014e-11
Iteration 7: Solution = [1.64550456e-07 1.64550456e-07 1.64550456e-07], Objective Value = 1.6246111488989163e-13
Iteration 8: Solution = [1.76684706e-08 1.76684706e-08 1.76684706e-08], Objective Value = 1.8730491301895973e-15
Iteration 9: Solution = [1.89713759e-09 1.89713759e-09 1.89713759e-09], Objective Value = 2.159478621380737e-17
Iteration 10: Solution = [2.03703598e

In [16]:
import numpy as np

def svm_smo(X, y, C, max_iterations):
    m, n = X.shape
    alphas = np.zeros(m)  # Initialize the Lagrange multipliers
    b = 0.0  # Initialize the bias term
    errors = np.zeros(m)  # Initialize the error cache

    for _ in range(max_iterations):
        num_changed_alphas = 0
        for i in range(m):
            E_i = np.dot(alphas * y, np.dot(X, X[i])) + b - y[i]  # Calculate the error for example i
            if (y[i] * E_i < -0.001 and alphas[i] < C) or (y[i] * E_i > 0.001 and alphas[i] > 0):
                j = np.random.choice(list(range(i)) + list(range(i+1, m)))  # Randomly select a second example j

                E_j = np.dot(alphas * y, np.dot(X, X[j])) + b - y[j]  # Calculate the error for example j

                alpha_i_old = alphas[i]  # Store the old value of alpha_i
                alpha_j_old = alphas[j]  # Store the old value of alpha_j

                if y[i] != y[j]:
                    L = max(0, alphas[j] - alphas[i])  # Compute the lower and upper bounds for alpha_j
                    H = min(C, C + alphas[j] - alphas[i])
                else:
                    L = max(0, alphas[i] + alphas[j] - C)  # Compute the lower and upper bounds for alpha_j
                    H = min(C, alphas[i] + alphas[j])

                if L == H:
                    continue

                eta = 2 * np.dot(X[i], X[j]) - np.dot(X[i], X[i]) - np.dot(X[j], X[j])  # Compute the second derivative of the objective function
                if eta >= 0:
                    continue

                alphas[j] -= y[j] * (E_i - E_j) / eta  # Update alpha_j using the second derivative and the error difference
                alphas[j] = np.clip(alphas[j], L, H)  # Clip alpha_j to ensure it stays within the bounds

                if abs(alphas[j] - alpha_j_old) < 0.00001:
                    continue

                alphas[i] += y[i] * y[j] * (alpha_j_old - alphas[j])  # Update alpha_i using alpha_j

                b1 = b - E_i - y[i] * (alphas[i] - alpha_i_old) * np.dot(X[i], X[i]) - y[j] * (alphas[j] - alpha_j_old) * np.dot(X[i], X[j])  
                # Compute the bias terms
                b2 = b - E_j - y[i] * (alphas[i] - alpha_i_old) * np.dot(X[i], X[j]) - y[j] * (alphas[j] - alpha_j_old) * np.dot(X[j], X[j])

                if 0 < alphas[i] < C:
                    b = b1
                elif 0 < alphas[j] < C:
                    b = b2
                else:
                    b = (b1 + b2) / 2

                errors[i] = E_i  # Update the error cache
                errors[j] = E_j

                num_changed_alphas += 1

        if num_changed_alphas == 0:
            break

    # Compute the weight vector
    w = np.dot(X.T, alphas * y)

    return w, b

# Example usage
X = np.array([[1, 2], [2, 3], [3, 1], [4, 3]])  # Input features
y = np.array([-1, -1, 1, 1])  # Labels
C = 1.0  # Regularization parameter
max_iterations = 100  # Maximum number of iterations

w, b = svm_smo(X, y, C, max_iterations)

print("Optimal hyperplane:")
print("Weight vector:", w)
print("Threshold (b):", b)


Optimal hyperplane:
Weight vector: [ 0.832 -0.416]
Threshold (b): -1.080000000000001
