<a href="https://colab.research.google.com/github/AP-047/RClass-Classification-by-Rational-Approximation/blob/main/notebooks/training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Step 1: Import Data**

In [None]:
from sklearn.datasets import fetch_openml
import numpy as np

# Fetch the OpenML dataset 'mnist_784' (version 1)
mnist = fetch_openml('mnist_784', version=1)

# Pixel table as a NumPy array (the full dataset, one image per row)
x_full = mnist.data.to_numpy()
# Labels cast to integers (0–9)
label_values = mnist.target.values
y_full = label_values.astype(int)

**Step 2: Create Subset**

In [None]:
# Build a class-balanced subset: the first `subset_size` samples of every digit,
# ordered digit by digit (all 0s first, then all 1s, ...).
subset_size = 1000
selected_indices = []

for digit in range(10):
    # Positions of this digit in the full label vector, truncated to the quota
    digit_indices = np.flatnonzero(y_full == digit)[:subset_size]
    selected_indices.append(digit_indices)

# One fancy-indexing pass yields the same rows as stacking the per-digit slices
selected_indices = np.concatenate(selected_indices)
x_subset = x_full[selected_indices]
y_subset = y_full[selected_indices]

**Step 3: Preprocess Data**

In [None]:
# Binarise the images: pixels strictly above the threshold become 1, all others 0
threshold_value = 128
above_threshold = np.greater(x_subset, threshold_value)
x_binarized = above_threshold.astype(int)

# No reshaping required: every row is already a flat 784-dimensional vector

**Step 4: Apply PCA**

In [None]:
from sklearn.decomposition import PCA

# Apply PCA to reduce dimensionality
n_components = 20  # Adjustable based on explained variance
# A fixed random_state keeps the projection reproducible across runs whenever
# scikit-learn selects a randomized SVD solver for this data size.
pca = PCA(n_components=n_components, random_state=0)
x_pca = pca.fit_transform(x_binarized)

print(f"Original shape: {x_binarized.shape}, PCA shape: {x_pca.shape}")

**Step 5: Train Classifiers**

In [None]:
import os
import pickle

import cupy as cp
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import linprog

# Train a classifier for each digit
for digit in range(10):
    print(f"Training classifier for digit {digit}...")

    # One-vs-rest labels: 1 where the sample is the current digit, 0 for every other digit
    is_current_digit = y_subset == digit
    y_binary = is_current_digit.astype(int)

    ######################################
    # Target function (|x1 + x2 + x3 + x4 - 0.1|)
    def target_function(x):
        """Return |x1 + x2 + x3 + x4 - 0.1| for every row of `x`, using its first four columns."""
        first, second, third, fourth = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
        row_sum = first + second + third + fourth
        return cp.abs(row_sum - 0.1)

    ######################################
    def rational_function(x, alpha, beta):
        """
        Evaluate the rational model at a single data point.

        r(x) = (α_0 + α_1*x1**1 + α_2*x2**2 + α_3*x3**3 + α_4*x4**4) /
               (β_0 + β_1*x1**1 + β_2*x2**2 + β_3*x3**3 + β_4*x4**4)

        `x` is one data point whose first four entries are used; `alpha` and
        `beta` each hold five coefficients (constant term first).
        """
        # Work on a CuPy array so a single point can be indexed as a 1D vector
        point = cp.asarray(x)

        # Start from the constant terms and add the k-th variable raised to the
        # k-th power, in the same left-to-right order for both polynomials.
        numerator = alpha[0]
        denominator = beta[0]
        for k in range(4):
            numerator = numerator + alpha[k + 1] * point[k] ** (k + 1)
            denominator = denominator + beta[k + 1] * point[k] ** (k + 1)

        return numerator / denominator

    delta = 1e-6  # a small positive value: lower bound enforced on the denominator β^T H(x)

    # Feasibility check function
    def check_feasibility_and_compute_coefficients(z, x_data, y_data, theta_tol=1e-8):
        """
        Decide whether a rational function can fit the data with maximum deviation `z`.

        With G(x) = H(x) = [1, x1, x2**2, x3**3, x4**4], the following linear
        program is solved over the unknowns (α, β, θ):

            minimise θ  subject to, for every data point x_i with target f(x_i):
                (f(x_i) - z) * β^T H(x_i) - α^T G(x_i) ≤ θ
                α^T G(x_i) - (f(x_i) + z) * β^T H(x_i) ≤ θ
                β^T H(x_i) ≥ δ
                θ ≥ 0,  α and β unrestricted in sign

        `z` is feasible exactly when the optimal θ is zero.

        Args:
            z: Candidate maximum deviation |f(x_i) - r(x_i)|.
            x_data: 2D array of data points; only the first four columns are used.
                NumPy and CuPy arrays are both accepted.
            y_data: 1D array of target values, one per row of `x_data`.
            theta_tol: Optimal θ values up to this threshold are treated as zero.
                It is measured in the normalised problem (denominator ≥ 1), i.e.
                before the solution is rescaled by `delta`.

        Returns:
            (True, alpha, beta, theta) when `z` is feasible, where `alpha` and `beta`
            hold five coefficients each; (False, None, None, None) otherwise.
        """
        def to_host(values):
            # SciPy needs NumPy input; arrays living on a CUDA device (CuPy) are
            # copied to host memory with .get() before conversion.
            if hasattr(values, "__cuda_array_interface__"):
                values = values.get()
            return np.asarray(values, dtype=float)

        x_host = to_host(x_data)
        y_host = to_host(y_data)

        num_data_points = x_host.shape[0]
        num_coefficients = 4+1  # (1 constant term + 4 variables)

        # Basis matrices: column 0 is the constant term, column j is x_j ** j.
        exponents = np.arange(1, num_coefficients)
        G = np.ones((num_data_points, num_coefficients))  # Numerator matrix
        G[:, 1:] = x_host[:, :num_coefficients - 1] ** exponents
        H = G.copy()  # Denominator matrix (same basis as the numerator)

        # The constraints are homogeneous in (α, β, θ) apart from the right-hand
        # side δ, so the LP is solved with the denominator normalised to ≥ 1 and
        # the solution is multiplied by δ afterwards. Both forms have the same
        # feasible z, but in the normalised form θ is not of the order of δ and
        # therefore stays well above the solver's tolerances.
        f_minus_z = (y_host - z)[:, None]  # Lower bound f(x_i) - z
        f_plus_z = (y_host + z)[:, None]   # Upper bound f(x_i) + z
        theta_column = -np.ones((num_data_points, 1))

        # Rows of A x ≤ b with x = [α, β, θ]
        A = np.vstack([
            # Constraint 1: -G(x_i) α + (f(x_i) - z) H(x_i) β - θ ≤ 0
            np.hstack([-G, f_minus_z * H, theta_column]),
            # Constraint 2: G(x_i) α - (f(x_i) + z) H(x_i) β - θ ≤ 0
            np.hstack([G, -f_plus_z * H, theta_column]),
            # Constraint 3 (normalised): -H(x_i) β ≤ -1
            np.hstack([np.zeros_like(G), -H, np.zeros((num_data_points, 1))]),
        ])
        b = np.concatenate([np.zeros(2 * num_data_points), -np.ones(num_data_points)])

        # Objective function to minimize θ
        c = np.zeros(2 * num_coefficients + 1)
        c[-1] = 1  # Only θ has a coefficient in the objective function

        # linprog restricts every variable to [0, ∞) unless bounds are given, which
        # would forbid negative coefficients. α and β must be free; θ keeps its
        # lower bound of 0 so the LP stays bounded.
        bounds = [(None, None)] * (2 * num_coefficients) + [(0, None)]

        #📌 Solve the linear programming problem
        result = linprog(c, A_ub=A, b_ub=b, bounds=bounds, method="highs")

        # The LP is solvable for any z (a large θ satisfies every row), so solver
        # success alone says nothing: z is feasible only if the optimal θ is zero.
        if not result.success or result.x[-1] > theta_tol:
            return False, None, None, None

        # Undo the normalisation so the returned β satisfies β^T H(x_i) ≥ δ.
        solution = delta * result.x
        alpha_coefficients = solution[:num_coefficients]
        beta_coefficients = solution[num_coefficients:2 * num_coefficients]
        theta = solution[-1]
        return True, alpha_coefficients, beta_coefficients, theta


    #📌 Bisection loop
    def bisection_loop(x_data, y_data, uL, uH, precision):
        """
        Bisect on the deviation z until the bracket [uL, uH] is no wider than `precision`.

        Each midpoint is passed to check_feasibility_and_compute_coefficients:
        a feasible midpoint becomes the new upper bound and its coefficients are
        remembered, an infeasible one becomes the new lower bound.

        Returns:
            (uH, alpha, beta, theta, z_values): the final upper bound, the
            coefficients and θ of the last feasible midpoint (all None if no
            midpoint was feasible), and the list of midpoints that were tried.
        """
        best_alpha = best_beta = best_theta = None
        z_values = []

        while uH - uL > precision:
            z = (uL + uH) / 2
            z_values.append(z)
            outcome = check_feasibility_and_compute_coefficients(z, x_data, y_data)

            if not outcome[0]:
                # Infeasible: the optimum lies above the midpoint
                uL = z
                continue

            # Feasible: tighten the upper bound and keep this solution
            uH = z
            _, best_alpha, best_beta, best_theta = outcome

        return uH, best_alpha, best_beta, best_theta, z_values

    #📌 Main execution
    if __name__ == "__main__":
        # Fit the rational classifier for the current digit: bisect on the maximum
        # deviation z between 0 and 100 using the PCA features and the
        # one-vs-rest labels of this loop iteration.
        optimal_z, optimal_alpha, optimal_beta, optimal_theta, z_values = bisection_loop(
            x_pca, y_binary, uL=0, uH=100, precision=1e-6
        )

        # Print results
        print(f"Number of Iterations: {len(z_values)}")
        print(f"z Values in all Iterations: {z_values}")
        print(f"Optimal z (Maximum Deviation): {optimal_z}")

        print(f"Optimized Coefficients (Numerator α): {optimal_alpha}")
        print(f"Optimized Coefficients (Denominator β): {optimal_beta}")
        print(f"Optimal θ: {optimal_theta}")

        if optimal_alpha is None:
            # bisection_loop returns None coefficients when no midpoint was feasible;
            # rational_function cannot be evaluated without them.
            print("rational_function = not evaluated (no feasible z found)")
        else:
            # Evaluate the fitted model on a sample from the data it was trained on
            print("rational_function =", rational_function(x_pca[0], optimal_alpha, optimal_beta))

    ################################
    # Save the model
    # Collect the fitted coefficients of this digit's classifier
    model = {
        "alpha": optimal_alpha,
        "beta": optimal_beta,
        "theta": optimal_theta
    }
    # open() does not create missing directories, so make sure the output
    # folder exists (it does not on a fresh runtime).
    os.makedirs("models", exist_ok=True)
    with open(f"models/classifier_{digit}.pkl", "wb") as file:
        pickle.dump(model, file)
    print(f"Model for digit {digit} saved!")