In [95]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn.model_selection import train_test_split
import time
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import seaborn as sns
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from scipy.sparse import csr_matrix
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from IPython.display import display
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=RuntimeWarning)


dense

In [96]:
def dcd_base(X, y, alpha, w, C, tol, it):
    D = 1 / (2 * C)
    l = len(y)

    for i in range(l):
        xi, yi = X[i], y[i]
        G = yi * np.dot(w, xi) - 1 + D * alpha[i]

        if alpha[i] == 0 and G >= 0:
            continue
        elif 0 < alpha[i] < np.inf and abs(G) < tol:
            continue

        Qii = np.dot(xi, xi) + D
        alpha_i_old = alpha[i]
        alpha[i] -= G / Qii
        alpha[i] = max(alpha[i], 0)
        w += (alpha[i] - alpha_i_old) * yi * xi

    return w, alpha

def dcd_random_perm(X, y, alpha, w, C, tol, it):
    D = 1 / (2 * C)
    l = len(y)

    for i in np.random.permutation(l):
        xi, yi = X[i], y[i]
        G = yi * np.dot(w, xi) - 1 + D * alpha[i]

        if alpha[i] == 0 and G >= 0:
            continue
        elif 0 < alpha[i] < np.inf and abs(G) < tol:
            continue

        Qii = np.dot(xi, xi) + D
        alpha_i_old = alpha[i]
        alpha[i] -= G / Qii
        alpha[i] = max(alpha[i], 0)
        w += (alpha[i] - alpha_i_old) * yi * xi

    return w, alpha

def dcd_online(X, y, alpha, w, C, tol, it):
    D = 1 / (2 * C)
    l = len(y)

    i = np.random.randint(0, l)
    xi, yi = X[i], y[i]
    G = yi * np.dot(w, xi) - 1 + D * alpha[i]

    if alpha[i] == 0 and G >= 0:
        return w, alpha
    elif 0 < alpha[i] < np.inf and abs(G) < tol:
        return w, alpha

    Qii = np.dot(xi, xi) + D
    alpha_i_old = alpha[i]
    alpha[i] -= G / Qii
    alpha[i] = max(alpha[i], 0)
    w += (alpha[i] - alpha_i_old) * yi * xi

    return w, alpha

def dcd_with_shrinking(X, y, alpha, w, C, tol, it):
    D = 1 / (2 * C)
    l, n = X.shape

    # Initialize the active set only at the first iteration
    if it == 0:
        dcd_with_shrinking.active_set = np.ones(l, dtype=bool)
        dcd_with_shrinking.shrink_counter = 0

    active_set = dcd_with_shrinking.active_set

    # Compute projected gradients for all variables
    G_proj = compute_projected_gradient(alpha, X, y, w, D)

    # Dynamic bounds of projected gradient on the active set
    bar_G = G_proj[active_set].max()
    under_G = G_proj[active_set].min()

    # Coordinate updates on the active variables
    for i in np.random.permutation(np.where(active_set)[0]):
        xi, yi = X[i], y[i]
        G = yi * np.dot(w, xi) - 1 + D * alpha[i]

        # KKT condition-based skipping
        if alpha[i] == 0 and G >= 0:
            continue
        elif 0 < alpha[i] < np.inf and abs(G) < tol:
            continue

        Qii = np.dot(xi, xi) + D
        alpha_old_i = alpha[i]
        alpha[i] = max(alpha[i] - G / Qii, 0)

        # Update the primal variable w efficiently
        w += (alpha[i] - alpha_old_i) * yi * xi

    # Shrinking: exclude variables unlikely to change significantly
    for i in range(l):
        if not active_set[i]:
            continue
        if alpha[i] == 0 and G_proj[i] > bar_G:
            active_set[i] = False
        elif alpha[i] == np.inf and G_proj[i] < under_G:
            active_set[i] = False

    # Periodically re-activate all variables to prevent exclusion of useful coordinates
    dcd_with_shrinking.shrink_counter += 1
    if dcd_with_shrinking.shrink_counter % 10 == 0 or np.sum(active_set) < l * 0.1:
        active_set[:] = True

    return w, alpha

methods_dense = {
    "Base": dcd_base,
    "Random Permutation": dcd_random_perm,
    "Online": dcd_online,
    "With Shrinking": dcd_with_shrinking
}

def compute_projected_gradient(alpha, X, y, w, D):
    G_proj = []
    for i in range(len(alpha)):
        G = y[i] * np.dot(w, X[i]) - 1 + D * alpha[i]
        if alpha[i] == 0:
            G_proj.append(min(0, G))
        elif alpha[i] == np.inf:  # Non utilisé ici pour L2-SVM, mais inclus pour cohérence
            G_proj.append(max(0, G))
        else:
            G_proj.append(G)
    return np.array(G_proj)
def dual_objective(alpha, X, y, D):
    w = np.dot((alpha * y), X)
    loss = 0.5 * np.dot(w, w) + D * np.dot(alpha, alpha)
    return loss


def kkt_violations(alpha, X, y, w, D, tol=1e-3):
    violations = 0
    for i in range(len(alpha)):
        G = y[i] * np.dot(w, X[i]) - 1 + D * alpha[i]
        if alpha[i] == 0 and G < -tol:
            violations += 1
        elif 0 < alpha[i] < np.inf and abs(G) > tol:
            violations += 1
        elif alpha[i] == np.inf and G > tol:
            violations += 1
    return violations / len(alpha)


def primal_gap(w, X, y, C, fP_star):
    fP = primal_objective(w, X, y, C)
    return abs(fP - fP_star) / abs(fP_star)


def primal_objective(w, X, y, C):
    margins = 1 - y * (X @ w)
    hinge_loss = np.sum(np.maximum(0, margins) ** 2)
    return 0.5 * np.dot(w, w) + C * hinge_loss


def evaluate_with_criteria_dense(X, y, methods, C=1.0, tol=1e-3, max_iter=1000):
    D = 1 / (2 * C)
    results = []

    for name, method in methods.items():
        for criterion_name in ['delta_alpha', 'projected_gradient', 'dual_objective']:
            alpha = np.zeros(len(y))
            w = np.zeros(X.shape[1])
            alpha_old = alpha.copy()
            f_old = dual_objective(alpha, X, y, D)
            convergence_log = [] 

            start_time = time.time()

            for it in range(max_iter):
                w, alpha = method(X, y, alpha, w, C, tol, it)

                if criterion_name == 'delta_alpha':
                    delta = np.linalg.norm(alpha - alpha_old)
                    convergence_log.append(delta)
                    if delta < tol:
                        break
                elif criterion_name == 'projected_gradient':
                    G_proj = compute_projected_gradient(alpha, X, y, w, D)
                    gap = G_proj.max() - G_proj.min()
                    convergence_log.append(gap)
                    if gap < tol:
                        break
                elif criterion_name == 'dual_objective':
                    f_new = dual_objective(alpha, X, y, D)
                    diff = abs(f_new - f_old)
                    convergence_log.append(diff)
                    if diff < tol:
                        break
                    f_old = f_new

                alpha_old = alpha.copy()

            elapsed = time.time() - start_time
            accuracy = accuracy_score(y, np.sign(X @ w))

            kkt_error = kkt_violations(alpha, X, y, w, D, tol)
            fP = primal_objective(w, X, y, C)
            
            results.append({
                'Method': name,
                'Criterion': criterion_name,
                'Time': elapsed,
                'Accuracy': accuracy,
                'Iterations': it + 1,
                'Log': convergence_log,
                'KKT_violation': kkt_error,
                'fP': fP # Primal function
            })

    fP_star = min(res['fP'] for res in results)
    for res in results:
        res['PrimalGap'] = abs(res['fP'] - fP_star) / abs(fP_star)

    return results
def display_results_table(results):
    df = pd.DataFrame(results)
    display(df)

def plot_convergence(results):
    plt.figure(figsize=(12, 6))
    prop_cycle = plt.rcParams['axes.prop_cycle']
    colors = prop_cycle.by_key()['color']
    extended_colors = plt.get_cmap('tab20').colors  

    for idx, res in enumerate(results):
        label = f"{res['Method']} - {res['Criterion']}"
        iters = list(range(1, len(res['Log']) + 1))
        color = extended_colors[idx % len(extended_colors)]  
        plt.plot(iters, res['Log'], label=label, color=color, linewidth=2)

    plt.yscale('log')
    plt.xlabel("Iterations")
    plt.ylabel("Stopping criterion (log)")
    plt.title("Convergence of stopping criteria")
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

sparse

In [97]:
def compute_projected_gradient_s(alpha, X, y, w, D):
    G = X.dot(w) - 1
    G = y * G + alpha / D

    mask_lower = (alpha <= 0) & (G > 0)
    mask_upper = (alpha >= 1) & (G < 0)
    mask_middle = (alpha > 0) & (alpha < 1)

    G_proj = np.zeros_like(G)
    G_proj[mask_lower | mask_upper | mask_middle] = G[mask_lower | mask_upper | mask_middle]
    
    return G_proj

def dual_objective_s(alpha, X, y, D):
    w = X.T.dot(alpha * y) 
    loss = 0.5 * w.dot(w) + D * np.dot(alpha, alpha)

    return loss


def dcd_base_s(X, y, alpha, w, C, tol, it):
    D = 1 / (2 * C)
    l = len(y)

    for i in range(l):
        xi = X.getrow(i)
        yi = y[i]
        G = yi * xi.dot(w).item() - 1 + D * alpha[i]

        a = alpha[i]
        if a == 0 and G >= 0:
            continue
        elif 0 < a < np.inf and abs(G) < tol:
            continue

        Qii = xi.multiply(xi).sum()
        Qii += D
        alpha_i_old = a
        alpha[i] -= G / Qii
        alpha[i] = max(alpha[i], 0)
        delta_alpha = alpha[i] - alpha_i_old
        if delta_alpha != 0:
            w[xi.indices] += delta_alpha * yi * xi.data

    return w, alpha

def dcd_random_perm_s(X, y, alpha, w, C, tol, it):
    D = 1 / (2 * C)
    l = len(y)

    for i in np.random.permutation(l):
        xi = X.getrow(i)  
        yi = y[i]

        G = yi * xi.dot(w) - 1 + D * alpha[i]
        G = G.item()  

        if alpha[i] == 0 and G >= 0:
            continue
        elif 0 < alpha[i] < np.inf and abs(G) < tol:
            continue

        Qii = xi.multiply(xi).sum() + D  
        alpha_i_old = alpha[i]
        alpha[i] -= G / Qii
        alpha[i] = max(alpha[i], 0)

        delta_alpha = alpha[i] - alpha_i_old
        if delta_alpha != 0:
            w[xi.indices] += delta_alpha * yi * xi.data

    return w, alpha

def dcd_online_s(X, y, alpha, w, C, tol, it):
    D = 1 / (2 * C)
    l = len(y)

    i = np.random.randint(0, l)
    xi = X.getrow(i)         
    yi = y[i]

    G = yi * xi.dot(w) - 1 + D * alpha[i]
    G = G.item()             

    if alpha[i] == 0 and G >= 0:
        return w, alpha
    elif 0 < alpha[i] < np.inf and abs(G) < tol:
        return w, alpha

    Qii = xi.multiply(xi).sum() + D

    alpha_i_old = alpha[i]
    alpha[i] -= G / Qii
    alpha[i] = max(alpha[i], 0)

    delta_alpha = alpha[i] - alpha_i_old
    if delta_alpha != 0:
        w[xi.indices] += delta_alpha * yi * xi.data

    return w, alpha

def dcd_with_shrinking_s(X, y, alpha, w, C, tol, it):
    D = 1 / (2 * C)
    l, n = X.shape

    if it == 0:
        dcd_with_shrinking_s.active_set = np.ones(l, dtype=bool)
        dcd_with_shrinking_s.shrink_counter = 0

    active_set = dcd_with_shrinking_s.active_set

    G_proj = compute_projected_gradient_s(alpha, X, y, w, D)
    bar_G = G_proj[active_set].max()
    under_G = G_proj[active_set].min()

    for i in np.random.permutation(np.where(active_set)[0]):
        xi = X.getrow(i)  
        yi = y[i]
        G = yi * xi.dot(w).item() - 1 + D * alpha[i]

        a = alpha[i]
        if a == 0 and G >= 0:
            continue
        elif 0 < a < np.inf and abs(G) < tol:
            continue

        Qii = xi.multiply(xi).sum() + D
        alpha_old = a
        alpha[i] = max(a - G / Qii, 0)
        delta_alpha = alpha[i] - alpha_old

        if delta_alpha != 0:
            w[xi.indices] += delta_alpha * yi * xi.data

    for i in range(l):
        if not active_set[i]:
            continue
        a = alpha[i]
        if a == 0 and G_proj[i] > bar_G:
            active_set[i] = False
        elif a == np.inf and G_proj[i] < under_G:
            active_set[i] = False

    dcd_with_shrinking_s.shrink_counter += 1
    if dcd_with_shrinking_s.shrink_counter % 10 == 0 or np.sum(active_set) < 0.1 * l:
        active_set[:] = True

    return w, alpha

def generate_synthetic_dataset(n_samples, n_features, type):
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=50,
        n_redundant=0,
        n_classes=2,
        random_state=42
    )

    density = None

    if type == 'sparse':
        X[np.abs(X) < 5.0] = 0
        X = csr_matrix(X)
        density = 100 * X.nnz / (X.shape[0] * X.shape[1])

    y = 2 * y - 1
    return X, y, density

def kkt_violations_s(alpha, X, y, w, D, tol=1e-3):
    violations = 0
    for i in range(len(alpha)):
        xi = X.getrow(i)
        G = y[i] * xi.dot(w).item() - 1 + D * alpha[i]
        if alpha[i] == 0 and G < -tol:
            violations += 1
        elif 0 < alpha[i] < np.inf and abs(G) > tol:
            violations += 1
        elif alpha[i] == np.inf and G > tol:
            violations += 1
    return violations / len(alpha)

def primal_objective_s(w, X, y, C):
    margins = 1 - y * X.dot(w)
    hinge_loss = np.sum(np.maximum(0, margins) ** 2)
    return 0.5 * np.dot(w, w) + C * hinge_loss

def primal_gap_s(w, X, y, C, fP_star):
    fP = primal_objective_s(w, X, y, C)
    return abs(fP - fP_star) / abs(fP_star)

def evaluate_with_criteria_sparse(X, y, methods, C=1.0, tol=1e-3, max_iter=1000):
    D = 1 / (2 * C)
    results = []

    for name, method in methods.items():
        for criterion_name in ['delta_alpha', 'projected_gradient', 'dual_objective']:
            alpha = np.zeros(len(y))
            w = np.zeros(X.shape[1])
            alpha_old = alpha.copy()
            f_old = dual_objective_s(alpha, X, y, D)
            convergence_log = []

            start_time = time.time()

            for it in range(max_iter):
                w, alpha = method(X, y, alpha, w, C, tol, it)

                if criterion_name == 'delta_alpha':
                    delta = np.linalg.norm(alpha - alpha_old)
                    convergence_log.append(delta)
                    if delta < tol:
                        break
                elif criterion_name == 'projected_gradient':
                    G_proj = compute_projected_gradient_s(alpha, X, y, w, D)
                    gap = G_proj.max() - G_proj.min()
                    convergence_log.append(gap)
                    if gap < tol:
                        break
                elif criterion_name == 'dual_objective':
                    f_new = dual_objective_s(alpha, X, y, D)
                    diff = abs(f_new - f_old)
                    convergence_log.append(diff)
                    if diff < tol:
                        break
                    f_old = f_new

                alpha_old = alpha.copy()

            elapsed = time.time() - start_time
            accuracy = accuracy_score(y, np.sign(X @ w))

            kkt_error = kkt_violations_s(alpha, X, y, w, D, tol)
            fP = primal_objective_s(w, X, y, C)

            results.append({
                'Method': name,
                'Criterion': criterion_name,
                'Time': elapsed,
                'Accuracy': accuracy,
                'Iterations': it + 1,
                'Log': convergence_log,
                'KKT_violation': kkt_error,
                'fP': fP,
            })

    fP_star = min(res['fP'] for res in results)
    for res in results:
        res['PrimalGap'] = abs(res['fP'] - fP_star) / abs(fP_star)

    return results

def display_results_table(results):
    df = pd.DataFrame(results)
    display(df)

def plot_convergence(results):
    plt.figure(figsize=(12, 6))
    prop_cycle = plt.rcParams['axes.prop_cycle']
    colors = prop_cycle.by_key()['color']
    extended_colors = plt.get_cmap('tab20').colors  

    for idx, res in enumerate(results):
        label = f"{res['Method']} - {res['Criterion']}"
        iters = list(range(1, len(res['Log']) + 1))
        color = extended_colors[idx % len(extended_colors)]  
        plt.plot(iters, res['Log'], label=label, color=color, linewidth=2)

    plt.yscale('log')
    plt.xlabel("Iterations")
    plt.ylabel("Stopping criterion (log)")
    plt.title("Convergence of stopping criteria")
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.show()

methods_sparse = {
    "Base": dcd_base_s,
    "Random Permutation": dcd_random_perm_s,
    "Online": dcd_online_s,
    "With Shrinking": dcd_with_shrinking_s
}

In [98]:
def generate_synthetic_dataset(n_samples, n_features, type):
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=min(50, n_features),  
        n_redundant=0,
        n_classes=2,
        random_state=42
    )

    density = None

    if type == 'sparse':
        threshold = np.percentile(np.abs(X), 99)
        X[np.abs(X) < threshold] = 0
        X = csr_matrix(X)
        density = 100 * X.nnz / (X.shape[0] * X.shape[1])

    y = 2 * y - 1

    return X, y, density
X_dense, y_dense, _ = generate_synthetic_dataset(n_samples=1000, n_features=100000, type='dense')

X_sparse_raw = X_dense.copy()
threshold = np.percentile(np.abs(X_sparse_raw), 99)
X_sparse_raw[np.abs(X_sparse_raw) < threshold] = 0
X_sparse = csr_matrix(X_sparse_raw)
density = 100 * X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1])
y_sparse = y_dense

density = 100 * X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1])
print(f"Sparse density: {density:.4f}%")

Sparse density: 1.0000%


In [99]:
results_dense = evaluate_with_criteria_dense(X_dense, y_dense, methods_dense)
results_sparse = evaluate_with_criteria_sparse(X_sparse, y_sparse, methods_sparse)

display_results_table(results_dense)
display_results_table(results_sparse)

Unnamed: 0,Method,Criterion,Time,Accuracy,Iterations,Log,KKT_violation,fP,PrimalGap
0,Base,delta_alpha,0.056282,1.0,1,[0.00030043915378078043],0.989,1.746524,372.760607
1,Base,projected_gradient,26.941388,1.0,1000,"[0.819487010840523, 0.0774888352280844, 0.0050...",0.0,0.004673,0.0
2,Base,dual_objective,0.129011,1.0,2,"[0.004893566773632612, 0.00030887055749147675]",0.736,0.052731,10.284492
3,Random Permutation,delta_alpha,0.053894,1.0,1,[0.0003003802637113305],0.986,2.077764,443.646971
4,Random Permutation,projected_gradient,26.451015,1.0,1000,"[0.7376426527540797, 0.09023837230437518, 0.01...",0.0,0.004689,0.003424
5,Random Permutation,dual_objective,0.123612,1.0,2,"[0.004897984538080556, 0.0003538528392896846]",0.883,0.159117,33.051544
6,Online,delta_alpha,9.4e-05,0.515,1,[9.853629505727666e-06],0.999,998.524894,213685.907233
7,Online,projected_gradient,13.082712,0.882,1000,"[1.017857696411867, 1.0168498084074478, 1.0171...",0.975,325.510858,69659.164729
8,Online,dual_objective,0.011229,0.484,1,[4.978879183743565e-06],0.999,999.267055,213844.731486
9,With Shrinking,delta_alpha,0.065332,1.0,1,[0.0003004006998513009],0.982,2.031573,433.761861


Unnamed: 0,Method,Criterion,Time,Accuracy,Iterations,Log,KKT_violation,fP,PrimalGap
0,Base,delta_alpha,0.094914,1.0,2,"[0.0029239465244919943, 0.000741124293999575]",0.951,1.56313,36.378681
1,Base,projected_gradient,13.553284,1.0,1000,"[4.009084695792312, 2.712020942081568, 2.50385...",0.0,0.041819,0.0
2,Base,dual_objective,0.095948,1.0,2,"[0.04363020655708599, 0.0002989313096540616]",0.951,1.56313,36.378681
3,Random Permutation,delta_alpha,0.095086,1.0,2,"[0.0029256880780601847, 0.0007500751898807857]",0.956,6.354649,150.95686
4,Random Permutation,projected_gradient,14.104725,1.0,1000,"[3.9984517879710038, 2.631016510608041, 2.4143...",0.0,0.041829,0.000236
5,Random Permutation,dual_objective,0.192547,1.0,4,"[0.04353861446256941, 0.003081561556857175, 0....",0.792,0.086814,1.075967
6,Online,delta_alpha,8.6e-05,0.509,1,[0.00011611489982723072],0.999,997.835139,23859.939217
7,Online,projected_gradient,0.839846,0.907,1000,"[1.0573513819586438, 1.062977091006955, 1.0941...",0.97,265.604844,6350.330781
8,Online,dual_objective,0.000907,0.582,1,[5.6266271690056696e-05],0.999,993.894936,23765.718309
9,With Shrinking,delta_alpha,0.09531,1.0,2,"[0.0029265279173181454, 0.0007447094226119265]",0.952,6.923464,164.558772


In [100]:
results1000f100000 = []
for i in range(len(results_dense)):
    results1000f100000.append({
        'Method':results_dense[i]['Method'],
        'Criterion': results_dense[i]['Criterion'],
        'Time Dense': results_dense[i]['Time'],
        'Time Sparse': results_sparse[i]['Time'],
        'Accuracy Dense': results_dense[i]['Accuracy'],
        'Accuracy Sparse': results_sparse[i]['Accuracy']
    })

display_results_table(results1000f100000)
    

Unnamed: 0,Method,Criterion,Time Dense,Time Sparse,Accuracy Dense,Accuracy Sparse
0,Base,delta_alpha,0.056282,0.094914,1.0,1.0
1,Base,projected_gradient,26.941388,13.553284,1.0,1.0
2,Base,dual_objective,0.129011,0.095948,1.0,1.0
3,Random Permutation,delta_alpha,0.053894,0.095086,1.0,1.0
4,Random Permutation,projected_gradient,26.451015,14.104725,1.0,1.0
5,Random Permutation,dual_objective,0.123612,0.192547,1.0,1.0
6,Online,delta_alpha,9.4e-05,8.6e-05,0.515,0.509
7,Online,projected_gradient,13.082712,0.839846,0.882,0.907
8,Online,dual_objective,0.011229,0.000907,0.484,0.582
9,With Shrinking,delta_alpha,0.065332,0.09531,1.0,1.0


In [105]:
categories = ['alt.atheism', 'talk.religion.misc']
data = fetch_20newsgroups(subset='train', categories=categories)

vectorizer = TfidfVectorizer(
    max_features=100000,
    stop_words='english',
    min_df=1
)
X_20news = vectorizer.fit_transform(data.data)  

y_20news = data.target
y_20news = 2 * y_20news - 1  

X_20news = X_20news[:1000]
y_20news = y_20news[:1000]

d_20news = X_20news.nnz / (X_20news.shape[0] * X_20news.shape[1])
print(f"Shape: {X_20news.shape}")
print(f"Density: {d_20news:.2%}")
print(f"Classes: {np.unique(y_20news)}")



Shape: (857, 17791)
Density: 0.69%
Classes: [-1  1]


In [112]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer

categories = ['alt.atheism', 'talk.religion.misc']
data = fetch_20newsgroups(subset='train', categories=categories)
X_sparse = TfidfVectorizer(max_features=100000, stop_words='english').fit_transform(data.data)
y_sparse = 2 * data.target[:X_sparse.shape[0]] - 1

from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler

X_dense, y_dense = load_digits(return_X_y=True)
X_dense = X_dense[y_dense < 2]  # Binary: digits 0 vs 1
y_dense = 2 * y_dense[y_dense < 2] - 1
X_dense = StandardScaler().fit_transform(X_dense)

print(len(X_dense), len(X_dense[0]))
print(X_sparse.shape)
print(X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]))

360 64
(857, 17791)
0.006935776463746338


In [113]:
results_dense = evaluate_with_criteria_dense(X_dense, y_dense, methods_dense)
results_sparse = evaluate_with_criteria_sparse(X_sparse, y_sparse, methods_sparse)

display_results_table(results_dense)
display_results_table(results_sparse)

Unnamed: 0,Method,Criterion,Time,Accuracy,Iterations,Log,KKT_violation,fP,PrimalGap
0,Base,delta_alpha,0.011294,1.0,17,"[0.05407135935396502, 0.04724522435137681, 0.0...",0.052778,0.16036,0.034272
1,Base,projected_gradient,0.403933,1.0,1000,"[2.053487961556082, 1.559545888058933, 0.81960...",0.0,0.155048,1e-05
2,Base,dual_objective,0.000755,1.0,3,"[0.1782139802509768, 0.027645702056426025, 0.0...",0.1,0.419367,1.704789
3,Random Permutation,delta_alpha,0.004082,1.0,15,"[0.058996454154680564, 0.04945919774222893, 0....",0.052778,0.159082,0.026027
4,Random Permutation,projected_gradient,0.415881,1.0,1000,"[1.6666293235489524, 0.9530269451273896, 0.750...",0.0,0.155046,0.0
5,Random Permutation,dual_objective,0.002723,1.0,10,"[0.17935019176199526, 0.07427378563282716, 0.0...",0.072222,0.16717,0.078197
6,Online,delta_alpha,4.5e-05,0.972222,6,"[0.010433800154139635, 0.018938505578882662, 0...",0.580556,47.154678,303.132941
7,Online,projected_gradient,0.201554,1.0,1000,"[1.2883470748215526, 1.4835481206918928, 1.889...",0.141667,0.910411,4.871865
8,Online,dual_objective,2.9e-05,0.969444,3,"[0.01106520182665766, 0.009310348131880555, 0....",0.872222,81.634966,525.520028
9,With Shrinking,delta_alpha,0.008672,1.0,17,"[0.052364332797794455, 0.043921307667456576, 0...",0.05,0.156699,0.01066


Unnamed: 0,Method,Criterion,Time,Accuracy,Iterations,Log,KKT_violation,fP,PrimalGap
0,Base,delta_alpha,1.19228,1.0,49,"[11.926686771400576, 4.784692132650964, 1.5493...",0.001167,127.850062,2.482051e-10
1,Base,projected_gradient,8.863589,1.0,1000,"[4.6546724567017925, 3.8238366332734834, 3.538...",0.0,127.850062,0.0
2,Base,dual_objective,1.043417,1.0,38,"[199.00133450509682, 27.931657456155534, 5.642...",0.354726,127.850612,4.30159e-06
3,Random Permutation,delta_alpha,0.28722,1.0,15,"[11.849898404190418, 4.777331141097696, 1.8750...",0.0,127.850066,2.51856e-08
4,Random Permutation,projected_gradient,9.355601,1.0,1000,"[5.069173560783351, 3.8225970151785504, 3.5263...",0.0,127.850065,1.765409e-08
5,Random Permutation,dual_objective,0.306647,1.0,14,"[197.956293504207, 49.21339853664989, 12.34785...",0.0,127.850076,1.101607e-07
6,Online,delta_alpha,0.005846,0.89965,139,"[0.6666666666666669, 0.66263841774745, 0.64782...",0.975496,426.019835,2.332183
7,Online,projected_gradient,0.118724,0.983664,1000,"[1.0000000000000004, 3.0181633120154547, 2.972...",0.87748,179.213945,0.4017509
8,Online,dual_objective,0.011569,0.833139,65,"[0.4444444444444444, 0.4479106914225959, 0.444...",0.991832,591.288448,3.624859
9,With Shrinking,delta_alpha,0.331084,1.0,13,"[11.888034409653962, 4.740840621901948, 1.8651...",0.0,127.850064,1.512049e-08


In [114]:
resultsdig20news = []
for i in range(len(results_dense)):
    resultsdig20news.append({
        'Method':results_dense[i]['Method'],
        'Criterion': results_dense[i]['Criterion'],
        'Time Dense': results_dense[i]['Time'],
        'Time Sparse': results_sparse[i]['Time'],
        'Accuracy Dense': results_dense[i]['Accuracy'],
        'Accuracy Sparse': results_sparse[i]['Accuracy']
    })

display_results_table(resultsdig20news)

Unnamed: 0,Method,Criterion,Time Dense,Time Sparse,Accuracy Dense,Accuracy Sparse
0,Base,delta_alpha,0.011294,1.19228,1.0,1.0
1,Base,projected_gradient,0.403933,8.863589,1.0,1.0
2,Base,dual_objective,0.000755,1.043417,1.0,1.0
3,Random Permutation,delta_alpha,0.004082,0.28722,1.0,1.0
4,Random Permutation,projected_gradient,0.415881,9.355601,1.0,1.0
5,Random Permutation,dual_objective,0.002723,0.306647,1.0,1.0
6,Online,delta_alpha,4.5e-05,0.005846,0.972222,0.89965
7,Online,projected_gradient,0.201554,0.118724,1.0,0.983664
8,Online,dual_objective,2.9e-05,0.011569,0.969444,0.833139
9,With Shrinking,delta_alpha,0.008672,0.331084,1.0,1.0


In [None]:
from sklearn.datasets import fetch_rcv1
import numpy as np

rcv1 = fetch_rcv1()
X = rcv1.data
y = rcv1.target

cat1, cat2 = 33, 45

idx_cat1 = y[:, cat1].toarray().flatten() == 1
idx_cat2 = y[:, cat2].toarray().flatten() == 1

# bierzemy tylko próbki należące wyłącznie do jednej z tych klas (XOR)
idx_either = np.logical_xor(idx_cat1, idx_cat2)

X_bin = X[idx_either]
y_bin = np.where(idx_cat1[idx_either], 1, -1)

# wybieramy 500 z każdej klasy (lub mniej, jeśli mniej jest dostępnych)
n_samples_per_class = 500

idx_class1 = np.where(y_bin == 1)[0][:n_samples_per_class]
idx_class2 = np.where(y_bin == -1)[0][:n_samples_per_class]

selected_idx = np.hstack([idx_class1, idx_class2])

X_bin_subset = X_bin[selected_idx]
y_bin_subset = y_bin[selected_idx]

print(f"Shape sparse X subset: {X_bin_subset.shape}")
print(f"Density: {X_bin_subset.nnz / (X_bin_subset.shape[0] * X_bin_subset.shape[1]):.4%}")


KeyboardInterrupt: 