In [4]:
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np
from scipy.sparse import csc_matrix
from get_dataset import get_dataset
from optimizers import Ig, Nesterov, Sgd, Shuffling
from loss import LogisticRegression
from utils import variance_at_opt
import scipy.sparse

In [5]:
def _as_dense(value):
    """Return ``value.toarray()`` for a SciPy sparse input, otherwise ``value`` unchanged."""
    return value.toarray() if scipy.sparse.issparse(value) else value


def track_optimizer_loss(optimizer, x0, max_iterations):
    """Call ``optimizer.run`` repeatedly and record one loss value per call.

    Args:
        optimizer: Object exposing ``run(x)`` and ``loss.value(x)``.
        x0: Starting point. It is copied once with ``x0.copy()`` and that copy
            is passed to every ``optimizer.run`` call.
        max_iterations: Number of times ``optimizer.run`` is called.

    Returns:
        A tuple ``(last_output, losses)``. ``last_output`` is whatever the final
        ``optimizer.run`` call returned, or ``None`` when ``max_iterations`` is
        zero or negative. ``losses`` holds one entry per call: the loss of the
        output, or the mean loss over its elements when the output is a list.
    """
    x = x0.copy()
    losses = []
    # Bound before the loop so the return below cannot hit an unbound name
    # when the loop body never executes.
    optimizer_trace = None

    # NOTE(review): `x` is never reassigned here, so every call receives the same
    # object; unless `optimizer.run` mutates it in place, each call starts from
    # x0 again -- confirm this is intended.
    for _ in range(max_iterations):
        optimizer_trace = optimizer.run(x)

        if isinstance(optimizer_trace, list):
            # A list of outputs contributes the average of its per-element losses.
            loss_value = np.mean(
                [optimizer.loss.value(_as_dense(x_i)) for x_i in optimizer_trace]
            )
        else:
            # A single output (sparse or dense) is evaluated directly.
            # NOTE(review): the notebook's recorded run ends with
            # "'numpy.ndarray' object has no attribute 'toarray'". Every
            # `.toarray()` in this function is guarded by `issparse`, so the
            # error presumably comes from a callee that expects sparse input --
            # confirm whether `loss.value` accepts the dense arrays passed here.
            loss_value = optimizer.loss.value(_as_dense(optimizer_trace))
        losses.append(loss_value)

    return optimizer_trace, losses




In [6]:
# ---- Plot styling ----
sns.set(context="talk", style="whitegrid", font_scale=1.2, palette=sns.color_palette("bright"))
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'figure.figsize': (8, 6),
})

# ---- Data and loss ----
dataset = 'w8a.txt'
A, b = get_dataset(dataset)
n, dim = A.shape
# The loss is first built with both l1 and l2 set to zero; its `l2` attribute
# is then overwritten with L / sqrt(n), where L comes from loss.smoothness().
loss = LogisticRegression(A, b, l1=0, l2=0)
L = loss.smoothness()
l2 = L / np.sqrt(n)
loss.l2 = l2

# ---- Run parameters ----
n_epoch = 600
batch_size = 512
n_seeds = 1  # Set to 20 in the paper
stoch_it = (250 * n) // batch_size
# Starting point: an empty sparse column of shape (dim, 1).
x0 = csc_matrix((dim, 1))

# Run the methods and collect results
# `results` maps a method label to the list of losses returned by
# track_optimizer_loss for that method.
results = {}

# Nesterov
nest_str = Nesterov(loss=loss, it_max=n_epoch, mu=l2, strongly_convex=True)
nest_final_x, nest_losses = track_optimizer_loss(nest_str, x0, 1)
results['Nesterov'] = nest_losses
print('Nesterov done')

# Every stochastic method below uses the same step-size cap, so evaluate it once.
# (Assumes loss.batch_smoothness(batch_size) returns the same value on each call.)
lr_max = 1 / loss.batch_smoothness(batch_size)

# NOTE(review): each stochastic optimizer is configured with it_max=stoch_it and
# is then passed to track_optimizer_loss with max_iterations=stoch_it, which
# calls optimizer.run that many times; Nesterov above is run once. Presumably
# only one of the two counts is intended -- confirm before a full-size run.

# Shuffling with different configurations
rr = Shuffling(loss=loss, lr0=1/l2, lr_max=lr_max, lr_decay_coef=l2/3,
               it_max=stoch_it, n_seeds=n_seeds, batch_size=batch_size)
rr_final_x, rr_losses = track_optimizer_loss(rr, x0, stoch_it)
results['RR'] = rr_losses
print('RR done')

so = Shuffling(loss=loss, lr0=1/l2, lr_max=lr_max, lr_decay_coef=l2/3,
               it_max=stoch_it, n_seeds=n_seeds, batch_size=batch_size, steps_per_permutation=np.inf)
so_final_x, so_losses = track_optimizer_loss(so, x0, stoch_it)
results['Shuffle-once'] = so_losses
print('SO done')

# Stochastic Gradient Descent
# Uses the shared `n_seeds` setting (previously hard-coded to 1) so that raising
# n_seeds applies to SGD as well as to the shuffling methods.
sgd = Sgd(loss=loss, lr_max=lr_max, lr0=1/l2, lr_decay_coef=l2/2,
          it_max=stoch_it, n_seeds=n_seeds, batch_size=batch_size, avoid_cache_miss=True)
sgd_final_x, sgd_losses = track_optimizer_loss(sgd, x0, stoch_it)
results['SGD'] = sgd_losses
print('SGD done')

# Incremental Gradient
ig = Ig(loss=loss, lr0=1/l2, lr_max=lr_max, lr_decay_coef=l2/3,
        it_max=stoch_it, batch_size=batch_size)
ig_final_x, ig_losses = track_optimizer_loss(ig, x0, stoch_it)
results['IG'] = ig_losses
print('IG done')

# Visualization
# Draw one curve per entry of `results` on a single log-scaled axis.
fig, ax = plt.subplots()
for method, losses in results.items():
    ax.plot(losses, label=method)
ax.set_yscale('log')
ax.set_xlabel('Iterations')
ax.set_ylabel('Loss')
ax.legend()
ax.set_title('Optimization Loss over Iterations')
plt.show()


Running Optimizer: 100%|██████████| 600/600 [00:01<00:00, 330.70it/s]


AttributeError: 'numpy.ndarray' object has no attribute 'toarray'

In [None]:
# Print the list of losses stored under the 'RR' key of `results`.
# NOTE(review): the recorded output of the preceding cell ends in an
# AttributeError, so 'RR' may be missing from `results` until that cell
# runs to completion.
print(results['RR'])

In [None]:
# Continuing from the previous part...

# Measure variance
# NOTE(review): `x_opt` is not assigned in any cell visible here -- confirm it is
# defined before this cell runs.
batches = [1, 4, 8] + list(np.unique(np.logspace(1, np.log10(loss.n // 30), num=50, dtype=int)))
n_perms = 10
vars_sgd = []
vars_rr = []
vars_rr_upper = []
vars_rr_lower = []
# The loop variable is deliberately not named `b`: that name already holds the
# second value returned by get_dataset in the setup cell, and reusing it here
# would overwrite it in the notebook namespace.
for batch_sz in batches:
    var_sgd, var_rr, var_rr_upper, var_rr_lower = variance_at_opt(x_opt, loss, batch_size=batch_sz, n_perms=n_perms)
    vars_sgd.append(var_sgd)
    vars_rr.append(var_rr)
    vars_rr_upper.append(var_rr_upper)
    vars_rr_lower.append(var_rr_lower)

# Visualize variance
# All four series are plotted against batch size on log-log axes.
plt.figure(figsize=(10, 6))
plt.plot(batches, vars_sgd, label=r'$\sigma_{*}^2$')
plt.plot(batches, vars_rr_upper, label=r'$\frac{\gamma L n}{4}\sigma_{*}^2$, $\gamma=\frac{1}{L}$')
plt.plot(batches, vars_rr_lower, label=r'$\frac{\gamma \mu n}{8}\sigma_{*}^2$, $\gamma=\frac{1}{L}$')
plt.plot(batches, vars_rr, label=r'$\sigma_{\mathrm{shuffle}}^2$, $\gamma=\frac{1}{L}$')
plt.yscale('log')
plt.xscale('log')
plt.xlabel('Batch size')
# Raw string: `\s` is not a valid escape sequence in a regular string literal.
plt.ylabel(r'Variance at $x_{\star}$')
plt.legend()
plt.grid(True, which="both", ls="-")
plt.show()


In [None]:
# Histogram visualization
# NOTE(review): neither `batch` nor `x_opt` is assigned in any cell visible
# here -- confirm both are defined before this cell runs.
n_perms = 1
# The step size does not depend on the loop iteration, so it is computed once
# (assumes variance_at_opt leaves the value of loss.max_smoothness() unchanged).
lr = 1 / loss.max_smoothness()
vars_so = []
for _ in range(1000):
    # Only the second of the four values returned by variance_at_opt is kept.
    _, var_so, _, _ = variance_at_opt(x_opt, loss, batch_size=batch, n_perms=n_perms, lr=lr)
    vars_so.append(var_so)

# Plot histogram
# Bin edges are log-spaced between the smallest and largest collected value.
plt.figure(figsize=(10, 6))
bins = np.logspace(np.log10(np.min(vars_so)), np.log10(np.max(vars_so)), 45)
plt.hist(vars_so, bins=bins, edgecolor='#e0e0e0', linewidth=0., alpha=0.7, log=True)
plt.xlabel(r'Variance at $x_{\star}$')
plt.ylabel('Frequency')
plt.xscale('log')
plt.show()
