In [2]:
# Scratch cell: a stand-alone integer addition; no other cell reads its result.
2 + 1111111111

1111111113

In [1]:
import sys
# Add the pytorch subfolder to sys.path so that its modules can be imported
sys.path.append("./pytorch")

import matplotlib
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt
import numpy.linalg as la
from sklearn.datasets import load_svmlight_file

from optimizers import Gd, Nesterov, Adgd, AdgdAccel, Bb, Polyak
from loss_functions import logistic_loss, logistic_gradient

# --- Custom AdGrad implementations from pytorch/optimizer_adgrad*.py ---
from optimizer_adgrad    import AdsgdAdGrad
from optimizer_adgrad_nesterov import AdsgdAdGradNesterov

# Global figure styling, applied once for every plot in the notebook.
sns.set(
    style="whitegrid",
    font_scale=1.2,
    context="talk",
    palette=sns.color_palette("bright"),
    color_codes=False,
)
# Render math text with the 'cm' font set.
matplotlib.rcParams['mathtext.fontset'] = 'cm'

# Dataset selection: the name determines both the input file and the
# iteration budget used by every optimizer run below.
dataset = 'covtype'

# covtype is read from a '.bz2' file; any other dataset is read from the
# bare name inside ./datasets/.
data_path = './datasets/' + dataset + ('.bz2' if dataset == 'covtype' else '')

# covtype gets 10000 iterations, every other dataset 3000.
it_max = 10000 if dataset == 'covtype' else 3000

def logistic_smoothness(X):
    """Return 0.25 times the largest eigenvalue of ``X.T @ X / X.shape[0]``.

    The notebook stores the result as ``L`` and uses ``1 / L`` as the
    constant step size of the non-adaptive optimizers.

    Args:
        X: 2-D array; ``X.shape[0]`` is used as the normalising count.

    Returns:
        A scalar: a quarter of the top eigenvalue of the scaled Gram matrix.
    """
    num_rows = X.shape[0]
    scaled_gram = (X.T @ X) / num_rows
    # eigvalsh is applicable because the scaled Gram matrix is symmetric.
    top_eigenvalue = la.eigvalsh(scaled_gram).max()
    return 0.25 * top_eigenvalue


# Load the dataset in svmlight format and densify the feature matrix.
data = load_svmlight_file(data_path)
X, y = data[0].toarray(), data[1]
# np.array_equal is False (instead of failing) when the number of distinct
# labels is not exactly two, unlike an elementwise `==` against [1, 2].
if np.array_equal(np.unique(y), [1, 2]):
    # Loss functions support only labels from {0, 1}
    y -= 1
n, d = X.shape
# L is the smoothness constant; 1 / L is the step size of the non-adaptive runs.
L = logistic_smoothness(X)
# Regularization coefficient: L / n for covtype, ten times smaller otherwise.
l2 = L / n if dataset == 'covtype' else L / (10 * n)
# Every optimizer starts from the zero vector.
w0 = np.zeros(d)

def loss_func(w):
    """Evaluate ``logistic_loss`` at ``w`` on the module-level ``X``, ``y`` with coefficient ``l2``."""
    objective_value = logistic_loss(w, X, y, l2)
    return objective_value

def grad_func(w):
    """Evaluate ``logistic_gradient`` at ``w`` on the module-level ``X``, ``y`` with coefficient ``l2``."""
    gradient_value = logistic_gradient(w, X, y, l2)
    return gradient_value


In [None]:
# Baseline `Gd` run with the constant step size 1 / L.
gd = Gd(
    loss_func=loss_func,
    grad_func=grad_func,
    lr=1 / L,
    it_max=it_max,
)
gd.run(w0=w0)

In [None]:
# `Nesterov` run with the constant step size 1 / L (default convexity settings).
nest = Nesterov(
    loss_func=loss_func,
    grad_func=grad_func,
    lr=1 / L,
    it_max=it_max,
)
nest.run(w0=w0)

In [None]:
# `Nesterov` run in its strongly-convex mode, with the regularization
# coefficient l2 passed as the strong-convexity parameter mu.
nest_str = Nesterov(
    loss_func=loss_func,
    grad_func=grad_func,
    lr=1 / L,
    strongly_convex=True,
    mu=l2,
    it_max=it_max,
)
nest_str.run(w0=w0)

In [None]:
# `Adgd` run; no step size is supplied, only eps=0 and the iteration budget.
adgd = Adgd(
    loss_func=loss_func,
    grad_func=grad_func,
    eps=0,
    it_max=it_max,
)
adgd.run(w0=w0)

In [None]:
# `AdgdAccel` run with its default settings and the shared iteration budget.
ad_acc = AdgdAccel(
    loss_func=loss_func,
    grad_func=grad_func,
    it_max=it_max,
)
ad_acc.run(w0=w0)

In [None]:
# Two `AdsgdAdGrad` instances that differ only in `tau_rule`:
# 'original' first, then 'mod'. They are constructed here and run in a later cell.
adgrad_orig, adgrad_mod = (
    AdsgdAdGrad(
        loss_func=loss_func, grad_func=grad_func, eps=0, it_max=it_max, tau_rule=rule
    )
    for rule in ('original', 'mod')
)

In [None]:
# Two `AdsgdAdGradNesterov` instances that differ only in `tau_rule`:
# 'original' first, then 'mod'. They are constructed here and run in a later cell.
adgradnes_orig, adgradnes_mod = (
    AdsgdAdGradNesterov(
        loss_func=loss_func, grad_func=grad_func, eps=0, it_max=it_max, tau_rule=rule
    )
    for rule in ('original', 'mod')
)

In [None]:
# Run both AdsgdAdGrad instances (tau_rule='original', then tau_rule='mod')
# from the same starting point w0.
adgrad_orig.run(w0=w0)
adgrad_mod.run(w0=w0)


In [None]:
# Run both AdsgdAdGradNesterov instances (tau_rule='original', then
# tau_rule='mod') from the same starting point w0.
adgradnes_orig.run(w0=w0)
adgradnes_mod.run(w0=w0)


In [None]:
# Optimizers to compare, with the legend label and line marker for each.
# The three lists are index-aligned: entry i of each describes the same run.
optimizers = [
    gd, nest, adgd, ad_acc, nest_str,
    adgrad_orig, adgrad_mod,
    adgradnes_orig, adgradnes_mod
]
labels = [
    'GD', 'Nesterov', 'AdGD', 'AdGD-accel', 'Nesterov-strong',
    'AdGrad-orig', 'AdGrad-mod',
    'AdGradNes-orig', 'AdGradNes-mod'
]
markers = [',','o','*','^','D','s','.','X','v']

# zip() stops at the shortest input, so a missing marker or label would
# silently drop an optimizer from the figure; fail loudly instead.
assert len(optimizers) == len(labels) == len(markers), (
    "optimizers, labels and markers must have the same length"
)

# Compute f*: the smallest loss value found across all optimizers.
# NOTE(review): compute_loss_on_iterates() presumably fills `opt.losses`; confirm.
for opt in optimizers:
    opt.compute_loss_on_iterates()
f_star = np.min([np.min(opt.losses) for opt in optimizers])

# One curve per optimizer, plotted relative to f* on a log scale.
plt.figure(figsize=(8, 6))
for opt, marker, label in zip(optimizers, markers, labels):
    opt.plot_losses(marker=marker, f_star=f_star, label=label)

plt.yscale('log')
plt.xlabel('Iteration')
plt.ylabel(r'$f(x^k) - f_*$')
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Short Adgd run (500 iterations) used to plot its step sizes against the
# reference levels 1/L and 1/l2.
# NOTE: this rebinds `adgd`, replacing the it_max-iteration run created earlier.
# Re-run that earlier cell before re-running the comparison plot.
adgd = Adgd(loss_func=loss_func, grad_func=grad_func, eps=0, it_max=500)
adgd.run(w0=w0)
plt.figure(figsize=(8, 6))
my_green = sns.color_palette('bright')[2]
# NOTE(review): the first two recorded entries are skipped, presumably
# because they are initialization values rather than adaptive steps; confirm.
plt.plot(adgd.its[2:], adgd.lrs[2:], label='AdGD', color=my_green)
plt.axhline(y=1 / L, color='black', label='$1/L$')
# Raw strings keep the LaTeX backslashes literal; '\m' and '\l' are invalid
# escape sequences in ordinary string literals.
plt.axhline(y=1 / l2, color='red', label=r'$1/\mu$')
plt.yscale('log')
plt.xlabel('Iteration')
plt.ylabel(r'$\lambda_k$')
plt.legend()
plt.tight_layout()
plt.show()