# <center> Graphs for Logistic Regression </center>
    
This notebook loads the saved results for logistic regression, then plots and saves the corresponding graphs.

### Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
import matplotlib as mpl
from scripts.utils import simu_block_log
import warnings
# Silence every warning from this point on: no category, message or module
# filter is given, so nothing is exempt.
warnings.filterwarnings("ignore")

### Configuration for graphics

In [None]:
# Render all figure text through LaTeX.
# The active font family is serif (Palatino); Helvetica is only registered as
# the sans-serif choice and is used if the family is switched to sans-serif.
plt.rcParams.update({
    "text.usetex": True,
    "font.sans-serif": ["Helvetica"],
    "font.family": "serif",
    "font.serif": ["Palatino"],
})

### Load data

In [None]:
# `puiss` selects the experiment: it fills the `alpha{}` slot of the result
# file names and is passed as the `puiss` argument of `simu_block_log`.
puiss = 2
# Alternative setting:
#puiss = 5

In [None]:
# Read the stored loss arrays of the four methods; `puiss` fills the
# `alpha{}` placeholder of every file name.
loss_sgd, loss_uni, loss_bia, loss_unb = (
    np.load(path_template.format(puiss))
    for path_template in (
        'results/LOGISTIC/loss_sgd_alpha{}.npy',
        'results/LOGISTIC/loss_uni_alpha{}.npy',
        'results/LOGISTIC/loss_bia_alpha{}.npy',
        'results/LOGISTIC/loss_unb_alpha{}.npy',
    )
)

In [None]:
# Average each loss array along its first axis
# (presumably the axis indexing independent runs — TODO confirm).
mean_sgd, mean_uni, mean_bia, mean_unb = (
    np.mean(losses, axis=0)
    for losses in (loss_sgd, loss_uni, loss_bia, loss_unb)
)

In [None]:
# Reference optimum: rebuild the simulated dataset, solve the regularized
# logistic regression with scikit-learn, and evaluate the objective
#     mean(log(1 + exp(-y * X@w))) + (λ/2) * ||w||^2
# at the fitted coefficients. The result (`loss_opt`) is the baseline that the
# plotted optimality gaps are measured against.

# Parameters
n_samples = 10000   # number of samples
n_features = 250    # dimension of the problem
# Regularization parameter. The name is the plain Greek letter λ (U+03BB)
# everywhere; the previous mix with the look-alike math-italic glyph only
# worked because Python normalizes identifiers, and bound this same name.
λ = 1/(n_samples)

# Simulate data for regression
seed=0
noise=0.01
block_size=1

X,y=simu_block_log(seed=seed,n_samples=n_samples,n_features=n_features,
                     puiss=puiss,block_size=block_size,noise=noise)

# scikit-learn minimizes C * sum(losses) + 0.5 * ||w||^2, so C = 1/(n_samples*λ)
# gives the same minimizer as the mean-loss objective written above.
c = 1/(n_samples*λ)
# NOTE(review): tol=1e-3 is looser than scikit-learn's default (1e-4), and
# loss_opt is only as accurate as this solve — confirm it is tight enough for
# the smallest gaps shown on the log-scale plot.
log_sk = LogisticRegression(C=c,fit_intercept=False,tol=1e-3)
# fit sklearn model
log_sk.fit(X=X,y=y)
coeff = log_sk.coef_[0]

# log(1 + exp(z)) evaluated as logaddexp(0, z): same value, but no overflow
# when z is large and positive.
data_term = np.logaddexp(0, -y * (X @ coeff)).mean()
# Squared Euclidean norm via a vectorized dot product instead of the builtin sum.
reg_term = (λ/2)*np.dot(coeff, coeff)
print('data_term:',data_term)
print('reg_term :',reg_term)
# Optimal loss
loss_opt = data_term + reg_term
print('loss_opt :',loss_opt)

In [None]:
# Log-log plot of the optimality gap (averaged loss minus the reference
# optimum `loss_opt`) for the four methods.
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(mean_sgd - loss_opt, color='b', label='SGD')
ax.plot(mean_uni - loss_opt, color='g', label='Uniform')
ax.plot(mean_unb - loss_opt, color='darkorange', label='Adaptive')
ax.plot(mean_bia - loss_opt, color='r', label='Musketeer')
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_ylabel(r'Optimality Gap $f(\theta_t)-f(\theta^\star)$', fontsize=15)
ax.set_xlabel('Passes over coordinates', fontsize=15)
ax.tick_params(axis='both', labelsize=15)
# Single legend call: the earlier duplicate was immediately replaced by this one.
ax.legend(loc='lower left', fontsize=15)
ax.grid(linestyle='--', which='both', alpha=0.5)
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
fig.tight_layout()

# Output name follows the experiment selected by `puiss`, so the file can no
# longer be mislabeled (it was hard-coded to the puiss=5 name).
filename = 'logistic_a{}.pdf'.format(puiss)
# Saving stays off by default, as before; set to True to export the PDF.
save_figure = False
if save_figure:
    fig.savefig(filename, bbox_inches='tight', transparent=True, pad_inches=0)
plt.show()