In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
# Only use this if running the notebook on your local machine
#plt.style.use('notebook.mplstyle')

In [None]:
# Generate random 2-d data for classification: two unit-variance Gaussian
# clouds whose centres are mirrored about the origin.
n_per_class = 20
mu_class1 = -2
mu_class2 = 2

# Draw from a seeded generator so that "Restart & Run All" reproduces the
# same points (and hence the same iteration counts) on every run.
random_seed = 0
rng = np.random.default_rng(random_seed)
class_direction = np.array([0.5, 1])
x_class1 = rng.standard_normal((n_per_class, 2)) + mu_class1 * class_direction
x_class2 = rng.standard_normal((n_per_class, 2)) + mu_class2 * class_direction
X = np.vstack([x_class1, x_class2])
# Labels are kept as a column vector of shape (2*n_per_class, 1); the code
# that consumes them flattens y before masking or fitting.
y = np.vstack([np.zeros([n_per_class, 1]), np.ones([n_per_class, 1])])

# Include a scaling factor that makes the variation along
# one dimension dominate.
scale_factor = 10
X_scaled = X * np.array([scale_factor, 1])

# Scatter the stretched data, one marker series per class
labels = y.flatten()
fig, ax = plt.subplots(1, 1)
for class_value, class_name in [(0, 'Class 1'), (1, 'Class 2')]:
    in_class = labels == class_value
    ax.plot(X_scaled[in_class, 0], X_scaled[in_class, 1], 'o', alpha=0.75, label=class_name)

# Use the same window on both axes so the stretch along x_1 is visible
ax_lim = scale_factor*np.array([mu_class1-3, mu_class2+2])
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_xlim(ax_lim)
ax.set_ylim(ax.get_xlim())
ax.legend();

In [None]:
log_reg = LogisticRegression(C=1)
# NOTE(review): the solver stops after max_iter iterations (100 by default in
# scikit-learn), so a count equal to that limit would mean the fit was cut off
# rather than converged -- confirm no ConvergenceWarning is raised below.

# Check how the number of iterations required to reach the optimal
# parameters varies as a function of the scaling asymmetry.
n_iterations = []
scale_factors = np.logspace(0, 4, 21)
# The loop variable is deliberately not named `scale_factor`: that name holds
# the factor used to build and plot X_scaled, and must survive this cell.
for factor in scale_factors:
    X_scaled_tmp = X * np.array([factor, 1])
    log_reg.fit(X_scaled_tmp, y.flatten())
    # n_iter_ is an array (a single entry for this two-class problem);
    # store the count as a plain int so the list plots as one series.
    n_iterations.append(int(log_reg.n_iter_[0]))

# Plot the iteration count against the scale factor on a log-spaced x axis
fig, ax = plt.subplots(1, 1)
ax.plot(scale_factors, n_iterations, color='k', marker='o', linestyle='-')
ax.set_xscale('log')
ax.set_xlabel('Scale difference between $x_1$ and $x_2$')
ax.set_ylabel('Number of iterations');

In [None]:
# A typical approach is to always rescale your data so that
# all features either vary within a fixed range, usually
# [-1, 1] or [0, 1], or are normalized. Normalized usually
# means mean-centered (zero mean) and scaled to unit variance.

# Mean-centering and unit-variance scaling are easily accomplished
# with a StandardScaler from scikit-learn:
scaler = StandardScaler()
X_ss = scaler.fit_transform(X_scaled)

# Show the per-feature mean, then the per-feature standard deviation
for column_stat in (X_ss.mean(axis=0), X_ss.std(axis=0)):
    print(column_stat)

# Visualize the standardized data, one marker series per class
is_class1 = y.flatten() == 0
is_class2 = y.flatten() == 1
fig, ax = plt.subplots(1, 1)
ax.plot(X_ss[is_class1, 0], X_ss[is_class1, 1], 'o', alpha=0.75, label='Class 1')
ax.plot(X_ss[is_class2, 0], X_ss[is_class2, 1], 'o', alpha=0.75, label='Class 2')
# No explicit axis window is computed here: limits derived from scale_factor
# describe the raw data, not the standardized values. Let matplotlib choose
# the x-range and mirror it onto y so both axes span the same interval.
ax.set(xlabel='$x_1$', ylabel='$x_2$')
ax.set_ylim(ax.get_xlim())
ax.legend();

In [None]:
# Redo the comparison above, but standardize the stretched data with the
# StandardScaler before every fit.
n_iterations = []
scale_factors = np.logspace(0, 4, 21)
# Loop-local names are used on purpose: `scale_factor` and `X_scaled` belong
# to the data-generation step and are read again by the plotting and
# standardization code, so overwriting them here would make a re-run of those
# cells silently operate on different data.
for factor in scale_factors:
    X_stretched = X * np.array([factor, 1])
    # The scaler is refit on every iteration, so each fit sees standardized features
    X_rescaled = scaler.fit_transform(X_stretched)
    log_reg.fit(X_rescaled, y.flatten())
    # n_iter_ is an array (a single entry for this two-class problem);
    # store the count as a plain int so the list plots as one series.
    n_iterations.append(int(log_reg.n_iter_[0]))

# Plot iterations needed after standardization versus the original scale factor
axis_settings = {
    'xscale': 'log',
    'xlabel': 'Scale difference between $x_1$ and $x_2$',
    'ylabel': 'Number of iterations',
}
fig, ax = plt.subplots(1, 1)
ax.plot(scale_factors, n_iterations, 'ko-')
ax.set(**axis_settings);