# L2正則化(Ridge Regression)

## L2正則化とは<a name="description"></a>

- 最も代表的な正則化手法
- パラメーターの値を小さく保つよう学習
- 重み(荷重)減衰ともいう
- 過学習を防ぎ汎化能力を上げる
- 線形回帰にL2正則化を適用したものをリッジ回帰(Ridge Regression)と呼ぶ

## 使用方法<a name="example"></a>

### データ準備<a name="data"></a>

In [None]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt

# Number of training points sampled along the x axis.
n_sample = 20
# Highest polynomial degree used when expanding x into feature columns.
n_dim = 20
# Number of feature columns that actually contribute to the target.
n_valid_feature = 5

# NOTE(review): not referenced by any other visible cell — confirm before removing.
n_feature = 10

# Seed NumPy's global RNG so the shuffle, coefficients and noise below are reproducible.
np.random.seed(0)

def get_multinomial(x, dim):
    """Expand samples ``x`` into polynomial features up to degree ``dim``.

    ``x`` gains a trailing axis so that each sample becomes one row, and the
    result of scikit-learn's ``PolynomialFeatures`` is returned without the
    constant (bias) column.
    """
    column = x[:, np.newaxis]
    expander = PolynomialFeatures(degree=dim, include_bias=False)
    return expander.fit_transform(column)

# Evenly spaced inputs and their polynomial expansion.
x = np.linspace(-1, 1, num=n_sample)
X = get_multinomial(x, n_dim)

# Pick the informative columns: shuffle every column index in place and keep
# the first ``n_valid_feature`` of them.
all_features = np.arange(X.shape[1])
np.random.shuffle(all_features)
valid_features = all_features[:n_valid_feature]

# Ground-truth weights for the informative columns, kept as a column vector.
coef = np.random.uniform(low=-1, high=1, size=(n_valid_feature, 1))

# Gaussian observation noise with standard deviation 0.1, one value per sample.
noise = np.random.normal(loc=0, scale=.1, size=n_sample)

# Target = linear combination of the informative columns plus noise.
signal = np.dot(X[:, valid_features], coef).flatten()
y = signal + noise

# Axis limits padded by 0.1 on every side of the data.
x_min, x_max = x.min() - .1, x.max() + .1
y_min, y_max = y.min() - .1, y.max() + .1

# Scatter plot of the raw samples on a square figure without tick marks.
fig, ax = plt.subplots(figsize=(4, 4))
ax.scatter(x, y)
ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.set_xticks(())
ax.set_yticks(())

plt.show()

### 学習<a name="training"></a>

In [None]:
from sklearn.linear_model import LinearRegression, Ridge

# Ordinary least squares baseline without any penalty.
linear = LinearRegression().fit(X, y)

# Ridge's ``normalize`` option was deprecated in scikit-learn 1.0 and removed
# in 1.2, so the same preprocessing is done by hand: centre every column and
# divide it by the L2 norm of the centred column before fitting.
# NOTE: assumes no column of X is constant (its norm would be zero).
X_offset = X.mean(axis=0)
X_scale = np.linalg.norm(X - X_offset, axis=0)
ridge = Ridge(alpha=0.01)
ridge.fit((X - X_offset) / X_scale, y)

# Map the fitted parameters back to the scale of the raw features so that
# ``ridge.coef_`` and ``ridge.predict`` can be used directly with ``X``.
ridge.coef_ = ridge.coef_ / X_scale
ridge.intercept_ = y.mean() - X_offset.dot(ridge.coef_)

### 可視化<a name="visualization"></a>

In [None]:
# Dense vector of the true weights: zero everywhere except the informative columns.
real_coef = np.zeros(n_dim)
real_coef[valid_features] = coef[:, 0]

# One line per weight comparing the ground truth with both fitted models.
row_format = 'Weight{n:>2}: Real = {coef:>6.3f}, Regularized = {ridge:>6.3f}, Not Regularized = {linear:.3f}'
for i, (true_w, ridge_w, linear_w) in enumerate(zip(real_coef, ridge.coef_, linear.coef_)):
    print(row_format.format(n=i, coef=true_w, ridge=ridge_w, linear=linear_w))

# Dense grid across the padded x range for drawing the fitted curves.
sample_x = np.linspace(x_min, x_max, num=50)
sample_X = get_multinomial(sample_x, n_dim)

# Side-by-side panels: unpenalised fit on the left, ridge fit on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(6,3))
panels = (('Not Regularized', linear), ('Regularized', ridge))

for ax, (title, model) in zip(axes, panels):
    # Identical limits on both panels keep the two fits visually comparable.
    ax.set_title(title)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())

    fitted_curve = model.predict(sample_X)
    ax.scatter(x, y)
    ax.plot(sample_x, fitted_curve)

plt.show()

### 重みの変化<a name="weights"></a>

In [None]:
# Sweep the regularisation strength from 1e-4 to 1e4 on a log scale.
n = 50
param = np.logspace(-4, 4, num=n)
# history[j, i] holds weight j fitted with the i-th regularisation strength.
history = np.zeros((n_dim, n))

# Ridge's ``normalize=True`` was deprecated in scikit-learn 1.0 and removed in
# 1.2. The equivalent preprocessing (centre each column, divide by the L2 norm
# of the centred column) does not depend on alpha, so do it once up front.
# NOTE: assumes no column of X is constant (its norm would be zero).
X_centre = X.mean(axis=0)
X_norm = np.linalg.norm(X - X_centre, axis=0)
X_normalized = (X - X_centre) / X_norm

for i, strength in enumerate(param):
    fitted = Ridge(alpha=strength).fit(X_normalized, y)
    # Rescale so the weights refer to the raw (unnormalised) features.
    history[:, i] = fitted.coef_ / X_norm

plt.figure(figsize=(4, 4))

# One curve per weight, showing how it shrinks as the penalty grows.
for w in history:
    plt.plot(param, w)

plt.ylabel('weight')
plt.xlabel('lambda')
plt.xscale('log')
plt.yticks(())

plt.show()

## 仕組み<a name="mechanism"></a>

### 正則化項<a name="definition"></a>

$$
\frac{\lambda}{2}\|w\|_2^2=\frac{\lambda}{2}\sum_{j=1}^{m}w_j^2
$$

### イメージ<a name="intuition"></a>

L2正則化項の等高線は原点を中心とした同心円となり、原点から離れるほど値が大きくなる。そのため正則化を強めると、重みが原点方向へ引き寄せられ、極端な重みが抑制される。

In [None]:
# Evaluate everything on a 50x50 grid over the unit square of (w1, w2).
w1, w2 = np.meshgrid(np.linspace(0, 1, 50), np.linspace(0, 1, 50))
# One row per grid point: column 0 holds w1, column 1 holds w2.
grid = np.c_[w1.ravel(), w2.ravel()]
# Point at which the illustrative error surface attains its minimum.
center = (.4, .6)

# Error surface: 0.5 * d1**2 + d2**2 + d1 * d2 with d = w - center.
# The cross term tilts the elliptical contours relative to the axes.
offset = grid - center
squared = offset ** 2
squared[:, 0] *= .5
cross = offset[:, 0] * offset[:, 1]
loss = (squared.sum(axis=1) + cross).reshape(w1.shape)

# L2 penalty: squared Euclidean norm of the weight vector.
l2 = (grid ** 2).sum(axis=1).reshape(w1.shape)

# Regularised objective: error plus penalty.
objective = loss + l2

# Three contour panels side by side: the error, the penalty and their sum.
fig = plt.figure(figsize=(9, 3))
ax1, ax2, ax3 = (fig.add_subplot(1, 3, position) for position in (1, 2, 3))

surfaces = (
    (ax1, loss, 'Error'),
    (ax2, l2, 'L2 penalty'),
    (ax3, objective, 'Total cost'),
)
for panel, surface, title in surfaces:
    # 20 contour levels per panel.
    panel.contour(w1, w2, surface, 20)
    panel.set_title(title)

plt.show()