# ロジスティック回帰(Logistic Regression)

## ロジスティック回帰とは

- 名前に「回帰」と付いているが、実際には分類のアルゴリズム
- 単純パーセプトロンの活性化関数をシグモイド関数(2クラスロジスティック回帰)やソフトマックス関数(多クラスロジスティック回帰)にしたもの
- ここでは多クラスロジスティック回帰を扱う(2クラスロジスティック回帰は[単純パーセプトロン](simple_perceptron.ipynb)を参照)

### scikit-learnの実装

- scikit-learnの`linear_model.LogisticRegression`には、必ず正則化項(penalty)が入る(正則化が不要な場合は、One vs Restになるが`linear_model.Perceptron`を使用する)

## 使用方法

### データ準備

In [None]:
import numpy as np
from sklearn.datasets import make_blobs

# Generate a reproducible toy dataset: 1000 samples spread over 3 blobs
# (random_state=0 fixes the draw).
X, y = make_blobs(n_samples=1000, centers=3, random_state=0)

# Cast features to float32 and labels to int32.
# NOTE(review): presumably these are the dtypes the deep-learning frameworks
# used later in this file expect — confirm.
X = X.astype(np.float32)
y = y.astype(np.int32)

### scikit-learn

In [None]:
from sklearn.linear_model import LogisticRegression

# Multi-class logistic regression configured with the multinomial option and
# the 'sag' solver; random_state=0 fixes the solver's random seed.
sk_clf = LogisticRegression(
    multi_class='multinomial',
    solver='sag',
    max_iter=100,      # upper bound on solver iterations
    random_state=0,
    n_jobs=-1,
)
sk_clf.fit(X, y)

### Keras

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical

# One dense layer (2 inputs -> 3 outputs) followed by a softmax activation.
keras_clf = Sequential([
    Dense(output_dim=3, input_dim=2),
    Activation('softmax'),
])

# Train with SGD (learning rate 0.01) on the categorical cross-entropy loss.
keras_clf.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=0.01),
)

# Labels are converted to a 3-class categorical encoding to match the
# 3-unit softmax output; training runs silently (verbose=0).
keras_clf.fit(
    X,
    to_categorical(y, nb_classes=3),
    batch_size=100,
    nb_epoch=100,
    verbose=0,
)

### Chainer

In [None]:
import chainer
import chainer.functions as F
import chainer.links as L
from chainer.datasets.tuple_dataset import TupleDataset
from chainer.iterators import SerialIterator
from chainer.training import StandardUpdater, Trainer

class LogisticRegression(chainer.Chain):
    """Chain holding a single linear layer from 2 inputs to 3 outputs.

    ``__call__`` returns the raw output of the linear layer; no softmax is
    applied inside this class.

    NOTE(review): this class has the same name as
    ``sklearn.linear_model.LogisticRegression`` imported earlier in this
    file, so defining it rebinds that name.
    """

    def __init__(self):
        # Hand the layer to the Chain constructor under the name ``l1`` so
        # that it is reachable as ``self.l1``.
        linear = L.Linear(2, 3)
        super(LogisticRegression, self).__init__(l1=linear)

    def __call__(self, x):
        """Return the output of the linear layer ``l1`` applied to ``x``."""
        scores = self.l1(x)
        return scores

# Pair every sample in X with its label in y.
train_data = TupleDataset(X, y)

# Wrap the network in a Classifier and switch off its accuracy computation.
ch_clf = L.Classifier(LogisticRegression())
ch_clf.compute_accuracy = False

# SGD optimizer constructed with its default arguments.
optimizer = chainer.optimizers.SGD()
optimizer.setup(ch_clf)

# Iterate over the data in mini-batches of 100 and stop after 1000
# iterations. device=-1 is passed through to the updater unchanged
# (CPU in Chainer's device numbering).
train_iter = SerialIterator(train_data, batch_size=100)
updater = StandardUpdater(train_iter, optimizer, device=-1)
trainer = Trainer(updater, stop_trigger=(1000, 'iteration'))

trainer.run()

### TensorFlow

In [None]:
import tensorflow as tf
# Short aliases for the two tf.contrib sub-packages used by the TensorFlow
# model and estimator code in this file.
learn = tf.contrib.learn
slim = tf.contrib.slim

def model(x, y):
    """Build a 3-class softmax classifier with one fully connected layer.

    Args:
        x: Input features, fed directly to the fully connected layer.
        y: Class labels; one-hot encoded into 3 classes before the loss.

    Returns:
        A tuple ``(predictions, loss, train_op)``. ``predictions`` is a dict
        with ``'class'`` (argmax of the probabilities along axis 1) and
        ``'prob'`` (softmax of the logits). ``loss`` is the softmax
        cross-entropy and ``train_op`` is the SGD update (learning rate 0.01).
    """
    targets = slim.one_hot_encoding(y, 3)

    # The layer itself has no activation; softmax is applied separately below.
    logits = slim.fully_connected(x, 3, activation_fn=None)

    loss = slim.losses.softmax_cross_entropy(logits, targets)
    train_op = slim.optimize_loss(
        loss,
        slim.get_global_step(),
        learning_rate=0.01,
        optimizer='SGD',
    )

    prob = slim.softmax(logits)
    predictions = {'class': tf.argmax(prob, 1), 'prob': prob}
    return predictions, loss, train_op

# Build an Estimator from the model function and wrap it in SKCompat
# (presumably to expose a scikit-learn style fit/predict interface — confirm).
tf_clf = learn.SKCompat(
    learn.Estimator(model_fn=model)
)

# Train for 1000 steps with mini-batches of 100 samples.
tf_clf.fit(
    x=X,
    y=y,
    steps=1000,
    batch_size=100,
)

## 結果の可視化

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Plot limits: the bounding box of the data, padded by 0.5 on every side.
x_min = X[:, 0].min() - .5
x_max = X[:, 0].max() + .5
y_min = X[:, 1].min() - .5
y_max = X[:, 1].max() + .5

# Number of grid points per axis; the marker line width is scaled to the
# shorter side of the plot area divided by that resolution.
resolution = 200
linewidth = min(x_max - x_min, y_max - y_min) / resolution

# Regular resolution x resolution grid over the plot area, cast to float32
# and flattened into one (x, y) row per grid point.
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, resolution),
    np.linspace(y_min, y_max, resolution),
)
xx = xx.astype(np.float32)
yy = yy.astype(np.float32)
grid = np.column_stack((xx.ravel(), yy.ravel()))

# Class probabilities for every grid point. Exactly one backend is active;
# uncomment another line to visualize a different model instead.
# scikit-learn
probas = sk_clf.predict_proba(grid)
# Keras
# probas = keras_clf.predict_proba(grid, verbose=0)
# Chainer
# probas = F.softmax(ch_clf.predictor(grid)).data
# TensorFlow
# probas = tf_clf.predict(grid)['prob']

plt.figure(figsize=(4, 4))

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
# Hide the tick marks on both axes.
plt.xticks([])
plt.yticks([])

# Background: the per-point probability rows are passed directly as the
# colour argument of the grid scatter.
plt.scatter(
    xx.ravel(),
    yy.ravel(),
    c=probas,
    marker='.',
    alpha=0.6,
    linewidths=linewidth,
)
# Foreground: the training samples, coloured by label with a
# red / green / blue colormap.
plt.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap=ListedColormap(['#FF0000', '#00FF00', '#0000FF']),
)

plt.show()