<div style="font-size:18pt; padding-top:20px; text-align:center">СЕМИНАР. <b>Логистическая регрессия</b></div><hr>
<div style="text-align:right;">Папулин С.Ю. <span style="font-style: italic;font-weight: bold;">(papulin.study@yandex.ru)</span></div>

### Содержание

1. [Классификация с линейной регрессией](#1.-Классификация-с-линейной-регрессией)
2. [Логистическая регрессия](#2.-Логистическая-регрессия)
3. [Пример с распознаванием цифр](#3.-Пример-с-распознаванием-цифр)
4. [Источники](#4.-Источники)

<p><b>Подключение библиотек</b></p>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from matplotlib import cm

In [None]:
from matplotlib.colors import ListedColormap

In [None]:
from sklearn.datasets import make_classification

## 1. Классификация с линейной регрессией

### Один признак

In [None]:
# Seed shared by the one-feature data generator default and the train/test splits below
RANDOM_STATE = 12

In [None]:
def generate_data_one_feature(n=100, random_state=RANDOM_STATE):
    """Generate a synthetic two-class dataset described by a single feature.

    Parameters
    ----------
    n : int
        Number of samples to generate.
    random_state : int
        Seed forwarded to ``make_classification``.

    Returns
    -------
    The value returned by ``make_classification``; callers unpack it as ``X, y``.
    """
    settings = dict(
        n_samples=n,
        n_features=1,
        n_informative=1,
        n_redundant=0,
        n_classes=2,
        n_clusters_per_class=1,
        class_sep=0.7,
        shift=2,
        random_state=random_state,
    )
    return make_classification(**settings)

In [None]:
# Генерация исходных данных
X, y = generate_data_one_feature()

In [None]:
# Plot the observed class labels against the single feature
y_one_indx = np.argwhere(y==1)
y_zero_indx =  np.argwhere(y==0)
# Color convention shared with the colormap-based plots below: class 0 is blue, class 1 is red
color_map = ListedColormap(["blue", "red"])
plt.scatter(X[y_zero_indx], y[y_zero_indx], color="blue", label="observed data: 0")
plt.scatter(X[y_one_indx], y[y_one_indx], color="red", label="observed data: 1")
plt.xlabel("$x$")
plt.ylabel("$y_{true}$")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Формирование обучающего и тестового подмножеств
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE)

In [None]:
# Baseline: predict the most frequent training class for every observation

# Count the training observations of each class
labels, counts = np.unique(y_train, return_counts=True)
print("Labels:", labels)
print("Counts:", counts)

# Position (within `labels`) of the class with the most observations
max_label = np.argmax(counts)
majority_class = labels[max_label]
print("Predict {} to all observations".format(majority_class))

# Constant predictions, one per observation
y_train__pred = np.full_like(y_train, majority_class)
y_test__pred = np.full_like(y_test, majority_class)

# Share of correctly classified observations
accuracy_train = accuracy_score(y_train, y_train__pred)
print("Train Accuracy:", accuracy_train)

accuracy_test = accuracy_score(y_test, y_test__pred)
print("Test Accuracy:", accuracy_test)

In [None]:
# Или можно проще
from sklearn.dummy import DummyClassifier

In [None]:
dummy_model = DummyClassifier(strategy='prior').fit(X_train, y_train)
dummy_model.score(X_test, y_test)

In [None]:
# Linear regression

# Fit ordinary least squares on the training subset
linear_model = LinearRegression().fit(X_train, y_train)

# Report the fitted parameters: w0 is the intercept, w1.. are the coefficients
print("Параметры модели:")
print("\tw{} = {}".format(0, linear_model.intercept_))
for position, weight in enumerate(linear_model.coef_, start=1):
    print("\tw{} = {}".format(position, weight))

In [None]:
# Cut-off applied to the continuous regression output
threshold = 0.5

# Class 1 when the regression output reaches the threshold, class 0 otherwise
y_train__pred = (linear_model.predict(X_train) >= threshold).astype(int)
y_test__pred = (linear_model.predict(X_test) >= threshold).astype(int)

# Classifier quality on both subsets
accuracy_train = accuracy_score(y_train, y_train__pred)
print("Train Accuracy:", accuracy_train)

accuracy_test = accuracy_score(y_test, y_test__pred)
print("Test Accuracy:", accuracy_test)

In [None]:
# Train and test panels: true labels, thresholded predictions and the fitted line
plt.figure(1, figsize=[12, 4])
panels = [
    ("Train", X_train, y_train, y_train__pred),
    ("Test", X_test, y_test, y_test__pred),
]
xlim = None
for position, (panel_title, X_part, y_part, y_part__pred) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)
    plt.scatter(X_part, y_part, c=y_part, cmap=color_map, s=80, alpha=0.5, label="true")
    plt.scatter(X_part, y_part__pred, s=20, c=y_part__pred, cmap=color_map, label="predicted")
    if xlim is None:
        # The x-range is read from the train panel and reused for the test panel
        xlim = plt.gca().get_xlim()
    plt.plot(xlim, linear_model.predict(np.array(xlim).reshape(-1,1)), linewidth=1, color="black", label="OLS")
    plt.axhline(threshold, color="0.5", linestyle="--", lw=2)
    plt.xlim(xlim)
    plt.xlabel("$x$")
    plt.ylabel("$y$")
    plt.legend()
    plt.grid(True)

### Два признака

In [None]:
def generate_data_two_features(n=500, random_state=19):
    """Generate a synthetic two-class dataset described by two features.

    Parameters
    ----------
    n : int
        Number of samples to generate.
    random_state : int
        Seed forwarded to ``make_classification``. The default of 19 keeps
        calls without arguments producing the same dataset as when the seed
        was fixed inside the function.

    Returns
    -------
    The value returned by ``make_classification``; callers unpack it as ``X, y``.
    """
    return make_classification(n_samples=n,
                               n_features=2,
                               n_redundant=0,
                               n_informative=2,
                               n_clusters_per_class=1,
                               n_classes=2,
                               class_sep=0.5,
                               random_state=random_state)

In [None]:
# Генерация исходных данных
X, y = generate_data_two_features()

In [None]:
# Scatter plot of the two features, one color per class
y_one_indx = np.flatnonzero(y == 1)
y_zero_indx = np.flatnonzero(y == 0)
plt.scatter(X[y_zero_indx, 0], X[y_zero_indx, 1], color="blue", label="$y_{true} = 0$")
plt.scatter(X[y_one_indx, 0], X[y_one_indx, 1], color="red", label="$y_{true} = 1$")
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Формирование обучающего и тестового подмножеств
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE)

In [None]:
# Расчет базовой отметки
dummy_model = DummyClassifier(strategy='prior').fit(X_train, y_train)
dummy_model.score(X_test, y_test)

In [None]:
# Линейная регрессия

# Обучение
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# Параметры обученной модели
print("Параметры модели:")
print("\tw{} = {}".format(0, linear_model.intercept_))
for indx, coef in enumerate(linear_model.coef_):
    print("\tw{} = {}".format(indx+1, coef))

In [None]:
# Порог для предсказания
threshold = 0.5

# Предсказание
y_train__pred = np.where(linear_model.predict(X_train)>=threshold, 1, 0)
y_test__pred = np.where(linear_model.predict(X_test)>=threshold, 1, 0)

# Оценка качества классификатора
accuracy_train = accuracy_score(y_train, y_train__pred)
accuracy_test = accuracy_score(y_test, y_test__pred)

print("Train Accuracy:", accuracy_train)
print("Test Accuracy:", accuracy_test)

In [None]:
from sklearn.inspection import DecisionBoundaryDisplay

In [None]:
fig, axes = plt.subplots(1, 2, figsize=[10,4])

# Background of the train panel: response of the linear model drawn by DecisionBoundaryDisplay
disp = DecisionBoundaryDisplay.from_estimator(
    linear_model, X, 
    grid_resolution=1000,
    xlabel="X1",
    ylabel="X2",
    cmap="coolwarm",
    response_method="predict",
    alpha=0.5,
    ax=axes[0]
)
axes[0].set_title("Train")
axes[0].set_xlabel("$x_1$")
axes[0].set_ylabel("$x_2$")
# Large markers show the true class, small outlined markers the predicted class
sc_train = axes[0].scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap="coolwarm", s=100, label="true")
axes[0].scatter(X_train[:, 0], X_train[:, 1], c=y_train__pred, cmap="coolwarm", edgecolor="k", label="pred")
axes[0].legend()
# Attach the colorbar to this panel explicitly; its colors come from the scatter mappable
fig.colorbar(sc_train, ax=axes[0])
axes[0].grid(True)

# The same background is redrawn on the test panel
disp.plot(ax=axes[1], alpha=0.5, cmap="coolwarm")
axes[1].set_title("Test")
axes[1].set_xlabel("$x_1$")
axes[1].set_ylabel("$x_2$")
sc_test = axes[1].scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap="coolwarm", s=100, label="true")
axes[1].scatter(X_test[:, 0], X_test[:, 1], c=y_test__pred, cmap="coolwarm", edgecolor="k", label="pred")
axes[1].legend()
fig.colorbar(sc_test, ax=axes[1])
axes[1].grid(True)

fig.tight_layout()

plt.show()

In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../lib")
from plot_utils import show_cplots

In [None]:
show_cplots(
    linear_model, 
    X_train, 
    y_train,
    X_test,
    y_test, 
    title=None, 
    cmap="coolwarm", 
    proba=False)

### Что если исходные данные будут вот такие?

In [None]:
# Generate the source data
X, y = generate_data_one_feature()

# Shift the first 2/3 of the class-1 observations to the right by 8
indx = np.argwhere(y==1)
indx = indx[:int(2/3*indx.size)]
X[indx] = X[indx] + 8

# Plot; class 0 is blue and class 1 is red, matching color_map
y_one_indx = np.argwhere(y==1)
y_zero_indx =  np.argwhere(y==0)
color_map = ListedColormap(["blue", "red"])
plt.scatter(X[y_zero_indx], y[y_zero_indx], color="blue", label="observed data: 0")
plt.scatter(X[y_one_indx], y[y_one_indx], color="red", label="observed data: 1")
plt.xlabel("$x$")
plt.ylabel("$y_{true}$")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Линейная регрессия

# Формирование обучающего и тестового подмножеств
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=12)

# Обучение
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# Параметры обученной модели
print("Параметры модели:")
print("\tw{} = {}".format(0, linear_model.intercept_))
for indx, coef in enumerate(linear_model.coef_):
    print("\tw{} = {}".format(indx+1, coef))
    
# Порог для предсказания
threshold = 0.5

# Предсказание
y_train__pred = np.where(linear_model.predict(X_train)>=threshold, 1, 0)
y_test__pred = np.where(linear_model.predict(X_test)>=threshold, 1, 0)

# Оценка качества классификатора
accuracy_train = accuracy_score(y_train, y_train__pred)
accuracy_test = accuracy_score(y_test, y_test__pred)

print("Train Accuracy:", accuracy_train)
print("Test Accuracy:", accuracy_test)

# Графики
plt.figure(1, figsize=[12, 4])
plt.subplot(1,2,1)
plt.title("Train")
plt.scatter(X_train, y_train, c=y_train, cmap=color_map, s=80, alpha=0.5, label="true")
plt.scatter(X_train, y_train__pred, s=20, c=y_train__pred, cmap=color_map, label="predicted")
xlim = plt.gca().get_xlim() 
plt.plot(xlim, linear_model.predict(np.array(xlim).reshape(-1,1)), linewidth=1, color="black", label="OLS")
plt.axhline(threshold, color="0.5", linestyle="--", lw=2)
plt.xlim(xlim) 
plt.xlabel("$x$")
plt.ylabel("$y$")
plt.legend()
plt.grid(True)

plt.subplot(1,2,2)
plt.title("Test")
plt.scatter(X_test, y_test, c=y_test, cmap=color_map, s=80, alpha=0.5, label="true")
plt.scatter(X_test, y_test__pred, s=20, c=y_test__pred, cmap=color_map, label="predicted")
plt.plot(xlim, linear_model.predict(np.array(xlim).reshape(-1,1)), linewidth=1, color="black", label="OLS")
plt.axhline(threshold, color="0.5", linestyle="--", lw=2)
plt.xlim(xlim) 
plt.xlabel("$x$")
plt.ylabel("$y$")
plt.legend()
plt.grid(True)

## 2. Логистическая регрессия

In [None]:
import ipywidgets as widgets

In [None]:
def linear_function(X, w):
    """Return the linear combination of features and parameters, ``X · w``.

    No constant column is added here: callers pass ``X`` with a leading
    column of ones so that ``w[0]`` plays the role of the intercept.
    """
    weighted_sum = np.dot(X, w)
    return weighted_sum

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise by numpy."""
    denominator = np.exp(-z) + 1.0
    return 1.0 / denominator

In [None]:
# Generate the source data
X, y = generate_data_one_feature()

# Class indices are recomputed here so the plot does not depend on values left over from another cell
y_one_indx = np.argwhere(y == 1)
y_zero_indx = np.argwhere(y == 0)

# Grid of feature values on which the curve is drawn
x = np.linspace(-1, 5, num=100)

# Sliders for the intercept (w0) and the feature coefficient (w1)
w0 = widgets.FloatSlider(min=-15.0, max=2.0, step=0.05, value=-2.0)
w1 = widgets.FloatSlider(min=1.0, max=8.0, step=0.05, value=2.0)

def update(w0=0, w1=0):
    """Redraw the observations together with the curve sigmoid(w0 + w1 * x).

    Parameters
    ----------
    w0 : float
        Intercept of the linear part.
    w1 : float
        Coefficient of the single feature.
    """
    # Parameter vector: w0, w1
    w_ = np.r_[w0, w1]

    # Design matrix with the constant feature x0 = 1 in the first column
    X_ = np.c_[np.ones(x.shape[0]), x]

    # Probability of class 1 at every grid point
    y_pred = sigmoid(linear_function(X_, w_))

    # Class 0 is blue and class 1 is red, matching color_map used in the other plots
    plt.figure("2", figsize=[10, 6])  
    plt.scatter(X[y_zero_indx], y[y_zero_indx], color="blue", label="$y_{true}=0$")
    plt.scatter(X[y_one_indx], y[y_one_indx], color="red", label="$y_{true}=1$")
    plt.xlabel("$x$")
    plt.ylabel("$y$")
    plt.plot(x, y_pred, color="grey", linewidth=2, label="Logistic Regression Curve")
    plt.grid(True)
    plt.legend()
    plt.show()

widgets.interact(update, w0=w0, w1=w1)

### Один признак

In [None]:
# Generate the source data
X, y = generate_data_one_feature()

# Shift the first 2/3 of the class-1 observations to the right by 12
indx = np.argwhere(y==1)
indx = indx[:int(2/3*indx.size)]
X[indx] = X[indx] + 12

# Plot; class 0 is blue and class 1 is red, matching color_map
y_one_indx = np.argwhere(y == 1)
y_zero_indx =  np.argwhere(y == 0)
color_map = ListedColormap(["blue", "red"])
plt.scatter(X[y_zero_indx], y[y_zero_indx], color="blue", label="$y_{true}=0$")
plt.scatter(X[y_one_indx], y[y_one_indx], color="red", label="$y_{true}=1$")
plt.xlabel("$x$")
plt.ylabel("$y_{true}$")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Split into training and test subsets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

# Fit the model
logistic_model = LogisticRegression(fit_intercept=True, 
                                    max_iter=100, 
                                    C=float("inf"), 
                                    solver="lbfgs", 
                                    random_state=RANDOM_STATE)
logistic_model.fit(X_train, y_train)

# Fitted parameters, printed as scalars in the same way as in the two-feature example:
# intercept_ and coef_ are arrays, so they are indexed / flattened before printing
print("Параметры модели:")
print("\tw{} = {}".format(0, logistic_model.intercept_[0]))
for indx, coef in enumerate(logistic_model.coef_.flatten()):
    print("\tw{} = {}".format(indx+1, coef))

In [None]:
# Предсказание
y_train__pred = logistic_model.predict(X_train)
y_test__pred = logistic_model.predict(X_test)

# Оценка качества классификатора
accuracy_train = accuracy_score(y_train, y_train__pred)
accuracy_test = accuracy_score(y_test, y_test__pred)

print("Train Accuracy:", accuracy_train)
print("Test Accuracy:", accuracy_test)

In [None]:
logistic_model.score(X_test, y_test)

In [None]:
# Вероятности принадлежности классам
logistic_model.predict_proba(X_test)[:5]

In [None]:
# Stack the fitted parameters into one vector: w0, w1
w_ = np.concatenate([logistic_model.intercept_, logistic_model.coef_.ravel()])

# Prepend the constant feature x0 = 1 to the test features: x0, x1
X_ = np.hstack([np.ones((X_test.shape[0], 1)), X_test])

# Probability of class 1 for the first five test observations
sigmoid(linear_function(X_, w_))[:5]

In [None]:
# Train and test panels: true labels, predicted labels and the fitted probability curve
xx = np.linspace(-4, 15, 100).reshape(-1, 1)

# Probability level drawn as the decision threshold; set here so the cell does not
# rely on the value assigned in the linear-regression section (same value, 0.5)
threshold = 0.5

# Probability of class 1 on the grid, computed once and reused by both panels
proba_one = logistic_model.predict_proba(xx)[:, 1]

plt.figure(1, figsize=[12, 4])
panels = [
    ("Train", X_train, y_train, y_train__pred),
    ("Test", X_test, y_test, y_test__pred),
]
for position, (panel_title, X_part, y_part, y_part__pred) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)
    plt.scatter(X_part, y_part, c=y_part, cmap=color_map, s=80, alpha=0.5, label="true", zorder=2)
    plt.scatter(X_part, y_part__pred, s=20, c=y_part__pred, cmap=color_map, label="predicted", zorder=3)
    plt.plot(xx, proba_one,
             linewidth=2, color="black", label="predicted function", zorder=1)
    plt.axhline(threshold, color="0.5", linestyle="--", lw=1)
    plt.xlabel("$x$")
    plt.ylabel("$y$")
    plt.legend()
    plt.grid(True)

### Два признака

<p><b>Исходные данные</b></p>

In [None]:
# Генерация исходных данных
X, y = generate_data_two_features()

# График
y_one_indx = np.argwhere(y == 1).flatten()
y_zero_indx =  np.argwhere(y == 0).flatten()
plt.scatter(X[y_zero_indx][:,0], X[y_zero_indx][:,1], color="blue", label="$y_{true} = 0$")
plt.scatter(X[y_one_indx][:,0], X[y_one_indx][:,1], color="red", label="$y_{true} = 1$")
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Формирование обучающего и тестового подмножеств
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

# Обучение
logistic_model = LogisticRegression(fit_intercept=True, 
                                    max_iter=100, 
                                    C=float("inf"), 
                                    solver="lbfgs", 
                                    random_state=RANDOM_STATE)
logistic_model.fit(X_train, y_train)

# Параметры обученной модели
print("Параметры модели:")
print("\tw{} = {}".format(0, logistic_model.intercept_[0]))
for indx, coef in enumerate(logistic_model.coef_.flatten()):
    print("\tw{} = {}".format(indx+1, coef))

In [None]:
# Предсказание
y_train__pred = logistic_model.predict(X_train)
y_test__pred = logistic_model.predict(X_test)

# Оценка качества классификатора
accuracy_train = accuracy_score(y_train, y_train__pred)
accuracy_test = accuracy_score(y_test, y_test__pred)

print("Train Accuracy:", accuracy_train)
print("Test Accuracy:", accuracy_test)

In [None]:
# Вероятности принадлежности классам
logistic_model.predict_proba(X_test)[:5]

In [None]:
# Преобразование параметров в массив: w0, w1, w2
w_ = np.r_[logistic_model.intercept_, logistic_model.coef_.flatten()]

# Добавление 1 к признакам: x0, x1, x2, где x0=1
X_ = np.c_[np.ones((X_test.shape[0], 1)), X_test]

# Вероятность принадлежности классу 1
sigmoid(linear_function(X_, w_))[:5]

In [None]:
fig, axes = plt.subplots(1, 2, figsize=[10,4])

# Background of the train panel: predict_proba response of the logistic model
disp = DecisionBoundaryDisplay.from_estimator(
    logistic_model, X, 
    grid_resolution=1000,
    xlabel="X1",
    ylabel="X2",
    cmap="coolwarm",
    response_method="predict_proba",
    alpha=0.5,
    ax=axes[0]
)
axes[0].set_title("Train")
axes[0].set_xlabel("$x_1$")
axes[0].set_ylabel("$x_2$")
# Large markers show the true class, small outlined markers the predicted class
sc_train = axes[0].scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap="coolwarm", s=100, label="true")
axes[0].scatter(X_train[:, 0], X_train[:, 1], c=y_train__pred, cmap="coolwarm", edgecolor="k", label="pred")
axes[0].legend()
# Attach the colorbar to this panel explicitly; its colors come from the scatter mappable
fig.colorbar(sc_train, ax=axes[0])
axes[0].grid(True)

# The same background is redrawn on the test panel
disp.plot(ax=axes[1], alpha=0.5, cmap="coolwarm")
axes[1].set_title("Test")
axes[1].set_xlabel("$x_1$")
axes[1].set_ylabel("$x_2$")
sc_test = axes[1].scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap="coolwarm", s=100, label="true")
axes[1].scatter(X_test[:, 0], X_test[:, 1], c=y_test__pred, cmap="coolwarm", edgecolor="k", label="pred")
axes[1].legend()
fig.colorbar(sc_test, ax=axes[1])
axes[1].grid(True)

fig.tight_layout()

plt.show()

In [None]:
show_cplots(
    logistic_model, 
    X_train, 
    y_train,
    X_test,
    y_test, 
    title=None, 
    cmap="coolwarm", 
    proba=True)

### Два признака и три класса

In [None]:
# Генерация исходных данных
n = 100
X, y = make_classification(n_samples=n, n_features=2, n_redundant=0, 
                           n_informative=2, n_clusters_per_class=1, 
                           n_classes=3, class_sep=1,random_state=1234)


# График
y_two_indx = np.argwhere(y == 2).flatten()
y_one_indx = np.argwhere(y == 1).flatten()
y_zero_indx =  np.argwhere(y == 0).flatten()
plt.scatter(X[y_zero_indx][:,0], X[y_zero_indx][:,1], color="blue", label="$y_{true} = 0$")
plt.scatter(X[y_one_indx][:,0], X[y_one_indx][:,1], color="red", label="$y_{true} = 1$")
plt.scatter(X[y_two_indx][:,0], X[y_two_indx][:,1], color="green", label="$y_{true} = 2$")
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")
plt.grid(True)
plt.legend()
plt.show()

In [None]:
# Формирование обучающего и тестового подмножеств
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

# Обучение
logistic_model = LogisticRegression(fit_intercept=True, 
                                    max_iter=100, 
                                    C=float("inf"), 
                                    solver="lbfgs", 
                                    multi_class="ovr", 
                                    random_state=RANDOM_STATE)
logistic_model.fit(X_train, y_train)

# Параметры обученной модели
for i in range(3):
    print("Параметры модели {}:".format(i+1))
    print("\tw{} = {}".format(0, logistic_model.intercept_[i]))
    for indx, coef in enumerate(logistic_model.coef_[i].flatten()):
        print("\tw{} = {}".format(indx+1, coef))

In [None]:
# Предсказание
y_train__pred = logistic_model.predict(X_train)
y_test__pred = logistic_model.predict(X_test)

# Оценка качества классификатора
accuracy_train = accuracy_score(y_train, y_train__pred)
accuracy_test = accuracy_score(y_test, y_test__pred)

print("Train Accuracy:", accuracy_train)
print("Test Accuracy:", accuracy_test)

In [None]:
logistic_model.predict_proba(X_test)[:5]

In [None]:
pr = np.around(logistic_model.predict_proba(X_test), decimals=2)
pr = np.column_stack((pr, logistic_model.predict(X_test), y_test))

# Class_0, Class_1, Class_2, Predicted, True
pr[:5]

In [None]:
color_map = ListedColormap(["blue", "red", "green"])
show_cplots(
    logistic_model, 
    X_train, 
    y_train,
    X_test,
    y_test, 
    title=None, 
    cmap=color_map, 
    show_colorbar=False)

## 3. Пример с распознаванием цифр

In [None]:
from sklearn import datasets

In [None]:
# Загрузка исходных данных
digits = datasets.load_digits()
print(digits.DESCR)

In [None]:
digits.keys()

In [None]:
IMAGE_INDX = 3

print("Features:\n", digits["images"][IMAGE_INDX])
print("Target value:", digits.target[IMAGE_INDX])

plt.imshow(digits.images[IMAGE_INDX])
plt.show()

In [None]:
digits.images.shape

In [None]:
# Преобразование исходных данных
# Замечание: 
#  digits.data уже содержит преобразованные данные
X = digits["images"].reshape(len(digits["images"]), 64)
X.shape

In [None]:
X

In [None]:
y = digits["target"]
y.shape

In [None]:
# Количество элементов каждого класса
np.unique(y, return_counts=True)

In [None]:
# Split into training and test subsets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=100)

# Fit the model
# Exercise: afterwards replace the multi_class and solver values with
# multinomial and newton-cg. How does the classification quality change?
# NOTE(review): the multi_class argument is reported as deprecated by recent
# scikit-learn releases — confirm against the installed version.
model = LogisticRegression(C=float("inf"), 
                           multi_class="ovr", 
                           solver="lbfgs", 
                           max_iter=200, 
                           random_state=12345)
model.fit(X_train, y_train)

# Fitted parameters of every per-class model; iterating over the fitted arrays
# avoids hard-coding the number of classes
for i, (intercept, weights) in enumerate(zip(model.intercept_, model.coef_)):
    print("Параметры модели {}:".format(i+1))
    print("\tw{} = {}".format(0, intercept))
    for indx, coef in enumerate(weights.flatten()):
        print("\tw{} = {}".format(indx+1, coef))

In [None]:
# Сумма векторов параметров
np.allclose(model.coef_.sum(axis=0), np.zeros(model.coef_.shape[1]))

In [None]:
# Model quality; the values are reported as accuracies, so the variables are named accordingly
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

In [None]:
IMAGE_INDX = 3

In [None]:
# Предсказание и отображение цифры
print("Target value:", digits.target[IMAGE_INDX])
print("Predicted value:", model.predict(digits["images"][IMAGE_INDX].reshape(1, -1)))
plt.imshow(digits.images[IMAGE_INDX])
plt.show()

In [None]:
# Веса классов
plt.figure(figsize=(10,5))
for i in range(10):
    plt.subplot(2, 5, i+1)
    plt.title(f"class = {model.classes_[i]}")
    plt.imshow(model.coef_[i].reshape(-1,8)) #, vmin=-200, vmax=200)
    # plt.colorbar()
    plt.axis("off")
plt.tight_layout()
plt.show()

In [None]:
# Линейная комбинация изображения и весов
plt.figure(figsize=(10,5))
for i in range(10):
    plt.subplot(2, 5, i+1)
    h = model.coef_[i].reshape(-1,8) * digits.images[IMAGE_INDX]
    plt.title(f"class = {model.classes_[i]}\nsum = {h.sum():.2f}")
    plt.imshow(h) #, vmin=-200, vmax=200)
    # plt.colorbar()
    plt.axis("off")
plt.tight_layout()
plt.show()

## 4. Источники

- [Plot randomly generated classification dataset](http://scikit-learn.org/stable/auto_examples/datasets/plot_random_dataset.html#example-datasets-plot-random-dataset-py)
- [Classifier comparison](http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#example-classification-plot-classifier-comparison-py)
- [LogisticRegression](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)