<div style="font-size:18pt; padding-top:20px; text-align:center">СЕМИНАР. <b>Классификация</b></div><hr>
<div style="text-align:right;">Папулин С.Ю. <span style="font-style: italic;font-weight: bold;">(papulin.study@yandex.ru)</span></div>

<a name="0"></a>
<div><span style="font-size:14pt; font-weight:bold">Содержание</span>
    <ol>
        <li><a href="#1">Бинарная классификация</a></li>
        <li><a href="#2">Многоклассовая классификация</a></li>
        <li><a href="#3">Источники</a></li>
    </ol>
</div>

<p><b>Подключение библиотек</b></p>

In [None]:
from sklearn.datasets import make_classification

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib import cm
%matplotlib inline

In [None]:
import sys
# Prepend the relative ../lib/ directory to the module search path so that the
# plot_utils import below can be resolved (presumably that is where the
# plotting helpers live — the path is relative to the kernel's working
# directory, so the notebook must be started from its own folder).
sys.path.insert(0, "../lib/")
from plot_utils import CPlot, RPlot

<a name="1"></a>
<div style="display:table; width:100%; padding-top:10px; padding-bottom:10px; border-bottom:1px solid lightgrey">
    <div style="display:table-row">
        <div style="display:table-cell; width:80%; font-size:16pt; font-weight:bold">1. Бинарная классификация</div>
    	<div style="display:table-cell; width:20%; text-align:center; background-color:whitesmoke; border:1px solid lightgrey"><a href="#0">К содержанию</a></div>
    </div>
</div>

<p><b>Исходные данные</b></p>

In [None]:
# Synthetic two-class dataset: 500 samples with two informative features and a
# single cluster per class. The small class_sep keeps the classes close
# together; the fixed random_state makes the data reproducible.
n = 500
x, y = make_classification(
    n_samples=n,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    n_clusters_per_class=1,
    class_sep=0.25,
    random_state=19,
)

In [None]:
# Two-colour map handed to the CPlot helpers for the binary problem
CLR_MAP = ListedColormap(colors=["blue", "red"])

In [None]:
# Show the generated binary data (x, y) via the project plotting helper,
# passing the two-colour map defined above.
CPlot.show_init_data_plot(x, y, CLR_MAP)

Формирование обучающего и тестового подмножеств

In [None]:
# Hold out 30% of the samples for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=12,
)

#### Логистическая регрессия

In [None]:
# Training: L2-penalised logistic regression (large C, i.e. a weak penalty)
logreg_model = LogisticRegression(
    penalty="l2",
    C=1e5,
    fit_intercept=True,
    solver="lbfgs",
    max_iter=100,
    random_state=1234,
)

# The %timeit magic runs the statement in the notebook namespace, so the
# model is left fitted once the timing runs are done.
get_ipython().run_line_magic("timeit", "logreg_model.fit(X_train, y_train)")

# Evaluation on the test subset
score_logreg_model = logreg_model.score(X_test, y_test)
print("Test Accuracy = ", score_logreg_model)

#### Метод ближайших соседей

In [None]:
# Training: k-nearest neighbours with k=5 and the Minkowski metric at p=2
knn_model = KNeighborsClassifier(
    n_neighbors=5,
    p=2,
)

# The timed statement is also what fits the model used below
get_ipython().run_line_magic("timeit", "knn_model.fit(X_train, y_train)")

# Evaluation on the test subset
score_knn_model = knn_model.score(X_test, y_test)
print(f"Test Accuracy = {score_knn_model}")

#### Байесовский классификатор

In [None]:
# Training: Gaussian naive Bayes classifier
bayes_model = GaussianNB()

# Time the fit of bayes_model itself. This statement is also the only place
# where the model gets fitted, so it must target bayes_model (not another
# estimator); otherwise the score call below raises NotFittedError.
get_ipython().run_line_magic('timeit', 'bayes_model.fit(X_train, y_train)')

# Evaluation on the test subset
score_bayes_model = bayes_model.score(X_test, y_test)
print(f"Test Accuracy = {score_bayes_model}")

#### Метод опорных векторов

In [None]:
# Training: support vector classifier with a linear kernel
svm_model = SVC(
    kernel="linear",
)

# The timed statement is also what fits the model used below
get_ipython().run_line_magic("timeit", "svm_model.fit(X_train, y_train)")

# Evaluation on the test subset
score_svm_model = svm_model.score(X_test, y_test)
print(f"Test Accuracy = {score_svm_model}")

#### Случайный лес

In [None]:
# Training: random forest of 10 entropy-split trees built on bootstrap
# samples; out-of-bag scoring is switched off and the seed is fixed.
rf_model = RandomForestClassifier(
    n_estimators=10,
    criterion="entropy",
    bootstrap=True,
    oob_score=False,
    random_state=0,
)

# The timed statement is also what fits the model used below
get_ipython().run_line_magic("timeit", "rf_model.fit(X_train, y_train)")

# Evaluation on the test subset
score_rf_model = rf_model.score(X_test, y_test)
print("Test Score = ", score_rf_model)

<p><b>Графики</b></p>

In [None]:
# Draw the train/test plots for each fitted binary classifier, in the same
# order as the models were trained above.
fitted_models = [
    (logreg_model, "Logistic Regression"),
    (knn_model, "K-Nearest Neighbors"),
    (bayes_model, "Gaussian Naive Bayes"),
    (svm_model, "Support Vector Machine"),
    (rf_model, "Random Forest"),
]
for model, title in fitted_models:
    CPlot.show_train_test_plots(model, X_train, y_train, X_test, y_test,
                                title=title, cmap=CLR_MAP)

In [None]:
# Same set of plots with proba=True forwarded to the helper (presumably this
# switches the plots to class probabilities — confirm in plot_utils).
# NOTE(review): svm_model is created as SVC(kernel="linear") without
# probability=True; verify that the helper's proba mode copes with that.
fitted_models = [
    (logreg_model, "Logistic Regression"),
    (knn_model, "K-Nearest Neighbors"),
    (bayes_model, "Gaussian Naive Bayes"),
    (svm_model, "Support Vector Machine"),
    (rf_model, "Random Forest"),
]
for model, title in fitted_models:
    CPlot.show_train_test_plots(model, X_train, y_train, X_test, y_test,
                                title=title, cmap=CLR_MAP, proba=True)

<a name="2"></a>
<div style="display:table; width:100%; padding-top:10px; padding-bottom:10px; border-bottom:1px solid lightgrey">
    <div style="display:table-row">
        <div style="display:table-cell; width:80%; font-size:16pt; font-weight:bold">2. Многоклассовая классификация</div>
    	<div style="display:table-cell; width:20%; text-align:center; background-color:whitesmoke; border:1px solid lightgrey"><a href="#0">К содержанию</a></div>
    </div>
</div>

<p><b>Исходные данные</b></p>

In [None]:
# Synthetic three-class dataset: 100 samples with two informative features
# and a single cluster per class; the fixed random_state makes the data
# reproducible.
n = 100
X, y = make_classification(
    n_samples=n,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=3,
    n_clusters_per_class=1,
    class_sep=1,
    random_state=1234,
)

In [None]:
# Three-colour map handed to the CPlot helpers for the three-class problem
# (rebinds the two-colour CLR_MAP used in the binary section).
CLR_MAP = ListedColormap(colors=["blue", "red", "green"])

In [None]:
# Show the generated three-class data (X, y) via the project plotting helper,
# passing the three-colour map defined above.
CPlot.show_init_data_plot(X, y, CLR_MAP)

Формирование обучающего и тестового подмножеств

In [None]:
# Hold out 30% of the samples for testing; the fixed seed makes the split
# reproducible. This rebinds the train/test variables of the binary section.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=12,
)

#### Логистическая регрессия

In [None]:
# Training: L2-penalised logistic regression (large C, i.e. a weak penalty).
# multi_class is intentionally not passed: the parameter is deprecated since
# scikit-learn 1.5 and warns on every fit (which %timeit repeats many times).
# With the lbfgs solver and three classes the default already fits the
# multinomial model (scikit-learn >= 0.22), so the result is unchanged.
logreg_model = LogisticRegression(
    penalty="l2",
    C=1e5,
    fit_intercept=True,
    solver="lbfgs",
    max_iter=100,
    random_state=1234,
)

# The %timeit magic runs the statement in the notebook namespace, so the
# model is left fitted once the timing runs are done.
get_ipython().run_line_magic('timeit', 'logreg_model.fit(X_train, y_train)')

# Evaluation on the test subset
score_logreg_model = logreg_model.score(X_test, y_test)
print("Test Accuracy = ", score_logreg_model)

#### Метод ближайших соседей

In [None]:
# Training: k-nearest neighbours with k=5 and the Minkowski metric at p=2
knn_model = KNeighborsClassifier(
    n_neighbors=5,
    p=2,
)

# The timed statement is also what fits the model used below
get_ipython().run_line_magic("timeit", "knn_model.fit(X_train, y_train)")

# Evaluation on the test subset
score_knn_model = knn_model.score(X_test, y_test)
print(f"Test Accuracy = {score_knn_model}")

#### Байесовский классификатор

In [None]:
# Training: Gaussian naive Bayes classifier
bayes_model = GaussianNB()

# The timed statement is also what fits the model used below
get_ipython().run_line_magic("timeit", "bayes_model.fit(X_train, y_train)")

# Evaluation on the test subset
score_bayes_model = bayes_model.score(X_test, y_test)
print(f"Test Accuracy = {score_bayes_model}")

#### Метод опорных векторов

In [None]:
# Training: support vector classifier with a linear kernel
svm_model = SVC(
    kernel="linear",
)

# The timed statement is also what fits the model used below
get_ipython().run_line_magic("timeit", "svm_model.fit(X_train, y_train)")

# Evaluation on the test subset
score_svm_model = svm_model.score(X_test, y_test)
print(f"Test Accuracy = {score_svm_model}")

#### Случайный лес

In [None]:
# Training: random forest of 10 entropy-split trees built on bootstrap
# samples; out-of-bag scoring is switched off and the seed is fixed.
rf_model = RandomForestClassifier(
    n_estimators=10,
    criterion="entropy",
    bootstrap=True,
    oob_score=False,
    random_state=0,
)

# The timed statement is also what fits the model used below
get_ipython().run_line_magic("timeit", "rf_model.fit(X_train, y_train)")

# Evaluation on the test subset
score_rf_model = rf_model.score(X_test, y_test)
print(f"Test Score = {score_rf_model}")

<p><b>Графики</b></p>

In [None]:
# Draw the train/test plots for each fitted three-class classifier, in the
# same order as the models were trained above.
fitted_models = [
    (logreg_model, "Logistic Regression"),
    (knn_model, "K-Nearest Neighbors"),
    (bayes_model, "Gaussian Naive Bayes"),
    (svm_model, "Support Vector Machine"),
    (rf_model, "Random Forest"),
]
for model, title in fitted_models:
    CPlot.show_train_test_plots(model, X_train, y_train, X_test, y_test,
                                title=title, cmap=CLR_MAP)

<a name="3"></a>
<div style="display:table; width:100%; padding-top:10px; padding-bottom:10px; border-bottom:1px solid lightgrey">
    <div style="display:table-row">
        <div style="display:table-cell; width:80%; font-size:14pt; font-weight:bold">3. Источники</div>
    	<div style="display:table-cell; width:20%; text-align:center; background-color:whitesmoke; border:1px solid lightgrey"><a href="#0">К содержанию</a></div>
    </div>
</div>

<a href="http://scikit-learn.org/stable/auto_examples/datasets/plot_random_dataset.html#example-datasets-plot-random-dataset-py">Plot randomly generated classification dataset</a><br>
<a href="http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#example-classification-plot-classifier-comparison-py">Classifier comparison</a>