# Применение линейных методов классификации для диагностики кризиса теплообмена в ЯЭУ.
Группа: ИВТ-М20.<br/>
Студент: Лискунов Роман Геннадьвич

In [126]:
import warnings
from typing import (
    Any,
    Union
)
from icecream import ic
from pandas import (
    DataFrame,
    read_csv
)
from sklearn.linear_model import (
    LogisticRegression,
    SGDClassifier,
    RidgeClassifier
)
from sklearn.metrics import (
    accuracy_score,
    classification_report
)
from sklearn.model_selection import (
    train_test_split,
    cross_val_score,
    GridSearchCV
)

warnings.filterwarnings('ignore')


(1) Чтение данных из файлов

In [127]:
target: DataFrame = DataFrame(read_csv('data/target5.csv', header=0))
target.columns = ['data']
var: DataFrame = DataFrame(read_csv('data/var5.csv', sep=',', header=None))

(2) Транспонирование матрицы

In [128]:
targetT: DataFrame = target.T
varT: DataFrame = var.T

(3) Отображение первых и последних записей

In [129]:
print(targetT.head())
print(targetT.tail())
print(varT.head())
print(varT.tail())

      0   1   2   3   4   5   6   7   8   9   ...  20  21  22  23  24  25  26  \
data   1   1   1   1   1   1   1   1   1   1  ...   2   2   2   2   2   2   2   

      27  28  29  
data   2   2   2  

[1 rows x 30 columns]
      0   1   2   3   4   5   6   7   8   9   ...  20  21  22  23  24  25  26  \
data   1   1   1   1   1   1   1   1   1   1  ...   2   2   2   2   2   2   2   

      27  28  29  
data   2   2   2  

[1 rows x 30 columns]
          0          1          2          3          4          5   \
0  10.659607   5.623670   5.773385   8.318744  10.798757   3.392248   
1  12.707813  12.437502  12.293281  15.946799  15.800968  18.957569   
2   4.944295   7.421738   7.322419  11.696521   8.865894  15.108460   
3  12.466091   6.826808   8.880880   7.573538  13.083592   9.473492   
4   6.401173   6.063046   6.493885   6.441410   7.014855   7.835656   

          6          7          8          9   ...         20         21  \
0   9.309021   9.674849   7.147078   5.687350  ..

(4) Отбор информативных частот

In [130]:
# гистограмма

(5) Разбиение данных на обучающую и проверочную выборки

In [131]:
target_train, target_test, var_train, var_test = train_test_split(target, var, test_size=0.3, train_size=0.7)

(6) Последовательное применение алгоритмов

In [132]:
# LogisticRegression
lr: LogisticRegression = LogisticRegression()
lr.fit(var_train, target_train)

LogisticRegression()

In [133]:
# SGDClassifier
sgdc: SGDClassifier = SGDClassifier()
sgdc.fit(var_train, target_train)

SGDClassifier()

In [134]:
# RidgeClassifier
rc: RidgeClassifier = RidgeClassifier()
rc.fit(var_train, target_train)


RidgeClassifier()

(7) Оценка качества модели

In [135]:
def estimate(classifier, ttraing, ttest, vtrain, vtest):
    print(f"test score: {cross_val_score(classifier, vtrain, ttraing, cv=10, scoring='accuracy')}")
    print(f"accuracy_score: {accuracy_score(ttest, classifier.predict(vtest), normalize=True)}")
    print(f"classification_report: {classification_report(ttest, classifier.predict(vtest))}")

In [136]:
print("LogisticRegression")
estimate(lr, target_train, target_test, var_train, var_test)

LogisticRegression
test score: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
accuracy_score: 1.0
classification_report:               precision    recall  f1-score   support

           1       1.00      1.00      1.00         5
           2       1.00      1.00      1.00         4

    accuracy                           1.00         9
   macro avg       1.00      1.00      1.00         9
weighted avg       1.00      1.00      1.00         9



In [137]:
print("SGDClassifier")
estimate(sgdc, target_train, target_test, var_train, var_test)

SGDClassifier
test score: [1.  1.  1.  1.  1.  1.  0.5 1.  1.  1. ]
accuracy_score: 1.0
classification_report:               precision    recall  f1-score   support

           1       1.00      1.00      1.00         5
           2       1.00      1.00      1.00         4

    accuracy                           1.00         9
   macro avg       1.00      1.00      1.00         9
weighted avg       1.00      1.00      1.00         9



In [138]:
print("RidgeClassifier")
estimate(rc, target_train, target_test, var_train, var_test)

RidgeClassifier
test score: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
accuracy_score: 1.0
classification_report:               precision    recall  f1-score   support

           1       1.00      1.00      1.00         5
           2       1.00      1.00      1.00         4

    accuracy                           1.00         9
   macro avg       1.00      1.00      1.00         9
weighted avg       1.00      1.00      1.00         9



(8) Подбор гиперпараметров

In [139]:
print("LogisticRegression")
parameters: dict[str, Union[list[str], list[Union[float, int]]]] = {
    'penalty': ['l1', 'l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]
}
grid: GridSearchCV = GridSearchCV(
    LogisticRegression(),
    parameters,
    cv=5
)
lr_grid: Union[GridSearchCV, Any] = grid.fit(var_train, target_train)
print(lr_grid)

LogisticRegression
GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
                         'penalty': ['l1', 'l2']})


In [140]:
print("SGDClassifier")
parameters: dict[str, Union[list[str], list[Union[float, int]]]] = {
    'alpha': [10 ** x for x in range(-6, 1)],
    'l1_ratio': [0, 0.05, 0.1, 0.2, 0.5, 0.8, 0.9, 0.95, 1],
}
grid: GridSearchCV = GridSearchCV(
    SGDClassifier(),
    parameters,
    cv=5
)
sgd_grid: Union[GridSearchCV, Any] = grid.fit(var_train, target_train)
print(sgd_grid)

SGDClassifier
GridSearchCV(cv=5, estimator=SGDClassifier(),
             param_grid={'alpha': [1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1, 1],
                         'l1_ratio': [0, 0.05, 0.1, 0.2, 0.5, 0.8, 0.9, 0.95,
                                      1]})


In [141]:
print("RidgeClassifier")
parameters: dict[str, Union[list[str], list[Union[float, int]]]] = {
    'alpha': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
}
grid: GridSearchCV = GridSearchCV(
    RidgeClassifier(),
    parameters,
    cv=5
)
rc_grid: Union[GridSearchCV, Any] = grid.fit(var_train, target_train)
print(rc_grid)

RidgeClassifier
GridSearchCV(cv=5, estimator=RidgeClassifier(),
             param_grid={'alpha': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
                                   1.0]})


(9) Переобучение модели

In [142]:
ic("LogisticRegression")
ic(lr_grid.best_params_)
ic(lr_grid.best_score_)
ic(lr_grid.predict(var_test))
ic(accuracy_score(target_test, lr_grid.predict(var_test), normalize=True))

ic| 'LogisticRegression'
ic| lr_grid.best_params_: {'C': 0.001, 'penalty': 'l2'}
ic| lr_grid.best_score_: 1.0
ic| lr_grid.predict(var_test): array([1, 2, 2, 1, 1, 2, 1, 1, 2])
ic| accuracy_score(target_test, lr_grid.predict(var_test), normalize=True): 1.0


1.0

In [143]:
ic("SGDClassifier")
ic(sgd_grid.best_params_)
ic(sgd_grid.best_score_)
ic(sgd_grid.predict(var_test))
ic(accuracy_score(target_test, sgd_grid.predict(var_test), normalize=True))

ic| 'SGDClassifier'
ic| sgd_grid.best_params_: {'alpha': 1e-06, 'l1_ratio': 0.1}
ic| sgd_grid.best_score_: 1.0
ic| sgd_grid.predict(var_test): array([1, 2, 2, 1, 1, 2, 1, 1, 2])
ic| accuracy_score(target_test, sgd_grid.predict(var_test), normalize=True): 1.0


1.0

In [144]:
ic("RidgeClassifier")
ic(rc_grid.best_params_)
ic(rc_grid.best_score_)
ic(rc_grid.predict(var_test))
ic(accuracy_score(target_test, rc_grid.predict(var_test), normalize=True))

ic| 'RidgeClassifier'
ic| rc_grid.best_params_: {'alpha': 0.1}
ic| rc_grid.best_score_: 1.0
ic| rc_grid.predict(var_test): array([1, 2, 2, 1, 1, 2, 1, 1, 2])
ic| accuracy_score(target_test, rc_grid.predict(var_test), normalize=True): 1.0


1.0

# Ответы на контрольные вопросы

## 1. Опишите этапы построения линейных классификаторов. Чем они отличаются и чем схожи?

## 2. Что означает L-1 и L-2 регуляризация?

## 3. В чем заключается метод стохастического градиентного спуска? Где и когда его можно использовать?