In [7]:
# Задача 1.
# Исследовать качество работы классификатора в зависимости от выбранного метода преобразования данных
# Изучить методы преобразования данных: from sklearn.preprocessing https://scikit-learn.org/stable/modules/preprocessing.html
# Критерий качества - accuracy
# Сформировать таблицу для сравнения

In [8]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler, Normalizer, QuantileTransformer, PowerTransformer, Binarizer

# Compare how different sklearn.preprocessing transformers affect the accuracy
# of a k-NN classifier on the iris dataset. Each transformer is fitted on the
# training split only and then applied to the test split.

# --- Experiment configuration ------------------------------------------------
TEST_SIZE = 0.3      # fraction of the samples held out for evaluation
RANDOM_STATE = 42    # fixed seed so the train/test split is reproducible
N_NEIGHBORS = 1      # k used by the k-NN classifier

# --- Data --------------------------------------------------------------------
iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# --- Transformers under comparison -------------------------------------------
scalers = [
    ("StandardScaler", StandardScaler()),
    ("MinMaxScaler", MinMaxScaler()),
    ("MaxAbsScaler", MaxAbsScaler()),
    ("RobustScaler", RobustScaler()),
    ("Normalizer", Normalizer()),
    # The default n_quantiles=1000 exceeds the number of training samples, so
    # scikit-learn emits a UserWarning and clamps the value to n_samples.
    # Passing the clamped value explicitly gives the same transform without
    # the warning.
    (
        "QuantileTransformer",
        QuantileTransformer(
            n_quantiles=min(1000, X_train.shape[0]),
            random_state=RANDOM_STATE,
        ),
    ),
    ("PowerTransformer", PowerTransformer()),
    # Left at its defaults on purpose: it serves as the "unsuitable
    # transformation" case in the comparison.
    ("Binarizer", Binarizer()),
]

# --- Evaluation loop ---------------------------------------------------------
results = []

for name, scaler in scalers:
    # Fit on the training data only; the test data is merely transformed,
    # so no information from the test split leaks into the preprocessing.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    classifier = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
    classifier.fit(X_train_scaled, y_train)

    y_pred = classifier.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    results.append((name, accuracy, conf_matrix))

# --- Comparison table --------------------------------------------------------
df_results = pd.DataFrame(results, columns=["Scaler", "Accuracy", "Confusion Matrix"])
print(df_results)


                Scaler  Accuracy                      Confusion Matrix
0       StandardScaler  0.977778  [[19, 0, 0], [0, 12, 1], [0, 0, 13]]
1         MinMaxScaler  1.000000  [[19, 0, 0], [0, 13, 0], [0, 0, 13]]
2         MaxAbsScaler  0.977778  [[19, 0, 0], [0, 12, 1], [0, 0, 13]]
3         RobustScaler  0.911111  [[19, 0, 0], [0, 11, 2], [0, 2, 11]]
4           Normalizer  0.977778  [[19, 0, 0], [0, 13, 0], [0, 1, 12]]
5  QuantileTransformer  1.000000  [[19, 0, 0], [0, 13, 0], [0, 0, 13]]
6     PowerTransformer  0.977778  [[19, 0, 0], [0, 12, 1], [0, 0, 13]]
7            Binarizer  0.288889  [[0, 19, 0], [0, 13, 0], [0, 13, 0]]




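MinMaxScaler и QuantileTransformer показали лучшие результаты (accuracy = 1.0). Binarizer показал худший результат (accuracy ≈ 0.29): при пороге по умолчанию (threshold=0.0) все признаки набора iris положительны и превращаются в 1, поэтому объекты становятся неразличимыми и классификатор относит всю тестовую выборку к одному классу. Такое преобразование в бинарное представление не подходит под задачу.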