In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, KFold, StratifiedKFold
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# NOTE(review): this silences every warning category for the whole session,
# including deprecation/convergence warnings from the libraries used below.
warnings.filterwarnings('ignore')
# Render matplotlib figures inside the notebook output
%matplotlib inline 
# Apply seaborn's "ticks" style to subsequent plots
sns.set(style="ticks")

In [3]:
# Load the Iris dataset object shipped with scikit-learn
iris = datasets.load_iris()

In [4]:
# Lookup table from numeric class code to its human-readable class name
target_names = dict(enumerate(iris.target_names))

# Feature table: one column per measured feature, plus a 'target' column
# holding the class name instead of the numeric code
data = pd.DataFrame(iris.data, columns=iris.feature_names)
data['target'] = pd.Series(iris.target).map(target_names)

data.head(3)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [5]:
# Summarise column dtypes and non-null counts (quick check for missing values)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   target             150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [6]:
# Features: every column except the class label
x = data.drop(columns='target')
# Target: the class label column
y = data['target']

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# identical between runs
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=999,
)

In [7]:
# Baseline classifier: k-nearest neighbours with the library's default
# hyperparameters, fitted on the training split
model = KNeighborsClassifier()
model.fit(x_train, y_train)

In [8]:
# Predict class labels for the held-out test rows with the baseline model
y_pred = model.predict(x_test)

In [9]:
# Show the predicted labels for the test set
y_pred

array(['versicolor', 'versicolor', 'versicolor', 'setosa', 'setosa',
       'setosa', 'setosa', 'versicolor', 'virginica', 'setosa', 'setosa',
       'virginica', 'setosa', 'virginica', 'versicolor', 'virginica',
       'virginica', 'virginica', 'virginica', 'setosa', 'setosa',
       'setosa', 'setosa', 'virginica', 'virginica', 'setosa',
       'virginica', 'virginica', 'virginica', 'virginica'], dtype=object)

In [10]:
# Score the baseline predictions against the true test labels.
# Recall, precision and F1 are multi-class here, so they are aggregated
# across classes with average='weighted'.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Report: accuracy, confusion matrix, recall, precision, F1
print("Accuracy:", accuracy)
print("Матрица ошибок:")
print(conf_matrix)
print("Recall:", recall)
print("Precision:", precision)
print("F1-мера:", f1)

Accuracy: 0.9666666666666667
Матрица ошибок:
[[12  0  0]
 [ 0  5  1]
 [ 0  0 12]]
Recall: 0.9666666666666667
Precision: 0.9692307692307692
F1-мера: 0.9658181818181819


In [11]:
# Tune the number of neighbours K for KNN with two search strategies and
# compare the tuned models on the held-out test set.
knn = KNeighborsClassifier()

# Candidate K values for the exhaustive GridSearchCV
param_grid = {'n_neighbors': range(1, 21)}
# Candidate K values the RandomizedSearchCV samples from
param_random_grid = {'n_neighbors': range(1, 21)}

# Cross-validation strategies (both shuffled with a fixed seed)
kf = KFold(n_splits=5, shuffle=True, random_state=99)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=99)

# GridSearchCV: evaluates every candidate K with plain K-fold CV
grid_search = GridSearchCV(knn, param_grid, cv=kf)
grid_search.fit(x_train, y_train)
print("GridSearchCV: Лучшее значение K:", grid_search.best_params_['n_neighbors'])

# RandomizedSearchCV: evaluates only n_iter=10 of the 20 candidates with
# stratified CV. Which candidates get drawn depends on the RNG, so it is
# seeded; without random_state the selected K (and the test accuracy printed
# below) could differ from one run of the notebook to the next.
random_search = RandomizedSearchCV(
    knn,
    param_random_grid,
    n_iter=10,
    cv=skf,
    random_state=99,
)
random_search.fit(x_train, y_train)
print("RandomizedSearchCV: Лучшее значение K:", random_search.best_params_['n_neighbors'])

# Evaluate the best model found by each search on the test split
y_pred_grid = grid_search.predict(x_test)
accuracy_grid = accuracy_score(y_test, y_pred_grid)
print("accuracy на тестовой выборке (GridSearchCV):", accuracy_grid)

y_pred_random = random_search.predict(x_test)
accuracy_random = accuracy_score(y_test, y_pred_random)
print("accuracy на тестовой выборке (RandomizedSearchCV):", accuracy_random)

GridSearchCV: Лучшее значение K: 3
RandomizedSearchCV: Лучшее значение K: 3
accuracy на тестовой выборке (GridSearchCV): 0.9333333333333333
accuracy на тестовой выборке (RandomizedSearchCV): 0.9333333333333333
