# Importações

In [None]:
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import matplotlib.pyplot as plt

# Carregando o dataset

In [None]:
# Load the Iris dataset bundled with scikit-learn. Later cells read it through
# its .data, .feature_names, .target and .target_names attributes.
iris = datasets.load_iris()

# Criando o DataFrame

In [None]:
# Wrap the raw measurements in a DataFrame: one row per sample, one column per
# entry of iris.feature_names.
df_iris = pd.DataFrame(
    iris.data,
    columns=iris.feature_names,
)

# Informações da base de dados:



In [None]:
# Print column dtypes and non-null counts, then display the first five rows
# transposed so that each feature reads as a row.
df_iris.info()
df_iris.head(n=5).transpose()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB


Unnamed: 0,0,1,2,3,4
sepal length (cm),5.1,4.9,4.7,4.6,5.0
sepal width (cm),3.5,3.0,3.2,3.1,3.6
petal length (cm),1.4,1.4,1.3,1.5,1.4
petal width (cm),0.2,0.2,0.2,0.2,0.2


# Criando a coluna com os valores da variável target.

In [None]:
# Store the target labels next to the features as a 'class' column (they show
# up as 0, 1, 2 in the outputs of later cells). Cells that need only the
# features remove it again with df_iris.drop('class', axis=1).
df_iris['class'] = iris.target

# Dividindo a base em conjuntos de treino e de teste (50% para cada um)

In [None]:
# Hold out 50% of the rows for testing.
# - random_state pins the shuffle, so the split (and every prediction/metric
#   derived from it) is the same on each Restart & Run All.
# - stratify keeps the class proportions equal in the train and test halves,
#   which matters for a split this large on only 150 rows.
X_train, X_test, y_train, y_test = train_test_split(
    df_iris.drop('class', axis=1),
    df_iris['class'],
    test_size=0.5,
    random_state=42,
    stratify=df_iris['class'],
)

# Definindo o número de vizinhos = 5

In [None]:
# Number of neighbours used by the baseline classifier.
N_NEIGHBORS = 5
knn = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)

# Treino do KNN com o conjunto de treino e predição sobre o conjunto de teste

In [None]:
# Fit on the training half, then predict labels for the held-out test half.
# fit() returns the estimator itself, so the two calls can be chained.
resultado = knn.fit(X_train, y_train).predict(X_test)

# Bare last expression: display the predicted labels.
resultado

array([0, 0, 0, 1, 2, 2, 0, 0, 0, 2, 1, 2, 2, 0, 1, 2, 1, 0, 1, 1, 0, 1,
       2, 1, 0, 0, 2, 0, 1, 2, 0, 2, 1, 2, 2, 1, 2, 2, 0, 1, 2, 1, 0, 1,
       1, 1, 1, 2, 1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 2, 2, 1, 2, 2, 1, 1,
       2, 2, 0, 1, 0, 1, 2, 2, 2])

# Matriz de Confusão

In [None]:
# Cross-tabulate true labels (rows) against predicted labels (columns);
# margins=True appends the 'All' totals row and column.
print(
    pd.crosstab(
        index=y_test,
        columns=resultado,
        rownames=['Real'],
        colnames=['Predito'],
        margins=True,
    )
)

Predito   0   1   2  All
Real                    
0        21   0   0   21
1         0  25   3   28
2         0   0  26   26
All      21  25  29   75


# Métricas

In [None]:
# Per-class precision / recall / F1 on the test half, labelled with the
# species names from iris.target_names.
print(
    metrics.classification_report(
        y_true=y_test,
        y_pred=resultado,
        target_names=iris.target_names,
    )
)

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        21
  versicolor       1.00      0.89      0.94        28
   virginica       0.90      1.00      0.95        26

    accuracy                           0.96        75
   macro avg       0.97      0.96      0.96        75
weighted avg       0.96      0.96      0.96        75



# Definindo a lista de valores de K (número de vizinhos) a serem testados.

In [None]:
# Candidate values of K for the grid search: 1 through 24 inclusive.
k_list = [k for k in range(1, 25)]

# Colocando os valores em um dicionário

In [None]:
# Parameter grid: the key is the estimator argument to vary, the value is the
# list of candidates to try.
parametros = {'n_neighbors': k_list}

# Instanciando o objeto GridSearchCV (validação cruzada com 5 folds, métrica: acurácia)

In [None]:
# Grid search over the candidate K values, scored by accuracy with 5-fold
# cross-validation.
grid = GridSearchCV(
    estimator=knn,
    param_grid=parametros,
    cv=5,
    scoring='accuracy',
)

# Treinando o GridSearchCV com a base completa

In [None]:
# Run the search on the full dataset: every column except 'class' is a
# feature, and 'class' is the label. The fitted object is the cell's last
# expression, so its repr is displayed.
grid.fit(
    X=df_iris.drop(columns='class'),
    y=df_iris['class'],
)

GridSearchCV(cv=5, error_score=nan,
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=5, p=2,
                                            weights='uniform'),
             iid='deprecated', n_jobs=None,
             param_grid={'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                         23, 24]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

# Exibindo o melhor valor para o parâmetro K

In [None]:
# Report the winning parameter setting and its mean cross-validated accuracy.
print(f"Melhores parametros {grid.best_params_} com o valor de acurácia {grid.best_score_} ")

Melhores parametros {'n_neighbors': 6} com o valor de acurácia 0.9800000000000001 
