## Importa as Bibliotecas e a Base de Dados

In [None]:
# Chama as bibliotecas
import sklearn as sk
import numpy as np
import pandas as pd
import matplotlib.pyplot as ptl

# Load the dataset from CTG.csv.
data = pd.read_csv("CTG.csv")

# Discard the first column, which is not used by the rest of the notebook.
first_column = data.columns[0]
data = data.drop(columns=[first_column])

# Preview the first rows.
data.head()

## Cria o Dataframe

In [None]:
# Expose the loaded table under the name used by the following cells.
df_data = pd.DataFrame(data=data)

# Print the column names, dtypes and non-null counts.
df_data.info()

## Cria os Datasets de Treino, Validação e Teste

In [None]:
from sklearn.model_selection import train_test_split

# Keep the target out of the feature matrix. Passing the full frame as X
# would leak the "NSP" label into the model inputs and make every score
# downstream meaningless.
# NOTE(review): if the CSV holds other label-derived columns, confirm
# whether they should be dropped here as well.
features = df_data.drop(columns=["NSP"])
target = df_data["NSP"]

# Split into train, validation and test: half of the rows go to training,
# and the remaining half is divided evenly between validation and test.
x_train, x_temp, y_train, y_temp = train_test_split(
    features, target, test_size=0.5, random_state=42
)
x_validation, x_test, y_validation, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42
)

## Regra do Cotovelo Para Achar o Melhor K

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Candidate neighbourhood sizes explored by the elbow search.
k_candidates = range(1, 100)

# Convert the validation features to a C-contiguous NumPy array before
# they are handed to predict().
x_validation = np.ascontiguousarray(x_validation)

# Validation misclassification rate for each candidate K, in order.
error_tx = []
for k in k_candidates:
    KNN = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
    pred = KNN.predict(x_validation)
    error_tx.append(np.mean(pred != y_validation))


## Plota o Gráfico de Erro dos K's

In [None]:
# Plot the validation error against K so the elbow can be read off visually.
ptl.figure(figsize=(11, 7))
ptl.plot(
    range(1, 100),
    error_tx,
    color='blue',
    linestyle='dashed',
    marker='o',
)
ptl.xlabel('K')
ptl.ylabel('Erro')

## Tenta Encontrar o Melhor Modelo

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Exhaustive search over the weighting scheme and K, scored by accuracy on
# the validation split. The strict ">" keeps the first configuration that
# reaches the highest accuracy.
higher = -1
for weighting in ("distance", "uniform"):
    for k in range(1, 100):
        KNN = KNeighborsClassifier(n_neighbors=k, weights=weighting).fit(x_train, y_train)
        pred = KNN.predict(x_validation)
        acc = accuracy_score(y_validation, pred)
        if acc > higher:
            higher, best_k, best_w = acc, k, weighting

# Report the winning configuration.
print("\nMelhor configuração para o KNN")
print("K: ",best_k," Métrica: ", best_w," Acc: ", higher)

## Executa o Melhor Modelo

In [None]:
from sklearn.metrics import classification_report

# Refit KNN with the configuration selected by the hyperparameter search
# (best_k, best_w) rather than hard-coded values, so this cell always
# matches whatever the search found.
KNN_model = KNeighborsClassifier(n_neighbors=best_k, weights=best_w)
KNN_model.fit(x_train, y_train)
opinion = KNN_model.predict(x_validation)

# All three metrics must describe the same predictions. The report is built
# from `opinion`, not from `pred`, which only holds the output of the last
# model tried during the search.
# NOTE(review): x_test / y_test are never used in the visible cells; consider
# reporting the final metrics on that held-out split.
print("Acc: ", accuracy_score(y_validation, opinion))
print("Relatório de Classificação:\n", classification_report(y_validation, opinion))
confusion_matrix(y_validation, opinion)