<a href="https://colab.research.google.com/github/Pasz93/Fundamentos_de_Data_Science_Aplicado_Financas/blob/main/Projeto_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
# Load the data

import pandas as pd

url = 'https://github.com/daniel-usp/MachineLearning/raw/main/05%20-%20KNN/varejo1.xlsx'

dados = pd.read_excel(url)

# Target: the 'pagamento' column. Features: every remaining column.
y = dados['pagamento']
# Bound to upper-case `X` because the later cells read `X`; the previous
# lower-case `x` left `X` undefined on a fresh kernel (NameError).
X = dados.drop('pagamento', axis = 1)
dados

Unnamed: 0,pagamento,solteiro,idade,mulher
0,1,0,20,0
1,1,0,34,1
2,1,0,21,1
3,1,0,22,1
4,1,0,22,1
...,...,...,...,...
175,0,1,20,0
176,0,1,23,0
177,0,1,18,0
178,0,0,24,1


In [40]:

# Standardize the features

# StandardScaler was used without being imported; import it where it is used,
# following this notebook's per-cell import style.
from sklearn.preprocessing import StandardScaler

# Columns of X with exactly two distinct values are treated as dummies
is_dummy = X.nunique() == 2
dummy_columns = X.columns[is_dummy]

# Standardize every feature of X with StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the scaling — confirm this is acceptable.
scaler = StandardScaler()

# Keep X's index on the rebuilt frame so the column assignment below aligns
# row by row even when X does not carry a default 0..n-1 index.
scaled_features = pd.DataFrame(scaler.fit_transform(X),
                               columns=X.columns,
                               index=X.index)

# Put the dummy columns back in their original (unscaled) coding
scaled_features[dummy_columns] = X[dummy_columns]
scaled_features

Unnamed: 0,solteiro,idade,mulher
0,0,-0.977056,0
1,0,1.225253,1
2,0,-0.819748,1
3,0,-0.662441,1
4,0,-0.662441,1
...,...,...,...
175,1,-0.977056,0
176,1,-0.505133,0
177,1,-1.291672,0
178,0,-0.347825,1


In [41]:
# Train/test split

from sklearn.model_selection import train_test_split

# A fixed seed makes the split — and therefore the predictions and metrics
# computed from it — reproducible across kernel restarts. 42 is the same seed
# that errors() passes to its own train_test_split call.
X_train, X_test, y_train, y_test = train_test_split(scaled_features, y,
                                                    test_size=0.20,
                                                    random_state=42)


In [46]:
# Fit a KNN classifier and predict the held-out set

from sklearn.neighbors import KNeighborsClassifier

# Number of neighbours (k)
k = 7

# fit() returns the fitted estimator, so construction and training chain.
knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

prediction = knn.predict(X_test)
prediction

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1])

In [47]:
from sklearn.metrics import classification_report,confusion_matrix

# Confusion matrix of the test-set predictions
# (per scikit-learn: rows are true labels, columns are predicted labels).
print(confusion_matrix(y_true=y_test, y_pred=prediction))

[[ 4  3]
 [ 2 27]]


In [48]:
# Per-class precision / recall / f1 for the test-set predictions
print(classification_report(y_true=y_test, y_pred=prediction))

              precision    recall  f1-score   support

           0       0.67      0.57      0.62         7
           1       0.90      0.93      0.92        29

    accuracy                           0.86        36
   macro avg       0.78      0.75      0.77        36
weighted avg       0.85      0.86      0.86        36



In [49]:
#Gráfico Otimizado do vizinho ótimo

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

def errors(X, y, k_values=range(1, 16, 2), test_size=0.20, random_state=42):
  """Return the test-set misclassification rate of KNN for each K.

  The data is split once with ``train_test_split``; a
  ``KNeighborsClassifier`` is then fitted on the training part for every K
  in ``k_values`` and scored on the held-out part.

  Parameters
  ----------
  X : feature matrix handed to ``train_test_split``. The features passed
      here are the ones evaluated; the function no longer reads the global
      ``scaled_features``, so pass already-scaled features if scaling is
      wanted.
  y : target labels aligned with ``X``.
  k_values : iterable of neighbour counts to evaluate
      (default: the odd values 1..15).
  test_size : fraction of rows held out for evaluation (default 0.20).
  random_state : seed for the split, so the curve is reproducible
      (default 42).

  Returns
  -------
  list of float
      One error rate (share of mispredicted test rows) per entry of
      ``k_values``, in the same order.
  """
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state)

  # Named `error_rates` rather than `errors` so the local list does not
  # shadow this function's own name.
  error_rates = []
  for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    # Mean of the boolean mismatch mask = fraction of wrong predictions
    error_rates.append((y_pred != y_test).mean())
  return error_rates

def plot(X,y):
  """Draw the KNN error-rate curve for odd K in 1..15 and star its minimum.

  The rates come from ``errors(X, y)``. They are drawn as a dashed blue line
  with red markers, and the K with the lowest rate gets a large black star.
  The figure is displayed via ``fig.show()``, whose result is returned.
  """
  rates = errors(X,y)

  # Odd neighbour counts shown on the x axis
  ks = list(range(1, 16, 2))

  # Position, K and value of the lowest error rate
  best_idx = np.argmin(rates)
  best_k = ks[best_idx]
  best_rate = rates[best_idx]

  # Error-rate curve
  curve = go.Scatter(x=ks, y=rates, mode='lines+markers')
  fig = go.Figure(data=curve)

  x_axis = dict(tickmode='array', tickvals=ks, dtick=2)
  fig.update_layout(
      title='Average Error Rate for Different K',
      xaxis_title='Number of Neighbors (K)',
      yaxis_title='Error Rate',
      xaxis=x_axis,
  )

  # Style the curve before the star trace is added, so only the curve
  # receives these marker/line settings.
  fig.update_traces(
      marker=dict(color='red', size=8),
      line=dict(color='blue', dash='dash'),
  )

  # Star marker at the point of minimum error rate
  star = go.Scatter(
      x=[best_k],
      y=[best_rate],
      mode='markers',
      marker=dict(color='black', size=18, symbol='star'),
      showlegend=False,
  )
  fig.add_trace(star)

  return fig.show()

In [50]:
# Evaluate K on the standardized features — the same data the classifier
# above was trained on — rather than on the raw `X`.
plot(scaled_features, y)