## KNN Classifier

In [1]:
#import bibliotecas

import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics as mt

### Load Datasets

In [2]:
# Read the feature (X_*) and label (y_*) CSV files for the three partitions,
# one partition per line: training, test, validation.
X_train, y_train = pd.read_csv('X_training.csv'), pd.read_csv('y_training.csv')
X_test, y_test = pd.read_csv('X_test.csv'), pd.read_csv('y_test.csv')
X_val, y_val = pd.read_csv('X_validation.csv'), pd.read_csv('y_validation.csv')

In [3]:
# Preview the first two rows of the training features.
X_train.head(n=2)

Unnamed: 0,id,customer_type,age,class,flight_distance,inflight_wifi_service,departure_arrival_time_convenient,ease_of_online_booking,gate_location,food_and_drink,...,baggage_handling,checkin_service,inflight_service,cleanliness,departure_delay_in_minutes,arrival_delay_in_minutes,gender_Female,gender_Male,type_of_travel_business_travel,type_of_travel_personal_travel
0,13508,1,0.5,0.0,0.03958,0.6,0.6,0.6,0.6,1.0,...,0.5,1.0,0.6,0.4,0.0,0.013848,1.0,0.0,1.0,0.0
1,28874,1,0.24359,0.0,0.205775,0.6,0.4,0.4,0.4,0.6,...,0.5,0.5,0.2,0.6,0.0,0.0,0.0,1.0,1.0,0.0


### Dados de Treino

In [4]:
# Data preparation: flatten the label tables into 1-D arrays.
# (Presumably each y_* file holds a single label column -- TODO confirm.)
#
# np.asarray(...) is used instead of `.values` so this cell can be re-run
# after the names have already been rebound to NumPy arrays; `.values` only
# exists on the original DataFrames and would fail on a second execution.
y_train = np.asarray(y_train).ravel()
y_val = np.asarray(y_val).ravel()

# y_test gets the same treatment so all three label sets share one shape.
y_test = np.asarray(y_test).ravel()

**Remover a coluna `id` dos datasets, pois é apenas um identificador do registro e não uma variável preditiva**

In [5]:
# Remove the 'id' column from every feature set so it is not used as a model input.
X_train = X_train.drop(columns=['id'])
X_test = X_test.drop(columns=['id'])
X_val = X_val.drop(columns=['id'])

In [6]:
# Preview the first two rows of the validation features after dropping 'id'.
X_val.head(n=2)

Unnamed: 0,customer_type,age,class,flight_distance,inflight_wifi_service,departure_arrival_time_convenient,ease_of_online_booking,gate_location,food_and_drink,online_boarding,...,baggage_handling,checkin_service,inflight_service,cleanliness,departure_delay_in_minutes,arrival_delay_in_minutes,gender_Female,gender_Male,type_of_travel_business_travel,type_of_travel_personal_travel
0,1,0.525641,1.0,0.714055,1.0,1.0,1.0,1.0,0.8,0.8,...,0.75,0.75,0.8,0.8,0.0,0.0,0.0,1.0,1.0,0.0
1,1,0.615385,1.0,0.054725,0.4,1.0,1.0,1.0,0.4,0.4,...,0.25,0.0,0.4,0.6,0.004916,0.004946,0.0,1.0,1.0,0.0


In [7]:
# Candidate neighbourhood sizes: the odd values 3, 5, ..., 17.
k_list = np.arange(3, 19, 2)

# One entry per K is appended to each list, in the same order as k_list.
accuracy_list, recall_list, precision_list, f1score_list = [], [], [], []

for k in k_list:
    print (f'Numero de K:{k}')

    # Fit on the training split and predict that same split, so every
    # number reported below is a training-set metric.
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    y_pred = knn.predict(X_train)

    # Score the predictions.
    accuracy = mt.accuracy_score(y_train, y_pred)
    precision = mt.precision_score(y_train, y_pred)
    recall = mt.recall_score(y_train, y_pred)
    f1_score = mt.f1_score(y_train, y_pred)

    # Report.
    print(f'Accuracy: {accuracy} ')
    print (f'Precision:{precision} ')
    print (f'Recall:{recall} ')
    print(f'F1-Score:{f1_score} \n')

    # Store for the summary table.
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1score_list.append(f1_score)
    

Numero de K:3
Accuracy: 0.9570157898365855 
Precision:0.9731907070031757 
Recall:0.9263395698103601 
F1-Score:0.9491873563405767 

Numero de K:5
Accuracy: 0.9475832586361442 
Precision:0.9700867789688616 
Recall:0.9070255822833142 
F1-Score:0.9374969167776627 

Numero de K:7
Accuracy: 0.943018685789147 
Precision:0.9685847696216439 
Recall:0.8976390479826906 
F1-Score:0.9317633847474981 

Numero de K:9
Accuracy: 0.9395573329655933 
Precision:0.9678580078192576 
Recall:0.8900980017818506 
F1-Score:0.9273507815219373 

Numero de K:11
Accuracy: 0.9371026684134317 
Precision:0.9664247769174682 
Recall:0.885643375334097 
F1-Score:0.9242723604907934 

Numero de K:13
Accuracy: 0.9347996966144936 
Precision:0.9649947753396029 
Recall:0.8815387552500955 
F1-Score:0.9213808241045595 

Numero de K:15
Accuracy: 0.9340550230986693 
Precision:0.9645721458958086 
Recall:0.8801705485554283 
F1-Score:0.9204405550194656 

Numero de K:17
Accuracy: 0.9326070468178997 
Precision:0.9633716261042634 
Recall:

**Dataframe dos resultados de treinamentos**

In [8]:
# Gather the per-K training metrics into one table (one row per K) and display it.
df_resultados = pd.DataFrame({
    'K': k_list,
    'Acurácia': accuracy_list,
    'Precisão': precision_list,
    'Recall': recall_list,
    'F1-Score': f1score_list,
})
df_resultados

Unnamed: 0,K,Acurácia,Precisão,Recall,F1-Score
0,3,0.957016,0.973191,0.92634,0.949187
1,5,0.947583,0.970087,0.907026,0.937497
2,7,0.943019,0.968585,0.897639,0.931763
3,9,0.939557,0.967858,0.890098,0.927351
4,11,0.937103,0.966425,0.885643,0.924272
5,13,0.9348,0.964995,0.881539,0.921381
6,15,0.934055,0.964572,0.880171,0.920441
7,17,0.932607,0.963372,0.87788,0.918641


### Dados de Validação

**Definindo o melhor K para validação**


In [9]:
# Choose K by accuracy on the held-out validation split instead of on the
# training split. The training-set accuracies computed earlier fall steadily
# as K grows, so ranking by them simply selects the smallest K and says
# nothing about how well the model generalises.
val_accuracy_list = [
    mt.accuracy_score(
        y_val,
        KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train).predict(X_val),
    )
    for k in k_list
]

# best_k remains a position in k_list (first maximum wins on ties), so
# `k_list[best_k]` keeps working in the cells that follow.
best_k = int(np.argmax(val_accuracy_list))



In [10]:
# Display the K value stored at the selected index of k_list.
k_list[best_k]

3

**Treinar o modelo com o melhor parâmetro nos dados de treino e avaliá-lo sobre os dados de validação**

In [11]:
# Build a KNN classifier with the selected K and fit it on the training split.
model = KNeighborsClassifier(n_neighbors=k_list[best_k]).fit(X_train, y_train)

# Predict the validation split.
yhat_val = model.predict(X_val)

# Validation metrics.
accuracy = mt.accuracy_score(y_val, yhat_val)
precision = mt.precision_score(y_val, yhat_val)
recall = mt.recall_score(y_val, yhat_val)
f1_score = mt.f1_score(y_val, yhat_val)

# Report in the same order used for the training results.
print(f'Accuracy: {accuracy} ')
print (f'Precision:{precision} ')
print (f'Recall:{recall} ')
print(f'F1-Score:{f1_score} \n')

    


Accuracy: 0.9235174876926542 
Precision:0.94254707947654 
Recall:0.8769767614522236 
F1-Score:0.9085804392138764 



### Dados de Teste

**Modelo treinado e validado; agora retreinar com treino + validação e avaliar sobre os dados de teste**

In [12]:
# Final model: same K as before, refit on training + validation data.
model_last = KNeighborsClassifier(n_neighbors=k_list[best_k])

# pd.concat keeps the combined features as a DataFrame with column names.
# np.concatenate would return a bare array, so the model would be fitted
# without feature names and then asked to predict on the X_test DataFrame,
# which makes scikit-learn warn about the mismatch and relies on the
# columns happening to be in the same positional order.
X_train_val = pd.concat([X_train, X_val], ignore_index=True)
y_train_val = np.concatenate((y_train, y_val))

# Training
model_last.fit(X_train_val, y_train_val)

# Predict the test split.
ypred_test = model_last.predict(X_test)

# Test accuracy
accuracy = mt.accuracy_score(y_test, ypred_test)
print(f'Accuracy: {accuracy} ')

# Test precision
precision = mt.precision_score(y_test, ypred_test)
print (f'Precision:{precision} ')

# Test recall
recall = mt.recall_score(y_test, ypred_test)
print (f'Recall:{recall} ')

# Test F1-score
f1_score = mt.f1_score(y_test, ypred_test)
print(f'F1-Score:{f1_score} \n')




Accuracy: 0.9277024678484532 
Precision:0.9447203223086292 
Recall:0.8871975362956446 
F1-Score:0.9150558126871767 

