# KNN PARA DATASET FAKE BILLS
## FUENTE [KAGGLE](https://www.kaggle.com/datasets/alexandrepetit881234/fake-bills)

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [12]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

In [3]:
df = pd.read_csv('/content/fake_bills.csv',delimiter=';')
df.head(10)

Unnamed: 0,is_genuine,diagonal,height_left,height_right,margin_low,margin_up,length
0,True,171.81,104.86,104.95,4.52,2.89,112.83
1,True,171.46,103.36,103.66,3.77,2.99,113.09
2,True,172.69,104.48,103.5,4.4,2.94,113.16
3,True,171.36,103.91,103.94,3.62,3.01,113.51
4,True,171.73,104.28,103.46,4.04,3.48,112.54
5,True,172.17,103.74,104.08,4.42,2.95,112.81
6,True,172.34,104.18,103.85,4.58,3.26,112.81
7,True,171.88,103.76,104.08,3.98,2.92,113.08
8,True,172.47,103.92,103.67,4.0,3.25,112.85
9,True,172.47,104.07,104.02,4.04,3.25,113.45


# EDA

In [4]:
df.dtypes

Unnamed: 0,0
is_genuine,bool
diagonal,float64
height_left,float64
height_right,float64
margin_low,float64
margin_up,float64
length,float64


In [5]:
df.isnull().sum().sum()

37

In [6]:
df.shape

(1500, 7)

In [7]:
df.dropna(inplace=True)
df.shape

(1463, 7)

In [9]:
df['is_genuine'] = df['is_genuine'].astype(int)
df.dtypes
df

Unnamed: 0,is_genuine,diagonal,height_left,height_right,margin_low,margin_up,length
0,1,171.81,104.86,104.95,4.52,2.89,112.83
1,1,171.46,103.36,103.66,3.77,2.99,113.09
2,1,172.69,104.48,103.50,4.40,2.94,113.16
3,1,171.36,103.91,103.94,3.62,3.01,113.51
4,1,171.73,104.28,103.46,4.04,3.48,112.54
...,...,...,...,...,...,...,...
1495,0,171.75,104.38,104.17,4.42,3.09,111.28
1496,0,172.19,104.63,104.44,5.27,3.37,110.97
1497,0,171.80,104.01,104.12,5.51,3.36,111.95
1498,0,172.06,104.28,104.06,5.17,3.46,112.25


# ENTRENAMIENTO DEL MODELO KNN

In [13]:
X = df.drop(columns=['is_genuine'])
y = df['is_genuine']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled,y_train)

y_pred = knn.predict(X_test_scaled)
accuracy = accuracy_score(y_test,y_pred)
print(f'Accuracy :{accuracy}')

Accuracy :1.0


# BUSCAMOS MEJORES PARAMETROS

In [15]:
param_grid = {
    'n_neighbors': np.arange(1, 20, 2),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'chebyshev', 'minkowski'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [10, 20, 30, 40, 50],
    'p': [1, 2, 3]
}
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn,param_grid,cv=5,scoring='accuracy',n_jobs=-1)
grid_search.fit(X_train,y_train)
print("Mejores parámetros:", grid_search.best_params_)
print("Mejor precisión:", grid_search.best_score_)

Mejores parámetros: {'algorithm': 'auto', 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 5, 'p': 1, 'weights': 'uniform'}
Mejor precisión: 0.9880341880341881


In [16]:
best_knn = grid_search.best_estimator_
best_knn.fit(X_train, y_train)

# Predecir en el conjunto de prueba
y_pred = best_knn.predict(X_test)

# Evaluar el modelo
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00
