## K-Nearest Neighbors

In [24]:
# import the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Fetch the seaborn "tips" example dataset
tips = sns.load_dataset('tips')

# Predict the tip from the bill amount and the party size
features = tips[['total_bill', 'size']]
target = tips['tip']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Build a 5-neighbor KNN regressor and fit it on the training rows
# (fit() returns the estimator itself, so construction and training can be chained)
knn = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)

# Predict tips for the held-out rows
y_pred = knn.predict(X_test)

# Score the predictions with mean squared error and report it
mse = mean_squared_error(y_test, y_pred)
print(mse)

0.7436584489795918


In [11]:
# Show the hyper-parameters of the current `knn` estimator as a dict
# (anything not passed to the constructor appears with its default value)
knn.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [13]:
# Summarize the training features: row count, column dtypes and non-null counts
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 195 entries, 228 to 102
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   total_bill  195 non-null    float64
 1   size        195 non-null    int64  
dtypes: float64(1), int64(1)
memory usage: 4.6 KB


## K-Nearest Neighbors Classifiers

In [18]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris

# Fetch the iris dataset bundled with scikit-learn
iris = load_iris()

# Separate the feature matrix from the class labels
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; fixed seed for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build a 3-neighbor KNN classifier and fit it on the training samples
# (fit() returns the estimator itself, so construction and training can be chained)
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Classify the held-out samples
y_pred = knn.predict(X_test)

# Report the accuracy score of the predictions on the held-out samples
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)



1.0


### How to determine the number of neighbors that gives the highest accuracy?

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Load the Iris dataset
data = load_iris()
X = data.data
y = data.target

# Split the data into training and test sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the range of neighbors to try
neighbors = list(range(3, 21))

# Define the parameter grid for grid search.
# NOTE(review): the `algorithm` options are different neighbor-search strategies
# and are expected to yield (near-)identical predictions, so searching over them
# mostly multiplies the run time by four -- confirm before keeping this axis.
param_grid = {'n_neighbors': neighbors,
              'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
              'weights': ['uniform', 'distance'],
              'p': [1, 2]}

# Create a KNN classifier to be tuned
knn = KNeighborsClassifier()

# Evaluate every combination in the grid with 5-fold cross-validation,
# using the training split only so the test split stays unseen
grid_search = GridSearchCV(knn, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Pull out the winning value of every tuned hyper-parameter
best_params = grid_search.best_params_
best_neighbors = best_params['n_neighbors']
best_algorithm = best_params['algorithm']
best_weights = best_params['weights']
best_p = best_params['p']

# Create the final classifier from the FULL winning combination.
# Passing only `n_neighbors` would silently fall back to the default
# algorithm / weights / p and evaluate a different model than the one
# the grid search selected.
best_knn = KNeighborsClassifier(**best_params)

# Fit the classifier on the training data
best_knn.fit(X_train, y_train)

# Predict the labels for the test set
y_pred = best_knn.predict(X_test)

# Calculate the accuracy of the classifier on the held-out test set
accuracy = accuracy_score(y_test, y_pred)
print(f"Best number of neighbors: {best_neighbors}")
print(f"Best algorithm: {best_algorithm}")
print(f"Best weights: {best_weights}")
print(f"Best p: {best_p}")
print(f"Accuracy: {accuracy}")

Best number of neighbors: 14
Best algorithm: auto
Best weights: uniform
Best p: 1
Accuracy: 1.0


Note: the next time a model is trained on this data, use the hyper-parameters found by the grid search above when creating the model,
e.g. `model = KNeighborsClassifier(n_neighbors=14, algorithm='auto', weights='uniform', p=1)`