In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor #KNN
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV,KFold
from sklearn.metrics import confusion_matrix, accuracy_score,classification_report,mean_squared_error
import matplotlib.pyplot as plt

# KNN Classification

In [None]:
# Load the bank dataset used for the classification half of the notebook.
# The path is written as a raw string so the Windows backslashes stay literal;
# the previous non-raw literal depended on invalid escape sequences
# (backslash followed by W, M, D, B), which are deprecated and raise warnings
# on current Python versions. The resulting path value is unchanged.
data=pd.read_csv(r"D:\Workshops\Machine Learning for Data Science With Python\Datasets\Bank.CSV")
# Preview the first rows as the cell output.
data.head()

In [None]:
# Positional split of the frame: columns 0-6 become the predictor matrix,
# column 7 becomes the label vector. Both are converted to NumPy arrays.
x, y = data.iloc[:, :7].values, data.iloc[:, 7].values

In [None]:
# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle so
# the split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Creating KNN Classifier object

In [None]:
# Baseline classifier: vote among the 5 nearest training points.
knncl = KNeighborsClassifier(n_neighbors=5)
# fit is the last expression, so the fitted estimator is shown as the cell output.
knncl.fit(X=x_train, y=y_train)

# Predictions & accuracy

In [None]:
# Predict a label for every held-out test row, then display the predictions.
y_pred = knncl.predict(X=x_test)
y_pred

In [None]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Cross-tabulate true labels against predicted labels for the test set.
confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# The report is a preformatted text table, so it is printed rather than
# left as a bare expression.
print(
    classification_report(y_true=y_test, y_pred=y_pred)
)

# Selecting the optimal k value

# Validation set approach

In [None]:
# Carve a validation set (20%) out of the training data only, so the test
# set is not touched while choosing k.
xtr, xts, ytr, yts = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Candidate neighbourhood sizes 1..20 and the validation error found for each.
K = range(1, 21)
Errors = []

for k in K:
    # fit returns the estimator itself, so construction and training chain.
    model = KNeighborsClassifier(n_neighbors=k).fit(xtr, ytr)
    ypr = model.predict(xts)
    # Misclassification rate on the validation split.
    validation_error = 1 - accuracy_score(yts, ypr)
    Errors.append(validation_error)

In [None]:
# Display the validation error recorded for each k (one entry per value in K).
Errors

# Plotting error values against the number of neighbors

In [None]:
# Error curve for the validation-set approach: blue line with 'x' markers,
# one point per candidate k.
plt.plot(K,Errors,'bx-')
# Label the figure so it can be read without the surrounding code.
plt.xlabel("Number of neighbors (k)")
plt.ylabel("Validation error (1 - accuracy)")
plt.title("KNN classifier: validation-set error by k")
plt.show()

# Cross Validation

In [None]:
# Candidate neighbourhood sizes 1..20 and the cross-validated error for each.
K=range(1,21)
Errors=[]

for k in K:
    model=KNeighborsClassifier(n_neighbors=k)
    # cross_val_score yields one accuracy per fold (10 folds); subtracting from 1
    # turns each into that fold's misclassification rate.
    cvals=1-cross_val_score(model,x_train,y_train,cv=10,scoring="accuracy")
    # Average the fold error rates directly. The earlier np.sqrt here was carried
    # over from the RMSE recipe: a misclassification rate is not a squared
    # quantity, so taking its root inflated the values and distorted the mean.
    Errors.append(cvals.mean())

In [None]:
# Display the cross-validated error values collected for each k.
Errors

In [None]:
# Error curve for the cross-validation approach: blue line with 'x' markers,
# one point per candidate k.
plt.plot(K,Errors,'bx-')
# Label the figure so it can be read without the surrounding code.
plt.xlabel("Number of neighbors (k)")
plt.ylabel("Cross-validated error")
plt.title("KNN classifier: 10-fold CV error by k")
plt.show()

# Hyperparameter Optimization

In [None]:
# Search space: k from 1 to 10 inclusive.
params = {"n_neighbors": list(range(1, 11))}
# Unconfigured estimator; the grid search supplies n_neighbors.
model = KNeighborsClassifier()
# 10 folds with KFold's default settings.
cval = KFold(n_splits=10)

In [None]:
# Exhaustive search over the k grid using the 10-fold splitter. No scoring is
# passed, so GridSearchCV falls back to the estimator's own score method.
gsearch = GridSearchCV(
    estimator=model,
    param_grid=params,
    cv=cval,
)

In [None]:
# Run the search on the training data only; the returned object is kept as
# `results` for inspection in the following cell.
results = gsearch.fit(X=x_train, y=y_train)

In [None]:
# Show the parameter setting from the grid that the search selected.
results.best_params_

# Select the optimal value and fit the model again

In [None]:
# Refit on the full training split with k = 4, the value chosen as the elbow point.
knncl = KNeighborsClassifier(n_neighbors=4)
# fit is the last expression, so the fitted estimator is shown as the cell output.
knncl.fit(X=x_train, y=y_train)

In [None]:
# Predict test labels with the refitted classifier and display them.
y_pred = knncl.predict(X=x_test)
y_pred

In [None]:
# Test-set accuracy of the refitted classifier.
accuracy_score(y_true=y_test, y_pred=y_pred)

# KNN Regression

In [None]:
# Load the Boston dataset for the regression half of the notebook.
# Note that this rebinds `data`, replacing the frame loaded for classification.
data = pd.read_csv(
    filepath_or_buffer="D:\\Workshops\\Python for Data Science Comprehensive Workshop\\Part 04 - Machine Learning\\Datasets\\Boston.CSV"
)
# Preview the first five rows (the default for head) as the cell output.
data.head(n=5)

In [None]:
# Positional split of the frame: columns 0-11 become the predictor matrix,
# column 12 becomes the regression target. Both are converted to NumPy arrays.
x, y = data.iloc[:, :12].values, data.iloc[:, 12].values

In [None]:
# Hold out 20% of the rows for testing; random_state=0 fixes the shuffle so
# the split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Model fitting

In [None]:
# Baseline regressor using the 6 nearest training points for each prediction.
knnrg = KNeighborsRegressor(n_neighbors=6)
# fit is the last expression, so the fitted estimator is shown as the cell output.
knnrg.fit(X=x_train, y=y_train)

# Predictions & error (RMSE)

In [None]:
# Predict the target for every held-out test row, then display the predictions.
y_pred = knnrg.predict(X=x_test)
y_pred

In [None]:
# Root mean squared error on the test set: the square root puts the error
# back in the same units as the target.
np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=y_pred)
)

# Selecting the optimal k value

# Validation set approach

In [None]:
# Carve a validation set (20%) out of the training data only, so the test
# set is not touched while choosing k.
xtr, xts, ytr, yts = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Candidate neighbourhood sizes 1..20 and the validation RMSE found for each.
K = range(1, 21)
Errors = []

for k in K:
    # fit returns the estimator itself, so construction and training chain.
    model = KNeighborsRegressor(n_neighbors=k).fit(xtr, ytr)
    ypr = model.predict(xts)
    # RMSE on the validation split.
    validation_mse = mean_squared_error(yts, ypr)
    Errors.append(np.sqrt(validation_mse))

In [None]:
# Display the validation RMSE recorded for each k (one entry per value in K).
Errors

In [None]:
# RMSE curve for the validation-set approach: blue line with 'x' markers,
# one point per candidate k.
plt.plot(K,Errors,"bx-")
# Label the figure so it can be read without the surrounding code.
plt.xlabel("Number of neighbors (k)")
plt.ylabel("Validation RMSE")
plt.title("KNN regressor: validation-set RMSE by k")
plt.show()

# Cross Validation

In [None]:
# Candidate neighbourhood sizes 1..20 and the cross-validated RMSE for each.
K = range(1, 21)
Errors = []

for k in K:
    model = KNeighborsRegressor(n_neighbors=k)
    # The scorer reports negated MSE (one value per fold), so flip the sign
    # to recover the per-fold mean squared errors.
    cvals = -cross_val_score(
        estimator=model,
        X=x_train,
        y=y_train,
        scoring="neg_mean_squared_error",
        cv=10,
    )
    # Convert each fold's MSE to RMSE, then average across the 10 folds.
    Errors.append(np.mean(np.sqrt(cvals)))

In [None]:
# Display the cross-validated RMSE values collected for each k.
Errors

In [None]:
# RMSE curve for the cross-validation approach: blue line with 'x' markers,
# one point per candidate k.
plt.plot(K,Errors,"bx-")
# Label the figure so it can be read without the surrounding code.
plt.xlabel("Number of neighbors (k)")
plt.ylabel("Cross-validated RMSE")
plt.title("KNN regressor: 10-fold CV RMSE by k")
plt.show()

# Hyperparameter Optimization

In [None]:
# Search space: k from 1 to 10 inclusive.
params = {"n_neighbors": list(range(1, 11))}
# Unconfigured estimator; the grid search supplies n_neighbors.
model = KNeighborsRegressor()
# 10 folds with KFold's default settings.
cval = KFold(n_splits=10)

In [None]:
# Exhaustive search over the k grid using the 10-fold splitter. No scoring is
# passed, so GridSearchCV falls back to the estimator's own score method.
gsearch = GridSearchCV(
    estimator=model,
    param_grid=params,
    cv=cval,
)

In [None]:
# Run the search on the training data only; the returned object is kept as
# `results` for inspection in the following cell.
results = gsearch.fit(X=x_train, y=y_train)

In [None]:
# Show the parameter setting from the grid that the search selected.
results.best_params_