In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [None]:
# Load the ads dataset from a CSV file located one directory above the notebook.
dataset = pd.read_csv('../Social_Network_Ads.csv')
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
# Overwrite the 'Gender' column with the integer codes produced by LabelEncoder.
# NOTE(review): the feature matrix is later selected positionally (columns 2:4);
# confirm whether 'Gender' is meant to be one of those columns — if it is not,
# this encoding only changes what the preview shows.
dataset['Gender']=le.fit_transform(dataset['Gender'])

In [None]:
# Plain-text preview of the first rows, reflecting the encoded 'Gender' column.
print(dataset.head())

In [None]:
# Features: the two columns at positions 2 and 3, as a NumPy array.
# Presumably age and estimated salary (the axis labels used in the final plot) —
# TODO confirm against the CSV header.
X = dataset.iloc[:, 2:4].values
# Target: the last column, as a NumPy array.
# Presumably 'Purchased', which a later cell reads by name — verify.
y = dataset.iloc[:, -1].values

In [None]:
from sklearn.model_selection import train_test_split
# Hold out a quarter of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

In [None]:
# Standardise the features, then classify by a vote among the 11 nearest neighbours.
# The step names are kept exactly as they are because the decision-boundary cell
# addresses the classifier step through the `knn__weights` parameter path.
knn = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("knn", KNeighborsClassifier(n_neighbors=11)),
    ]
)
knn.fit(X_train, y_train)

In [None]:
# Predict a class label for every held-out test row with the fitted pipeline.
y_pred = knn.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
# Score the test-split predictions against the true labels and report the result.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

In [None]:
from sklearn.model_selection import  cross_val_score
# Mean cross-validated score for every candidate neighbourhood size k = 1..29.
#
# The scaler is placed inside the pipeline so that cross_val_score re-fits it on
# the training portion of each fold. Scaling all of X once up front would let
# every validation fold contribute to the mean/variance used to transform the
# data the model is trained on, which biases the scores optimistically.
k_values = list(range(1, 30))
scores = []

for k_ in k_values:
    knn_ = Pipeline([
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier(n_neighbors=k_)),
    ])
    # Raw (unscaled) X goes in; the pipeline handles scaling per fold. 11 folds.
    score = cross_val_score(knn_, X, y, cv=11)
    scores.append(np.mean(score))

In [None]:
# Mean cross-validation score as a function of k, one marker per candidate value.
cv_fig, cv_ax = plt.subplots()
cv_ax.plot(k_values, scores, marker="o")
cv_ax.set_xlabel("K Values")
cv_ax.set_ylabel("Accuracy Score")
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix for the test split (rows: true labels, columns: predicted labels).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Left as a bare last expression so the notebook echoes the accuracy below the matrix.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
import seaborn as sns
# Render the confusion matrix as a heatmap, annotating each cell with its integer count.
sns.heatmap(cm, annot=True, fmt="d")
plt.show()

In [None]:
from sklearn.base import clone
from sklearn.inspection import DecisionBoundaryDisplay

# Decision regions of the scaler + kNN pipeline for both neighbour-weighting
# schemes, with the held-out test points drawn on top of each panel.
fig, axs = plt.subplots(ncols=2, figsize=(12, 5))
label_mapping = {0: "Not Purchased", 1: "Purchased"}

for ax, weights in zip(axs, ("uniform", "distance")):
    # Work on an unfitted copy so the `knn` pipeline evaluated in earlier cells
    # is not silently reconfigured or refitted by this plotting cell.
    boundary_model = clone(knn).set_params(knn__weights=weights)
    # Fit on the training split: a boundary learned from the test points would
    # be compared against the very points it was fitted on.
    boundary_model.fit(X_train[:, 0:2], y_train)
    disp = DecisionBoundaryDisplay.from_estimator(
        boundary_model,
        X_test[:, 0:2],  # sets the plotted grid extent
        response_method="predict",
        plot_method="pcolormesh",
        ylabel="Estimated Salary",
        xlabel="Age",
        shading="auto",
        alpha=0.5,
        ax=ax,
    )
    scatter = disp.ax_.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
    # legend_elements() returns one handle per distinct value of `c` in ascending
    # order, so the labels are built from the same sorted values instead of the
    # order in which classes happen to appear in the dataset.
    disp.ax_.legend(
        scatter.legend_elements()[0],
        [label_mapping[int(label)] for label in np.unique(y_test)],
        loc="lower left",
        title="Classes",
    )
    # Read k from the estimator so the title cannot drift from the model.
    n_neighbors = boundary_model.get_params()["knn__n_neighbors"]
    disp.ax_.set_title(
        f"2-Class classification\n(k={n_neighbors}, weights={weights!r})"
    )

plt.show()