In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from matplotlib.colors import ListedColormap
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the Palmer penguins example dataset bundled with seaborn.
# The dataset name must be lowercase: seaborn builds the download URL / cache
# file name from it and validates it against the list of known datasets, so
# 'Penguins' raises ValueError on a machine without a case-insensitive cache hit.
data_set = sns.load_dataset('penguins')

# Drop every row containing a missing value so the classifier only sees
# complete measurements. `data_set` keeps the raw frame for reference.
df = data_set.dropna()
print("Dataset has been loaded !")

In [None]:
# Features: two body measurements; target: the penguin species label.
X = df.loc[:, ['flipper_length_mm', 'bill_length_mm']]
y = df.loc[:, 'species']

# Hold out 20 % of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Standardise the features before k-NN, since the classifier is distance based
# and the two measurements live on different scales.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('model', KNeighborsClassifier()),
    ]
)

# fit() returns the pipeline itself, so `model` and `pipe` are the same object.
model = pipe.fit(X_train, y_train)
print("Model has been trained !")

In [None]:
# Predict species for the held-out rows and report the share predicted correctly.
pred = model.predict(X_test)
print("Prediction has been made !")
print(f"Accuracy score is: {accuracy_score(y_true=y_test, y_pred=pred):.4f}")

In [None]:

# Show the class labels in the order the fitted classifier stores them; the
# plotting code below relies on this order when mapping species to colours.
# Expected output is something like: ['Adelie' 'Chinstrap' 'Gentoo']
print(pipe.named_steps['model'].classes_)

In [None]:
# Decision-region plot: draw the regions predicted by the fitted pipeline over
# the two features, overlay the test points coloured by their TRUE species, and
# mark the test points the model got wrong.

# One colour per class, in pipe.classes_ order (code 0 -> Blue, 1 -> Orange, ...).
colors = ['Blue','Orange','Green']
colors_map = ListedColormap(colors)

# Pin the colour limits to the full range of class codes. Without this,
# matplotlib rescales to the min/max of whatever codes are present, so a plot
# that is missing one class would silently shift the code -> colour mapping.
code_limits = {'vmin': 0, 'vmax': len(colors) - 1}

# The regions are drawn exactly once, with the same colormap as the points and
# the legend, so a region's colour always matches the species it stands for.
disp = DecisionBoundaryDisplay.from_estimator(
    pipe,
    X,
    grid_resolution=500,
    eps=0.5,
    response_method='predict',
    plot_method='pcolormesh',
    cmap=colors_map,
    alpha=0.6,
    **code_limits
)

# Encode the true labels as integer codes using the classifier's own class
# order, so code i refers to pipe.classes_[i] and therefore to colors[i].
y_numeric = pd.Categorical(y_test, categories=pipe.classes_).codes

disp.ax_.scatter(X_test['flipper_length_mm'], X_test['bill_length_mm'],
                 c=y_numeric,   # integer class codes, mapped through colors_map
                 cmap=colors_map, edgecolor='k', s=50,
                 **code_limits)

# Boolean mask (indexed like X_test) of the test rows that were misclassified.
wrong = y_test != pred
disp.ax_.scatter(X_test.loc[wrong, 'flipper_length_mm'], X_test.loc[wrong, 'bill_length_mm'],
                 facecolors='None', edgecolors='red', s=80, linewidths=3, marker='X', label='Misclassified')

# Legend entries are built by hand so that species colours and the
# misclassification marker share a single legend.
# linestyle='None' keeps the proxy handle to a bare marker (no line through it),
# and the hollow red face matches how the misclassified points are drawn.
misclassified_legend_element = [plt.Line2D([0], [0], marker='X', linestyle='None', color='red',
                                           markerfacecolor='none', markeredgecolor='red',
                                           markeredgewidth=2, markersize=10, label='Misclassified')]

species_legend_element = [plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=c, markersize=10, label=species)
                          for species, c in zip(pipe.classes_, colors)]

final_legend = species_legend_element + misclassified_legend_element

# Anchor the legend just outside the right edge of the axes so it does not
# cover any data points.
disp.ax_.legend(handles=final_legend, title='species', loc='center left', bbox_to_anchor=(1.02, 0.5))
plt.tight_layout()

In [None]:
# ### Making a confusion matrix just to double-check the misclassified penguins data

# pred1 = model.predict(X)
# c1 = confusion_matrix(y, pred1, labels=['Adelie','Chinstrap','Gentoo'])
# print(c1)
# disp = ConfusionMatrixDisplay(c1, display_labels=('Adelie','Chinstrap','Gentoo'))
# disp.plot(cmap='Blues')
# plt.show()

# cm = confusion_matrix(y_test, pred, labels=['Adelie','Chinstrap','Gentoo'])
# print(cm)
# disp = ConfusionMatrixDisplay(cm, display_labels=('Adelie','Chinstrap','Gentoo'))
# disp.plot(cmap='Greens')
# plt.show()



# mistakes = y_test != pred

# mask = X_test[mistakes].copy()
# mask['true'] = y_test[mistakes]
# mask['predicted'] = pred[mistakes]
# print(mask)