In [27]:
# Import all the required libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from matplotlib.colors import ListedColormap

In [28]:
# Loading the iris dataset
url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv'
iris = pd.read_csv(url)

In [29]:
# Extracting data and labels as numpy arrays from the dataset
X = iris.iloc[:, :-1].values
Y = iris.iloc[:, -1].values

print(X.shape)
print(Y.shape)

(150, 4)
(150,)


In [30]:
# Spliting the data into training and testing
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.20, random_state = 42)
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

(120, 4)
(120,)
(30, 4)
(30,)


In [31]:
# We first preprocess the data by standardizing the features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [32]:
# Defining the value of K
sqrt_len = (np.sqrt(len(X_train)))
K = int(np.floor(sqrt_len))
print(K)

10


In [33]:
# Training the KNN model
model = KNeighborsClassifier(n_neighbors=K, metric = 'minkowski', p = 2)
model.fit(X_train, Y_train)

In [34]:
# Printing confusion matrix for Training set
Y_pred = model.predict(X_train)
matrix = confusion_matrix(Y_train, Y_pred)
print(matrix)
accuracy_score(Y_train, Y_pred)

[[40  0  0]
 [ 0 39  2]
 [ 0  5 34]]


0.9416666666666667

In [35]:
# Printing confusion matrix for Test set
Y_pred = model.predict(X_test)
matrix = confusion_matrix(Y_test, Y_pred)
print(matrix)
accuracy_score(Y_test, Y_pred)

[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


1.0

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [55]:
# Visualization
X1_feature = iris.columns[0] # Sepal Length
X2_feature = iris.columns[1] # Sepal Width
X3_feature = iris.columns[2] # Petal Length
X4_feature = iris.columns[3] # Petal Width

# Define features and meshgrid
features = [X1_feature, X2_feature, X3_feature, X4_feature]
meshgrid_ranges = [(iris[feature].min() - 10, iris[feature].max() + 10, 1 if i < 2 else 1000) for i, feature in enumerate(features)]

# Plotting decision boundaries for all pairs of features
plt.figure(figsize=(12, 12))

for i in range(3):
    for j in range(i + 1, 4):
        # Create meshgrid for current pair of features
        X1, X2 = np.meshgrid(np.arange(start=meshgrid_ranges[i][0], stop=meshgrid_ranges[i][1], step=1),np.arange(start=meshgrid_ranges[j][0], stop=meshgrid_ranges[j][1], step=1))
        
        # Predict and plot decision boundaries
        plt.subplot(3, 2, (i * 2) + j - 1)
        plt.contourf(X1, X2, model.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), alpha=0.75, cmap=ListedColormap(('red', 'green')))
        plt.title(f'{features[i]} vs {features[j]}')
        plt.xlabel(features[i])
        plt.ylabel(features[j])

plt.tight_layout()
plt.show()

ValueError: num must be an integer with 1 <= num <= 6, not 0

<Figure size 1200x1200 with 0 Axes>

In [57]:
markers = ('s', 'x', 'o')
colors = ('red', 'blue', 'lightgreen')
cmap = ListedColormap(colors[:len(np.unique(iris.target))])
for idx, cl in enumerate(np.unique(Y_test)):
    plt.scatter(x=X[Y_test == cl, 0], y=X[Y_test == cl, 1],
               c=cmap(idx), marker=markers[idx], label=cl)

AttributeError: 'DataFrame' object has no attribute 'target'