In [None]:
# used for creating the DataFrames and handling data.
import pandas as pd

# not explicitly used, but Pandas depends on it
#import numpy as np

# used for plotting the scatter plot of the dataset.
import matplotlib.pyplot as plt

# used to enhance the color and style of the scatter plot
import seaborn as sns

# used to create, train, and use the K-Nearest Neighbors model
from sklearn.neighbors import KNeighborsClassifier

# used to split the dataset into training and testing sets
from sklearn.model_selection import train_test_split

# used for evaluating the KNN model by generating a confusion matrix
# and classification report.
from sklearn.metrics import confusion_matrix, classification_report

# Toy dataset: ten samples, written one row per sample as
# (Feature1, Feature2, Label). Five samples carry label 'A', five carry 'B'.
df = pd.DataFrame(
    [
        (2, 3, 'A'),
        (4, 4, 'B'),
        (4, 2, 'B'),
        (5, 2, 'A'),
        (3, 5, 'B'),
        (1, 3, 'A'),
        (4, 5, 'B'),
        (5, 1, 'A'),
        (2, 4, 'A'),
        (3, 2, 'B'),
    ],
    columns=['Feature1', 'Feature2', 'Label'],
)

# Scatter plot of Feature1 against Feature2, coloured by class label.
# Both classes use the same 'o' marker, so colour is the only visual cue.
plt.figure(figsize=(6, 4))
sns.scatterplot(
    data=df,
    x='Feature1',
    y='Feature2',
    hue='Label',
    style='Label',
    palette=['orange', 'green'],
    markers=['o', 'o'],
    s=100,
).set(title='Dataset', xlabel='Feature1', ylabel='Feature2')  # title + axis labels in one call
plt.legend(title='Label')
plt.grid(True)
plt.show()

# Encode the class labels as integers ('A' -> 0, 'B' -> 1).
# The 'Label' column of df is overwritten with the encoded values.
label_mapping = dict(A=0, B=1)
df['Label'] = df['Label'].map(label_mapping)

# Feature matrix (two columns) and target vector
X = df.loc[:, ['Feature1', 'Feature2']]
y = df.loc[:, 'Label']

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# K-Nearest Neighbors classifier that looks at the 3 nearest neighbours,
# fitted on the training portion only
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X=X_train, y=y_train)

In [None]:
# Making predictions and evaluating the model
# Predict the held-out test samples, then compare the predictions with the
# true labels.
y_pred = knn.predict(X_test)
# Rows of the confusion matrix are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)
# zero_division=0: a metric whose denominator is zero is reported as 0
# instead of triggering a warning.
class_report = classification_report(y_test, y_pred, zero_division=0)

# Output the confusion matrix and classification report
# sep="" is required here: print()'s default separator would insert a space
# right after the "\n", pushing the first matrix row / the report header one
# column to the right of the lines that follow it.
print("Confusion Matrix:\n", conf_matrix, sep="")
print("\nClassification Report:\n", class_report, sep="")

In [None]:
# Second, larger dataset: 20 samples (twice as many as the first dataset),
# written one row per sample as (Feature1, Feature2, Label).
# 9 samples carry label 'A' and 11 carry label 'B'.
# Note: the point (5, 2) occurs twice, once as 'A' and once as 'B'.
df = pd.DataFrame(
    [
        (2, 3, 'A'),
        (5, 3, 'B'),
        (4, 2, 'B'),
        (5, 2, 'A'),
        (3, 5, 'A'),
        (1, 3, 'A'),
        (2, 5, 'A'),
        (5, 1, 'B'),
        (2, 4, 'A'),
        (3, 2, 'B'),
        (6, 3, 'B'),
        (3, 2, 'B'),
        (2, 1, 'B'),
        (1, 5, 'A'),
        (5, 2, 'B'),
        (2, 3, 'A'),
        (3, 2, 'B'),
        (5, 4, 'B'),
        (2, 4, 'A'),
        (1, 1, 'B'),
    ],
    columns=['Feature1', 'Feature2', 'Label'],
)

# Scatter plot of Feature1 against Feature2, coloured by class label.
# Both classes use the same 'o' marker, so colour is the only visual cue.
plt.figure(figsize=(6, 4))
sns.scatterplot(
    data=df,
    x='Feature1',
    y='Feature2',
    hue='Label',
    style='Label',
    palette=['orange', 'green'],
    markers=['o', 'o'],
    s=100,
).set(title='Dataset', xlabel='Feature1', ylabel='Feature2')  # title + axis labels in one call
plt.legend(title='Label')
plt.grid(True)
plt.show()

# Encode the class labels as integers ('A' -> 0, 'B' -> 1).
# The 'Label' column of df is overwritten with the encoded values.
label_mapping = dict(A=0, B=1)
df['Label'] = df['Label'].map(label_mapping)

# Feature matrix (two columns) and target vector
X = df.loc[:, ['Feature1', 'Feature2']]
y = df.loc[:, 'Label']

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# K-Nearest Neighbors classifier that looks at the 3 nearest neighbours,
# fitted on the training portion only
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X=X_train, y=y_train)

In [None]:
# Making predictions and evaluating the model
# Predict the held-out test samples, then compare the predictions with the
# true labels.
y_pred = knn.predict(X_test)
# Rows of the confusion matrix are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)
# zero_division=0: a metric whose denominator is zero is reported as 0
# instead of triggering a warning.
class_report = classification_report(y_test, y_pred, zero_division=0)

# Output the confusion matrix and classification report
# sep="" is required here: print()'s default separator would insert a space
# right after the "\n", pushing the first matrix row / the report header one
# column to the right of the lines that follow it.
print("Confusion Matrix:\n", conf_matrix, sep="")
print("\nClassification Report:\n", class_report, sep="")