# Title: k-Nearest Neighbors (k-NN) for Classification

In this exercise, you will implement a k-NN classifier for a binary classification problem using scikit-learn. You will work with a synthetic dataset generated using the `make_moons` function. The goal is to build a k-NN model, predict the target labels, and evaluate its performance.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [None]:
# TODO: Generate a moons-shaped dataset for binary classification. Use make_moons function
# TODO: Try multiple values for noise level (e.g., noise=0.1, 0.2, 0.3 etc.)
# The plotting cell indexes X[:, 0] and X[:, 1], so X must be a 2-D array whose
# first two columns are the features; y holds one class label per sample.
X, y = # TODO

# TODO: Split the dataset into training and testing sets
# The unpacking order below (train features, test features, train labels,
# test labels) is the order train_test_split returns when called with X and y.
X_train, X_test, y_train, y_test = # TODO

# TODO: Implement the k-NN algorithm with scikit-learn's KNeighborsClassifier.
# TODO: Try multiple values for k of 3, 5, 7 etc.
# k is the number of neighbors to pass to the classifier.
k = # TODO
knn_classifier = # TODO

# TODO: Train the k-NN classifier on the training data

# TODO: Predict on the training and testing data
# Both splits are predicted so that a training and a test accuracy can be reported below.
y_train_pred = # TODO
y_test_pred = # TODO

# TODO: Calculate the accuracy
# accuracy_score (imported above) takes the true labels and the predicted labels.
train_accuracy = # TODO
test_accuracy = # TODO

# Print the accuracy
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

In [None]:
# Plot the decision boundaries
# Requires X, y_train, y_test and a fitted knn_classifier from the previous cell.
plt.figure(figsize=(8, 4))

# Determine the minimum and maximum values of the features to define the plotting boundaries
# Each range extends 1 unit beyond the smallest and largest observed value of that feature.
h = .02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

# Create a meshgrid of points covering the entire feature space with a specified step size 'h'
# The meshgrid will be used to predict the class labels for each point in the feature space
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Predict the class labels for each point in the meshgrid using the k-NN classifier
# np.c_ stacks the flattened x and y coordinates as two columns, one row per mesh point.
Z = knn_classifier.predict(np.c_[xx.ravel(), yy.ravel()])

# Reshape the predicted labels to match the shape of the meshgrid
Z = Z.reshape(xx.shape)

# Plot the decision boundaries as filled contour plots, where different regions represent different classes
plt.contourf(xx, yy, Z, cmap='viridis', alpha=0.6)

# TODO: Plot the training and testing data points on the same graph
# Replace each "# TODO" with the feature array that matches the labels passed as c=
# (training points are drawn as squares, test points as triangles).
plt.scatter(# TODO[:, 0], # TODO[:, 1], c=y_train, cmap='viridis', edgecolor='black', linewidth=1, marker='s', label='Train Data')
plt.scatter(# TODO[:, 0], # TODO[:, 1], c=y_test, cmap='viridis', edgecolor='black', linewidth=1, marker='^', label='Test Data')

# Customize plot
plt.title('Decision Boundaries for k-Nearest Neighbors (k-NN) Classification on Moons-Shaped Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')

# Colorbar ticks are placed at 0 and 1 — presumably the two class labels; confirm against y.
plt.colorbar(ticks=[0, 1], label='Class')
plt.legend()
plt.grid()

plt.show()

# Decision Trees for Classification

In this exercise, you will implement a decision tree classifier for a binary classification problem using scikit-learn. You will work with a synthetic dataset generated using the `make_moons` function. The goal is to build a decision tree model, predict the target labels, and evaluate its performance.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [None]:
# TODO: Generate a moons-shaped dataset for binary classification. Use make_moons function
# TODO: Try multiple values for noise level (e.g., noise=0.1, 0.2, 0.3 etc.)
# The plotting cell indexes X[:, 0] and X[:, 1], so X must be a 2-D array whose
# first two columns are the features; y holds one class label per sample.
X, y = # TODO

# TODO: Split the dataset into training and testing sets
# The unpacking order below (train features, test features, train labels,
# test labels) is the order train_test_split returns when called with X and y.
X_train, X_test, y_train, y_test = # TODO

# TODO: Implement a decision tree with scikit-learn's DecisionTreeClassifier.
# TODO: Try multiple values for max depth of 2, 3, 5 etc.
dt_classifier = # TODO

# TODO: Train the decision tree classifier on the training data

# TODO: Predict on the training and testing data
# Both splits are predicted so that a training and a test accuracy can be reported below.
y_train_pred = # TODO
y_test_pred = # TODO

# TODO: Calculate the accuracy of the model
# accuracy_score (imported above) takes the true labels and the predicted labels.
train_accuracy = # TODO
test_accuracy = # TODO

# Print the training and testing accuracies
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

In [None]:
# Plot the decision boundaries
# Requires X, y_train, y_test and a fitted dt_classifier from the previous cell.
plt.figure(figsize=(8, 4))

# Determine the minimum and maximum values of the features to define the plotting boundaries
# Each range extends 1 unit beyond the smallest and largest observed value of that feature.
h = .02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

# Create a meshgrid of points covering the entire feature space with a specified step size 'h'
# The meshgrid will be used to predict the class labels for each point in the feature space
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Predict the class labels for each point in the meshgrid using the decision tree classifier
# np.c_ stacks the flattened x and y coordinates as two columns, one row per mesh point.
Z = dt_classifier.predict(np.c_[xx.ravel(), yy.ravel()])

# Reshape the predicted labels to match the shape of the meshgrid
Z = Z.reshape(xx.shape)

# Plot the decision boundaries as filled contour plots, where different regions represent different classes
plt.contourf(xx, yy, Z, cmap='viridis', alpha=0.6)

# TODO: Plot the training and testing data points on the same graph
# Replace each "# TODO" with the feature array that matches the labels passed as c=
# (training points are drawn as squares, test points as triangles).
plt.scatter(# TODO[:, 0], # TODO[:, 1], c=y_train, cmap='viridis', edgecolor='black', linewidth=1, marker='s', label='Train Data')
plt.scatter(# TODO[:, 0], # TODO[:, 1], c=y_test, cmap='viridis', edgecolor='black', linewidth=1, marker='^', label='Test Data')

# Customize plot
# The title names the model whose boundaries this cell draws: the decision tree
# (dt_classifier), not the k-NN model from the earlier exercise.
plt.title('Decision Boundaries for Decision Tree Classification on Moons-Shaped Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')

# Colorbar ticks are placed at 0 and 1 — presumably the two class labels; confirm against y.
plt.colorbar(ticks=[0, 1], label='Class')
plt.legend()
plt.grid()

# Render the finished figure.
plt.show()