### Step 1: Import Necessary Libraries
```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
```

### Step 2: Load and Preprocess the Data
```python
# Load the dataset into a DataFrame (the file name here is a placeholder)
data = pd.read_csv('your_dataset.csv')

# Separate features and target variable; `.values` converts to NumPy arrays
X = data.iloc[:, :-1].values  # All columns except the last one
y = data.iloc[:, -1].values  # The last column

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable from run to run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features. The scaler is fitted on the training split only and
# the same fitted transform is then applied to the test split, so the test
# rows do not influence the scaling statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
```

### Step 3: Implement Distance Calculation
```python
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two points.

    Args:
        a: NumPy array holding the coordinates of the first point.
        b: NumPy array holding the coordinates of the second point; it must
            be subtractable from ``a`` element-wise.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    delta = a - b
    squared = delta ** 2
    return np.sqrt(np.sum(squared))
```

### Step 4: Implement KNN Algorithm
```python
def knn_predict(X_train, y_train, X_test, k=3, distance=None):
    """Predict a label for each test point by majority vote of its k nearest neighbors.

    Args:
        X_train: Sequence of training points (one point per row).
        y_train: Training labels, indexable by row position in ``X_train``.
        X_test: Sequence of points to classify.
        k: Number of neighbors that vote; must be at least 1. If ``k`` is
            larger than the training set, every training point votes.
        distance: Optional callable ``distance(point, train_point)`` returning
            a number. Defaults to ``euclidean_distance``.

    Returns:
        A NumPy array with one predicted label per row of ``X_test``
        (empty when ``X_test`` is empty).

    Raises:
        ValueError: If ``k`` is smaller than 1 or the training set is empty.
    """
    if distance is None:
        distance = euclidean_distance
    if k < 1:
        # A negative k would otherwise be treated as a slice bound ("all but
        # the last |k| points") and silently vote with the wrong neighbors.
        raise ValueError(f'k must be at least 1, got {k}')
    if len(X_train) == 0:
        raise ValueError('X_train must contain at least one sample')

    predictions = []
    for x in X_test:
        # Compute distances from x to all training samples
        distances = [distance(x, x_train) for x_train in X_train]

        # A stable sort keeps equidistant points in training order, so the
        # selected neighbors are deterministic.
        k_indices = np.argsort(distances, kind='stable')[:k]
        k_nearest_labels = [y_train[i] for i in k_indices]

        # Majority vote; among equally frequent labels, Counter returns the
        # one that was encountered first (i.e. the one with the nearest vote).
        most_common = Counter(k_nearest_labels).most_common(1)
        predictions.append(most_common[0][0])

    return np.array(predictions)
```

### Step 5: Make Predictions
```python
# Number of neighbors that vote on each prediction
k = 3
# Predict one label per row of the test set using the training set as reference
y_pred = knn_predict(X_train, y_train, X_test, k)
```

### Step 6: Evaluate the Model
```python
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels with the same shape.

    Returns:
        The mean of the element-wise equality between the two inputs,
        i.e. a value between 0 and 1.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Convert first: comparing two plain lists with == yields a single bool
    # rather than an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        # Without this check mismatched shapes would broadcast or compare
        # as a whole and produce a misleading score.
        raise ValueError(
            f'y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}'
        )
    return np.mean(y_true == y_pred)

# Calculate accuracy on the held-out test set and report it as a percentage
# with two decimal places
test_accuracy = accuracy(y_test, y_pred)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')
```

### Step 7: Plotting the Decision Boundary (2D Example)
If you have a dataset with 2 features, you can visualize the decision boundary:
```python
def plot_decision_boundary(X, y, model, k=3, resolution=0.01):
    """Plot a model's predicted class regions over the first two features.

    A regular grid is laid over the range of the first two columns of ``X``
    (padded by 1 on each side), the model is asked for a label at every grid
    point, and the resulting regions are drawn behind a scatter plot of the
    samples.

    Args:
        X: 2-D NumPy array of samples; columns 0 and 1 are plotted.
        y: NumPy array of labels aligned with the rows of ``X``.
        model: Callable ``model(points, k)`` that returns one label per row
            of the ``(n_points, 2)`` array ``points``.
        k: Value forwarded to ``model`` as its second argument.
        resolution: Grid step along both axes. The number of grid points, and
            therefore of model evaluations, grows quadratically as this
            shrinks.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = plt.get_cmap('viridis')
    classes = np.unique(y)

    # Build the evaluation grid
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    grid_points = np.array([xx1.ravel(), xx2.ravel()]).T

    # contourf needs numeric heights, so each predicted label is replaced by
    # its position among the sorted unique labels of y. This also makes
    # string labels plottable.
    Z = np.searchsorted(classes, np.asarray(model(grid_points, k)))
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Plot all samples; styles wrap around so more than five classes do not
    # run off the end of the marker/color tuples.
    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx % len(colors)],
                    marker=markers[idx % len(markers)], label=cl,
                    edgecolor='black')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend(loc='upper left')
    plt.title('K-Nearest Neighbors Decision Boundary')
    plt.show()

# Example usage (only works with 2D data)
# plot_decision_boundary(X_train, y_train, lambda x, k: knn_predict(X_train, y_train, x, k), k)
```

### Additional Improvements and Extensions
- **Feature Scaling**: Ensuring all features are on a similar scale is crucial for KNN performance.
- **Distance Metrics**: Experiment with different distance metrics, such as Manhattan distance.
- **Weighted Voting**: Give more weight to closer neighbors when making predictions.
- **Hyperparameter Tuning**: Use cross-validation to find the optimal value of `k`.

### Cross-Validation for Hyperparameter Tuning
```python
from sklearn.model_selection import cross_val_score

def knn_cv(X_train, y_train, k, cv=5):
    """Estimate KNN accuracy for a given k with k-fold cross-validation.

    The samples are cut, in their existing order, into ``cv`` contiguous
    folds of near-equal size. Each fold is used once for validation while
    the remaining samples serve as the training set.

    Args:
        X_train: NumPy array of samples (one per row).
        y_train: NumPy array of labels aligned with ``X_train``.
        k: Number of neighbors passed to ``knn_predict``.
        cv: Number of folds; must be between 2 and the number of samples.

    Returns:
        The mean validation accuracy over all folds.

    Raises:
        ValueError: If ``cv`` is smaller than 2 or larger than the number of
            samples (either would leave a fold or the training set empty).
    """
    n_samples = len(X_train)
    if not 2 <= cv <= n_samples:
        raise ValueError(
            f'cv must be between 2 and the number of samples ({n_samples}), got {cv}'
        )

    # array_split spreads any remainder over the first folds, so every
    # sample is validated exactly once even when n_samples % cv != 0.
    folds = np.array_split(np.arange(n_samples), cv)

    scores = []
    for valid_idx in folds:
        # Everything outside the current fold is training data; the boolean
        # mask preserves the original sample order.
        train_mask = np.ones(n_samples, dtype=bool)
        train_mask[valid_idx] = False

        y_pred = knn_predict(X_train[train_mask], y_train[train_mask], X_train[valid_idx], k)
        scores.append(accuracy(y_train[valid_idx], y_pred))
    return np.mean(scores)

# Finding the best k value
k_values = range(1, 21)
cv_scores = [knn_cv(X_train, y_train, k) for k in k_values]

# Plotting the cross-validation scores
plt.plot(k_values, cv_scores)
plt.xlabel('k')
plt.ylabel('Cross-Validation Accuracy')
plt.title('Hyperparameter Tuning for k')
plt.show()

best_k = k_values[np.argmax(cv_scores)]
print(f'Best k: {best_k}')
```

---

### Step 1: Import Necessary Libraries
```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split
```

### Step 2: Load and Preprocess the Data
```python
# Load the dataset into a DataFrame (the file name here is a placeholder)
data = pd.read_csv('your_dataset.csv')

# Separate features and target variable; `.values` converts to NumPy arrays
X = data.iloc[:, :-1].values  # All columns except the last one
y = data.iloc[:, -1].values  # The last column

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable. No feature scaling is applied in this section.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
```

### Step 3: Define the Gini Impurity and Entropy Functions
```python
def gini(y):
    """Return the Gini impurity of a collection of class labels.

    Computed as ``1 - sum(p_c ** 2)`` where ``p_c`` is the fraction of
    entries that carry label ``c``.

    Args:
        y: Array-like of class labels.

    Returns:
        The impurity: 0.0 when all labels are equal (or ``y`` is empty),
        larger when the labels are more evenly mixed.
    """
    # asarray makes plain lists work; comparing a list to a scalar would not
    # be element-wise.
    y = np.asarray(y)
    if y.size == 0:
        return 0.0
    # One pass yields every class count instead of rescanning y per class.
    _, counts = np.unique(y, return_counts=True)
    proportions = counts / y.size
    return 1.0 - np.sum(proportions ** 2)

def entropy(y):
    """Return the Shannon entropy (in bits) of a collection of class labels.

    Computed as ``-sum(p_c * log2(p_c))`` where ``p_c`` is the fraction of
    entries that carry label ``c``.

    Args:
        y: Array-like of class labels.

    Returns:
        The entropy: 0 when all labels are equal (or ``y`` is empty),
        larger when the labels are more evenly mixed.
    """
    # asarray makes plain lists work; comparing a list to a scalar would not
    # be element-wise.
    y = np.asarray(y)
    if y.size == 0:
        return 0.0
    # Only labels that occur are counted, so every proportion is > 0 and
    # log2 is always defined.
    _, counts = np.unique(y, return_counts=True)
    proportions = counts / y.size
    return -np.sum(proportions * np.log2(proportions))
```

### Step 4: Define the Function to Split the Dataset
```python
def split_dataset(X, y, feature_index, threshold):
    """Partition samples and labels by comparing one feature to a threshold.

    Args:
        X: 2-D NumPy array of samples (one per row).
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` to compare.
        threshold: Rows whose feature value is ``<= threshold`` go left,
            rows whose value is ``> threshold`` go right.

    Returns:
        A tuple ``(X_left, X_right, y_left, y_right)``.
    """
    column = X[:, feature_index]
    # Both masks are computed explicitly rather than by negation, so a row
    # that satisfies neither comparison (e.g. NaN) lands in neither half.
    goes_left = column <= threshold
    goes_right = column > threshold
    X_left, y_left = X[goes_left], y[goes_left]
    X_right, y_right = X[goes_right], y[goes_right]
    return X_left, X_right, y_left, y_right
```

### Step 5: Define the Function to Find the Best Split
```python
def best_split(X, y, criterion):
    """Find the feature/threshold pair with the lowest weighted child impurity.

    Every distinct value of every feature is tried as a threshold. Samples
    with a feature value ``<= threshold`` go left and the rest go right;
    candidates that leave one side empty are ignored.

    Args:
        X: 2-D NumPy array of samples (one per row).
        y: NumPy array of labels aligned with the rows of ``X``.
        criterion: Callable that maps an array of labels to an impurity
            value (lower means purer).

    Returns:
        A tuple ``(feature_index, threshold)`` for the best split, or
        ``(None, None)`` if no candidate separates the samples into two
        non-empty groups. On ties the first candidate found wins.
    """
    best_feature, best_threshold, best_impurity = None, None, float('inf')
    n_features = X.shape[1]

    for feature_index in range(n_features):
        # Extract the column once per feature instead of once per threshold.
        column = X[:, feature_index]
        for threshold in np.unique(column):
            # Only the labels are needed to score a candidate, so the rows
            # of X are not copied here.
            y_left = y[column <= threshold]
            y_right = y[column > threshold]
            if len(y_left) == 0 or len(y_right) == 0:
                continue

            # Child impurities weighted by the share of samples in each child
            impurity = (len(y_left) * criterion(y_left) + len(y_right) * criterion(y_right)) / len(y)
            if impurity < best_impurity:
                best_feature, best_threshold, best_impurity = feature_index, threshold, impurity

    return best_feature, best_threshold
```

### Step 6: Define the Decision Tree Node
```python
class Node:
    """A single node of a binary decision tree.

    A node built with ``value`` acts as a leaf and carries a prediction.
    Otherwise it describes a split: ``feature`` and ``threshold`` define the
    test, and ``left`` / ``right`` hold the child nodes.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        """Store the split description, the children and the leaf value.

        ``value`` is keyword-only so it cannot be confused with the
        positional split arguments.
        """
        # Split test
        self.feature, self.threshold = feature, threshold
        # Child nodes
        self.left, self.right = left, right
        # Prediction stored at a leaf
        self.value = value

    def is_leaf_node(self):
        """Return True if this node holds a value (any value other than None)."""
        if self.value is None:
            return False
        return True
```

### Step 7: Build the Decision Tree
```python
class DecisionTree:
    """A decision tree classifier grown by greedy, recursive binary splitting.

    Args:
        criterion: ``'gini'`` selects Gini impurity; any other value selects
            entropy.
        max_depth: Maximum depth of the tree. ``None`` (the default) places
            no limit on depth.
        min_samples_split: A node with fewer samples than this becomes a
            leaf.
    """

    def __init__(self, criterion='gini', max_depth=None, min_samples_split=2):
        self.criterion = gini if criterion == 'gini' else entropy
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        # Root node of the fitted tree; None until fit() has been called
        self.root = None

    def fit(self, X, y):
        """Grow the tree from training data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples labels.

        Raises:
            ValueError: If no samples are given or X and y differ in length.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError('Cannot fit a tree on an empty dataset')
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same number of samples, got {len(X)} and {len(y)}'
            )
        self.root = self._grow_tree(X, y)

    def _should_stop(self, depth, num_labels, num_samples):
        """Return True when a node at ``depth`` must become a leaf."""
        # max_depth=None means "unlimited"; comparing an int with None would
        # raise TypeError, so the depth test is skipped in that case.
        depth_reached = self.max_depth is not None and depth >= self.max_depth
        return bool(depth_reached or num_labels == 1 or num_samples < self.min_samples_split)

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples (X, y) and return its root."""
        num_samples = X.shape[0]
        num_labels = len(np.unique(y))

        if self._should_stop(depth, num_labels, num_samples):
            return Node(value=self._most_common_label(y))

        feature, threshold = best_split(X, y, self.criterion)
        if feature is None:
            # No threshold separates the samples into two non-empty groups
            return Node(value=self._most_common_label(y))

        X_left, X_right, y_left, y_right = split_dataset(X, y, feature, threshold)
        left_child = self._grow_tree(X_left, y_left, depth + 1)
        right_child = self._grow_tree(X_right, y_right, depth + 1)
        return Node(feature, threshold, left_child, right_child)

    def _most_common_label(self, y):
        """Return the label that occurs most often in ``y``."""
        return Counter(y).most_common(1)[0][0]

    def predict(self, X):
        """Return a NumPy array with one predicted label per row of ``X``."""
        X = np.asarray(X)
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        """Follow the split tests from ``node`` down to a leaf and return its value."""
        if node.is_leaf_node():
            return node.value
        # Same rule as used when splitting: values <= threshold go left
        if x[node.feature] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)
```

### Step 8: Train the Model
```python
# Build a tree limited to depth 10 (Gini criterion by default) and fit it on
# the training split
tree = DecisionTree(max_depth=10)
tree.fit(X_train, y_train)
```

### Step 9: Make Predictions
```python
# Predict one label per row of the held-out test set
y_pred = tree.predict(X_test)
```

### Step 10: Evaluate the Model
```python
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels with the same shape.

    Returns:
        The mean of the element-wise equality between the two inputs,
        i.e. a value between 0 and 1.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    # Convert first: comparing two plain lists with == yields a single bool
    # rather than an element-wise result.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        # Without this check mismatched shapes would broadcast or compare
        # as a whole and produce a misleading score.
        raise ValueError(
            f'y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}'
        )
    return np.mean(y_true == y_pred)

# Calculate accuracy on the held-out test set and report it as a percentage
# with two decimal places
test_accuracy = accuracy(y_test, y_pred)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')
```

### Step 11: Plot the Decision Boundary (Optional)
Visualizing decision trees can be complex, especially with high-dimensional data. For 2D data, you can visualize the decision boundaries:
```python
def plot_decision_boundary(X, y, model, resolution=0.01):
    """Plot a fitted model's predicted class regions over the first two features.

    A regular grid is laid over the range of the first two columns of ``X``
    (padded by 1 on each side), the model predicts a label at every grid
    point, and the resulting regions are drawn behind a scatter plot of the
    samples.

    Args:
        X: 2-D NumPy array of samples; columns 0 and 1 are plotted.
        y: NumPy array of labels aligned with the rows of ``X``.
        model: Object whose ``predict(points)`` returns one label per row of
            the ``(n_points, 2)`` array ``points``.
        resolution: Grid step along both axes. The number of grid points, and
            therefore of predictions, grows quadratically as this shrinks.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = plt.get_cmap('viridis')
    classes = np.unique(y)

    # Build the evaluation grid
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    grid_points = np.array([xx1.ravel(), xx2.ravel()]).T

    # contourf needs numeric heights, so each predicted label is replaced by
    # its position among the sorted unique labels of y. This also makes
    # string labels plottable.
    Z = np.searchsorted(classes, np.asarray(model.predict(grid_points)))
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # Plot all samples; styles wrap around so more than five classes do not
    # run off the end of the marker/color tuples.
    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx % len(colors)],
                    marker=markers[idx % len(markers)], label=cl,
                    edgecolor='black')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend(loc='upper left')
    plt.title('Decision Tree Decision Boundary')
    plt.show()

# Example usage (only works with 2D data)
# plot_decision_boundary(X_train, y_train, tree)
```

### Additional Improvements and Extensions
- **Pruning**: Implement pruning techniques to avoid overfitting.
- **Feature Importance**: Calculate and visualize feature importance.
- **Cross-Validation**: Use cross-validation to optimize hyperparameters like `max_depth` and `min_samples_split`.

### Cross-Validation for Hyperparameter Tuning
```python
from sklearn.model_selection import cross_val_score

def cross_validation_accuracy(X, y, criterion='gini', max_depth=None, min_samples_split=2, cv=5):
    """Estimate DecisionTree accuracy with k-fold cross-validation.

    The folds are built by hand because ``DecisionTree`` only offers
    ``fit`` and ``predict``; scikit-learn's ``cross_val_score`` needs to
    clone the estimator, which requires a ``get_params`` method.

    The samples are cut, in their existing order, into ``cv`` contiguous
    folds of near-equal size. For each fold a fresh tree is fitted on the
    remaining samples and scored on the fold.

    Args:
        X: Array-like of shape (n_samples, n_features).
        y: Array-like of n_samples labels.
        criterion: Forwarded to ``DecisionTree``.
        max_depth: Forwarded to ``DecisionTree``.
        min_samples_split: Forwarded to ``DecisionTree``.
        cv: Number of folds; must be between 2 and the number of samples.

    Returns:
        The mean validation accuracy over all folds.

    Raises:
        ValueError: If ``cv`` is smaller than 2 or larger than the number of
            samples.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)
    if not 2 <= cv <= n_samples:
        raise ValueError(
            f'cv must be between 2 and the number of samples ({n_samples}), got {cv}'
        )

    scores = []
    # array_split spreads any remainder over the first folds, so every
    # sample is validated exactly once.
    for valid_idx in np.array_split(np.arange(n_samples), cv):
        train_mask = np.ones(n_samples, dtype=bool)
        train_mask[valid_idx] = False

        # A new model per fold so that no state carries over between folds
        model = DecisionTree(criterion=criterion, max_depth=max_depth, min_samples_split=min_samples_split)
        model.fit(X[train_mask], y[train_mask])
        scores.append(np.mean(model.predict(X[valid_idx]) == y[valid_idx]))
    return np.mean(scores)

# Finding the best hyperparameters
depths = range(1, 21)
cv_scores = [cross_validation_accuracy(X_train, y_train, max_depth=depth) for depth in depths]

# Plotting the cross-validation scores
plt.plot(depths, cv_scores)
plt.xlabel('Max Depth')
plt.ylabel('Cross-Validation Accuracy')
plt.title('Hyperparameter Tuning for Max Depth')
plt.show()

best_depth = depths[np.argmax(cv_scores)]
print(f'Best Max Depth: {best_depth}')
```