In [None]:
import numpy as np
from collections import Counter

# ---------------------------
# Dataset
# ---------------------------
# Five training samples (the integers 1..5) and their parity labels.
numbers = np.arange(1, 6)
labels = np.array(
    ["Odd", "Even", "Odd", "Even", "Odd"]
)

# Engineered feature: remainder after dividing by two (1 -> Odd, 0 -> Even).
features = numbers % 2

# ---------------------------
# KNN function
# ---------------------------
def knn_predict(target, features, labels, k=3, sample_ids=None):
    """
    Predict the label of `target` with a k-nearest-neighbor vote in 1D.

    - target: number to predict; it is reduced to the same engineered
      feature as the training data (target % 2)
    - features: engineered feature values (x % 2), one per sample
    - labels: descriptive label of each sample ("Odd"/"Even"), aligned
      with features
    - k: number of neighbors that vote (must be >= 1; if it exceeds the
      number of samples, every sample votes)
    - sample_ids: optional values shown in the "Number" column of the
      printed neighbor table, assumed aligned with features. When
      omitted, the notebook-level `numbers` array is used.

    Prints the neighbor table, then returns the most common label among
    the k nearest samples. If several labels tie, the one that appears
    first among the ranked neighbors wins.

    Raises ValueError if k < 1, if there are no samples, or if features
    and labels differ in length.
    """
    # k=0 would leave nobody to vote and a negative k would slice from the
    # wrong end of the ranking, so reject both up front.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    # Accept plain sequences as well as NumPy arrays.
    features = np.asarray(features)
    if len(features) == 0:
        raise ValueError("cannot predict from an empty training set")
    if len(features) != len(labels):
        raise ValueError(
            f"features and labels must have the same length, "
            f"got {len(features)} and {len(labels)}"
        )

    # Only fall back to the notebook-level array when no ids were supplied,
    # so the function can be used without that global being defined.
    ids = numbers if sample_ids is None else sample_ids

    # Engineer the target the same way the training features were built.
    target_feature = target % 2

    # In 1D, Euclidean and Manhattan distance are both |a - b|.
    distances = np.abs(features - target_feature)

    # sorted() is stable, so equidistant samples keep their dataset order.
    ranked = sorted(zip(distances, ids, labels), key=lambda row: row[0])
    nearest = ranked[:k]

    print(f"Target: {target} (feature={target_feature})")
    print("Nearest neighbors (Distance, Number, Label):")
    for d, num, lab in nearest:
        print(f"{d} , {num} , {lab}")

    # Majority vote over the labels of the selected neighbors.
    neighbor_labels = [lab for _, _, lab in nearest]
    return Counter(neighbor_labels).most_common(1)[0][0]

# ---------------------------
# Test
# ---------------------------
# Classify 6; the function prints its neighbor table before this result line.
print(f"Prediction: {knn_predict(target=6, features=features, labels=labels, k=3)}")

Target: 6 (feature=0)
Nearest neighbors (Distance, Number, Label):
0 , 2 , Even
0 , 4 , Even
1 , 1 , Odd
Prediction: Even


In [None]:
# ---------------------------
# Dataset (example in 2D)
# ---------------------------
# Four 2-D training points, one (x, y) pair per row, with their class labels.
features = np.array([(1, 2), (2, 3), (3, 1), (4, 5)])
labels = np.array(
    ["A", "A", "B", "B"]
)

# ---------------------------
# Cosine similarity function
# ---------------------------
def cosine_similarity(vec1, vec2):
    """
    Cosine of the angle between two vectors: dot(a, b) / (|a| * |b|).

    - vec1, vec2: 1-D vectors of equal length (arrays or plain sequences)

    Returns 1 for vectors pointing the same way, 0 for orthogonal vectors
    and -1 for opposite vectors; magnitude is ignored. A zero-magnitude
    vector has no direction, so the similarity is defined as 0.0 instead
    of dividing by zero, which would produce NaN and make any ranking by
    similarity unreliable.
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # Keep the return type consistent with the regular path.
        return np.float64(0.0)
    return np.dot(vec1, vec2) / norm_product

# ---------------------------
# KNN using Cosine Similarity
# ---------------------------
def knn_cosine(target, features, labels, k=3):
    """
    Predict the label of `target` by majority vote among the k samples
    whose cosine similarity to it is highest.

    - target: vector to classify
    - features: sequence of sample vectors, each compared to target
    - labels: label of each sample, aligned with features
    - k: number of neighbors that vote (must be >= 1; if it exceeds the
      number of samples, every sample votes)

    Prints the similarities, the ranking and the chosen neighbors as it
    goes, then returns the winning label. If several labels tie, the one
    that appears first among the ranked neighbors wins.

    Raises ValueError if k < 1, if there are no samples, or if features
    and labels differ in length.
    """
    # k=0 would leave nobody to vote and a negative k would slice from the
    # wrong end of the ranking, so reject both up front.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")
    if len(features) == 0:
        raise ValueError("cannot predict from an empty training set")
    if len(features) != len(labels):
        raise ValueError(
            f"features and labels must have the same length, "
            f"got {len(features)} and {len(labels)}"
        )

    # Compute cosine similarity to all points
    similarities = [cosine_similarity(target, f) for f in features]

    print("Similarities: ", similarities)

    # Combine similarity, features, and labels
    combined = list(zip(similarities, features, labels))

    print("Combined:", combined)

    # Higher similarity means closer, so rank in descending order. The sort
    # is stable: equally similar samples keep their dataset order.
    ranked = sorted(combined, key=lambda row: row[0], reverse=True)

    print("Closed Similarities:", ranked)

    # Pick k most similar neighbors
    nearest = ranked[:k]

    print(f"Target: {target}")
    print("Nearest neighbors (Similarity, Features, Label):")
    for sim, feat, lab in nearest:
        print(f"{sim:.3f}, {feat}, {lab}")

    # Majority vote over the labels of the selected neighbors.
    neighbor_labels = [lab for _, _, lab in nearest]
    prediction = Counter(neighbor_labels).most_common(1)[0][0]

    print(f"Prediction: {prediction}")
    return prediction

# ---------------------------
# Test
# ---------------------------
# Predicts A although B is the expected answer: by Euclidean distance the three
# nearest points to (4, 4) are [4, 5] (B), [2, 3] (A) and [3, 1] (B). Cosine
# similarity compares direction only and ignores magnitude, so [1, 2] outranks
# [3, 1] and the vote flips to A.
target_point = np.array((4, 4))
knn_cosine(target=target_point, features=features, labels=labels, k=3)


Similarities:  [np.float64(0.9486832980505138), np.float64(0.98058067569092), np.float64(0.8944271909999159), np.float64(0.9938837346736188)]
Combined: [(np.float64(0.9486832980505138), array([1, 2]), np.str_('A')), (np.float64(0.98058067569092), array([2, 3]), np.str_('A')), (np.float64(0.8944271909999159), array([3, 1]), np.str_('B')), (np.float64(0.9938837346736188), array([4, 5]), np.str_('B'))]
Closed Similarities: [(np.float64(0.9938837346736188), array([4, 5]), np.str_('B')), (np.float64(0.98058067569092), array([2, 3]), np.str_('A')), (np.float64(0.9486832980505138), array([1, 2]), np.str_('A')), (np.float64(0.8944271909999159), array([3, 1]), np.str_('B'))]
Target: [4 4]
Nearest neighbors (Similarity, Features, Label):
0.994, [4 5], B
0.981, [2 3], A
0.949, [1 2], A
Prediction: A


np.str_('A')