In [1]:
#import starements
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
def print_data(X_train, X_test, y_train, y_test):
    print("Training Data:")
    for i, point in enumerate(X_train):
        print(point, "    ", y_train[i])
    print("\nTest Data:")
    for i, point in enumerate(X_test):
        print(point, "    ", y_test[i])

In [3]:
def generate_data(TOTAL_SAMPLE_SIZE,n,d,m,s):
    label_names = ["good", "bad", "ugly"]
    #Seed
    rng = np.random.default_rng(seed = 42)
    
    #Generate Data set and labels
    total_sample_set = rng.normal(m, s, d*TOTAL_SAMPLE_SIZE).reshape(TOTAL_SAMPLE_SIZE, d)
    labels = rng.choice(label_names, TOTAL_SAMPLE_SIZE)
    
    #Get the indicies for separating test and training data
    training_ind = np.random.choice(np.arange(TOTAL_SAMPLE_SIZE), n, replace=False) #get random indicies for training data
    test_ind = np.setdiff1d(np.arange(TOTAL_SAMPLE_SIZE), training_ind) #get the indicies not chosen
    
    #Separate the data
    X_train = total_sample_set[training_ind]
    y_train = labels[training_ind]
    
    X_test = total_sample_set[test_ind]
    y_test = labels[test_ind]
    
    return X_train, X_test, y_train, y_test

In [4]:
def weigh(distance):
    return 1/(distance + 0.0001)

def distances(train, point):
    return np.linalg.norm((train - point), axis=1)

def vote(distances, kclasses, k):
    labels = ["good", "bad", "ugly"]
    
    votes = np.zeros(len(labels), dtype=np.float32)
    
    weight = np.vectorize(weigh)
    
    w = weight(distances)
    
    votes = [np.sum(w[kclasses == labels[0]]), np.sum(w[kclasses == labels[1]]), np.sum(w[kclasses == labels[2]])]
    
    return labels[np.argmax(votes)] #get the winner

def kneighbors(X_train, y_train, X_test, k):
    classes = []
    for point in X_test:
        dists = distances(X_train, point)      #1.Find its Euclidean distance from each of the n points in the training data set 
        smallestK = np.argsort(dists)[:k]    #2.Pick the nearest K points (returns the indicies)
        classes.append(vote(dists[smallestK], y_train[smallestK], k)) #3.Output the class by weighted voting using the K nearest neighbors in the above step 

    return classes

In [5]:
TOTAL_SAMPLE_SIZE = 50 # Total number of samples (data points or vectors) in the training set plus test set
n = 45 #Number of samples (data points or vectors) in the training set
d = 3 #Number of features
K = 9 #Stipulated number of nearest neighbors
m = 5 #mean of normal distribution
s = 2 #std of normal distrubution

In [6]:
# Run 1
X_train, X_test, y_train, y_test = generate_data(TOTAL_SAMPLE_SIZE, n, d, m, s)
print_data(X_train, X_test, y_train, y_test)

result = kneighbors(X_train, y_train, X_test, K)
print(result)
print((np.sum(result == y_test)/len(result))*100)

Training Data:
[1.63426046 4.33022994 5.32550613]      ugly
[6.17244466 6.42245316 6.58669447]      good
[5.02498824 5.96149332 5.89306235]      ugly
[4.58912488 3.09995589 4.32193385]      ugly
[3.29391214 6.75879595 6.55558387]      bad
[4.28747206 6.47503114 3.13276464]      good
[5.1320614  7.25448241 5.93501868]      good
[4.30254986 4.07529641 6.71595176]      good
[2.95300501 5.35855127 5.43999337]      ugly
[4.84056358 1.62533113 2.10577506]      ugly
[6.38097071 4.14549471 5.31707938]      bad
[6.35782713 5.13515814 5.5782388 ]      good
[5.60943416 2.92003179 6.50090239]      bad
[4.61739135 2.44862735 2.73342557]      good
[4.05925469 3.7222443  4.4497155 ]      bad
[7.92660578 2.62247389 3.72049693]      ugly
[9.2832952  4.18716997 3.97551454]      good
[6.26257645 2.08568836 4.36065757]      good
[6.68061627 1.54535915 5.86884729]      bad
[7.71837515 6.67022249 5.71374212]      good
[6.25118079 4.38130692 5.91355048]      bad
[3.37411781 4.16928548 3.7758064 ]      good
[

In [7]:
# Run 2
X_train, X_test, y_train, y_test = generate_data(TOTAL_SAMPLE_SIZE, n, d, m, s)
print_data(X_train, X_test, y_train, y_test)

result = kneighbors(X_train, y_train, X_test, K)
print(result)
print((np.sum(result == y_test)/len(result))*100)

Training Data:
[5.73088813 5.82546522 5.86164201]      good
[4.77210508 3.31968705 3.35103757]      ugly
[3.67614812 4.27389231 4.23652421]      bad
[4.30254986 4.07529641 6.71595176]      good
[6.30118558 6.48650834 6.08630854]      ugly
[3.29391214 6.75879595 6.55558387]      bad
[5.25568081 4.36751482 4.96639768]      good
[6.17244466 6.42245316 6.58669447]      good
[2.95300501 5.35855127 5.43999337]      ugly
[ 8.99346178 10.82772493  5.82881887]      bad
[5.4754712  3.81170009 2.10788429]      good
[6.27030189 4.55555461 2.05838741]      good
[3.37245454 6.23195885 7.25794459]      bad
[3.37411781 4.16928548 3.7758064 ]      good
[3.18904189 4.24367489 7.5984566 ]      good
[6.26257645 2.08568836 4.36065757]      good
[3.14684812 4.22038039 2.2466277 ]      ugly
[5.14425902 3.94101458 5.46535242]      bad
[4.61739135 2.44862735 2.73342557]      good
[6.33077022 4.80302903 4.15340338]      bad
[3.63814091 7.44508268 4.69094104]      ugly
[1.63426046 4.33022994 5.32550613]      ugl