In [None]:
import numpy as np

# KNN: nearest-neighbor (k = 1) classification

In [None]:
def distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    point1, point2 : array_like
        Coordinates of the two points. Plain lists/tuples are accepted as
        well as numpy arrays; both arguments must have compatible shapes.

    Returns
    -------
    float
        The L2 norm of ``point1 - point2``.
    """
    # np.asarray is a no-op for ndarrays, but lets callers pass plain Python
    # sequences, for which the ``-`` operator is otherwise undefined.
    return np.linalg.norm(np.asarray(point1) - np.asarray(point2))

def knn_inferencing(dataset, testing_point, dim):
    """Classify a point with the label of its single nearest training sample.

    Parameters
    ----------
    dataset : sequence of array_like
        Training samples. Each sample holds ``dim`` feature values followed
        by its class label at index ``dim``.
    testing_point : array_like
        Point to classify. Only its first ``dim`` entries are used as
        features; anything after them (e.g. a label slot) is ignored.
    dim : int
        Number of feature dimensions.

    Returns
    -------
    The label of the training sample closest to ``testing_point`` in
    Euclidean distance. On ties the earliest sample in ``dataset`` wins.
    If ``dataset`` is empty, 0 is returned.
    """
    # Start from +inf instead of a bound derived from the expected data range:
    # with a finite bound, a query farther away than the bound from every
    # training sample would never update the minimum and the placeholder
    # label 0 would be returned instead of a real class.
    min_length = float("inf")
    min_label = 0

    x_test = np.asarray(testing_point[0:dim])

    for sample in dataset:
        # Split the sample into its feature vector and its class label.
        x_train = np.asarray(sample[0:dim])
        y_train = sample[dim]

        dist = distance(x_train, x_test)

        # Strict comparison keeps the first sample seen when distances tie.
        if dist < min_length:
            min_length = dist
            min_label = y_train

    # Return the label of the nearest neighbor
    return min_label


In [None]:
# Define dimension of the data
dim = 2

# Number of samples per class in the training set and, separately, in the
# testing set (each trial therefore draws 2 * target_num_samples per class).
target_num_samples = 40

# Number of independent trials (fresh random dataset per trial)
num_trials = 30

# Seeded local generator so that re-running this cell reproduces the same
# metrics without touching numpy's global random state.
rng = np.random.default_rng(0)

for r in range(num_trials):

    # Initialize the dataset
    Sn = []  # List to store negative samples
    Sp = []  # List to store positive samples
    num_n = 0  # Counter for negative samples
    num_p = 0  # Counter for positive samples
    true_positive = 0  # Counter for true positive samples
    true_negative = 0  # Counter for true negative samples
    false_positive = 0  # Counter for false positive samples
    false_negative = 0  # Counter for false negative samples

    # Each class needs enough samples for both the training and testing halves
    samples_per_class = target_num_samples * 2

    # Rejection-sample until both classes are full; draws that belong to an
    # already-full class are discarded.
    while num_p < samples_per_class or num_n < samples_per_class:
        # Draw a feature vector uniformly from [-5, 5) in every dimension
        features = rng.uniform(-5, 5, dim)
        # The class is the sign of the sum of the features
        # (a sum of exactly 0 counts as negative).
        if np.sum(features) > 0:
            if num_p < samples_per_class:
                # Sample layout: dim features followed by the label
                Sp.append(np.append(features, 1))
                num_p += 1
        elif num_n < samples_per_class:
            Sn.append(np.append(features, -1))
            num_n += 1

    # Split the dataset into training (Sp, Sn) and testing (Tp, Tn)
    # The last target_num_samples samples from Sn and Sp are used for testing.
    Tp = Sp[target_num_samples:]
    Tn = Sn[target_num_samples:]

    # The first target_num_samples samples from Sn and Sp are used for training.
    Sp = Sp[:target_num_samples]
    Sn = Sn[:target_num_samples]

    # Combine positive and negative samples to create the dataset
    dataset = Sn + Sp

    # Combine positive and negative samples to create the testing dataset
    testing_dataset = Tp + Tn

    # Classify every testing sample and tally the confusion-matrix counters
    for test_sample in testing_dataset:
        prediction = knn_inferencing(dataset, test_sample, dim)

        if test_sample[dim] == 1:
            if prediction == 1:
                true_positive += 1
            else:
                false_negative += 1
        else:
            if prediction == -1:
                true_negative += 1
            else:
                false_positive += 1

    # Calculate precision (defined as 0 when nothing was predicted positive)
    predicted_positive = true_positive + false_positive
    precision = true_positive / predicted_positive if predicted_positive != 0 else 0

    # Calculate recall (defined as 0 when there are no actual positives)
    actual_positive = true_positive + false_negative
    recall = true_positive / actual_positive if actual_positive != 0 else 0

    # Calculate F1 score (harmonic mean of precision and recall)
    f1score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    # Print the result
    print("Case",r+1,"Precision:", precision, "Recall:", recall, "F1 Score:", f1score)


Case 1 Precision: 0.9302325581395349 Recall: 1.0 F1 Score: 0.963855421686747
Case 2 Precision: 1.0 Recall: 1.0 F1 Score: 1.0
Case 3 Precision: 0.9459459459459459 Recall: 0.875 F1 Score: 0.9090909090909091
Case 4 Precision: 0.8863636363636364 Recall: 0.975 F1 Score: 0.9285714285714285
Case 5 Precision: 1.0 Recall: 0.875 F1 Score: 0.9333333333333333
Case 6 Precision: 1.0 Recall: 0.925 F1 Score: 0.961038961038961
Case 7 Precision: 1.0 Recall: 0.975 F1 Score: 0.9873417721518987
Case 8 Precision: 1.0 Recall: 1.0 F1 Score: 1.0
Case 9 Precision: 1.0 Recall: 0.875 F1 Score: 0.9333333333333333
Case 10 Precision: 1.0 Recall: 0.95 F1 Score: 0.9743589743589743
Case 11 Precision: 0.95 Recall: 0.95 F1 Score: 0.9500000000000001
Case 12 Precision: 0.975 Recall: 0.975 F1 Score: 0.975
Case 13 Precision: 1.0 Recall: 0.85 F1 Score: 0.9189189189189189
Case 14 Precision: 0.95 Recall: 0.95 F1 Score: 0.9500000000000001
Case 15 Precision: 0.975 Recall: 0.975 F1 Score: 0.975
Case 16 Precision: 0.95 Recall: 0.95

In [None]:
# Define dimension of the data
dim = 10

# Number of samples per class in the training set and, separately, in the
# testing set (each trial therefore draws 2 * target_num_samples per class).
target_num_samples = 200

# Number of independent trials (fresh random dataset per trial)
num_trials = 30

# Seeded local generator so that re-running this cell reproduces the same
# metrics without touching numpy's global random state.
rng = np.random.default_rng(0)

for r in range(num_trials):

    # Initialize the dataset
    Sn = []  # List to store negative samples
    Sp = []  # List to store positive samples
    num_n = 0  # Counter for negative samples
    num_p = 0  # Counter for positive samples
    true_positive = 0  # Counter for true positive samples
    true_negative = 0  # Counter for true negative samples
    false_positive = 0  # Counter for false positive samples
    false_negative = 0  # Counter for false negative samples

    # Each class needs enough samples for both the training and testing halves
    samples_per_class = target_num_samples * 2

    # Rejection-sample until both classes are full; draws that belong to an
    # already-full class are discarded.
    while num_p < samples_per_class or num_n < samples_per_class:
        # Draw a feature vector uniformly from [-5, 5) in every dimension
        features = rng.uniform(-5, 5, dim)
        # The class is the sign of the sum of the features
        # (a sum of exactly 0 counts as negative).
        if np.sum(features) > 0:
            if num_p < samples_per_class:
                # Sample layout: dim features followed by the label
                Sp.append(np.append(features, 1))
                num_p += 1
        elif num_n < samples_per_class:
            Sn.append(np.append(features, -1))
            num_n += 1

    # Split the dataset into training (Sp, Sn) and testing (Tp, Tn)
    # The last target_num_samples samples from Sn and Sp are used for testing.
    Tp = Sp[target_num_samples:]
    Tn = Sn[target_num_samples:]

    # The first target_num_samples samples from Sn and Sp are used for training.
    Sp = Sp[:target_num_samples]
    Sn = Sn[:target_num_samples]

    # Combine positive and negative samples to create the dataset
    dataset = Sn + Sp

    # Combine positive and negative samples to create the testing dataset
    testing_dataset = Tp + Tn

    # Classify every testing sample and tally the confusion-matrix counters
    for test_sample in testing_dataset:
        prediction = knn_inferencing(dataset, test_sample, dim)

        if test_sample[dim] == 1:
            if prediction == 1:
                true_positive += 1
            else:
                false_negative += 1
        else:
            if prediction == -1:
                true_negative += 1
            else:
                false_positive += 1

    # Calculate precision (defined as 0 when nothing was predicted positive)
    predicted_positive = true_positive + false_positive
    precision = true_positive / predicted_positive if predicted_positive != 0 else 0

    # Calculate recall (defined as 0 when there are no actual positives)
    actual_positive = true_positive + false_negative
    recall = true_positive / actual_positive if actual_positive != 0 else 0

    # Calculate F1 score (harmonic mean of precision and recall)
    f1score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

    # Print the result
    print("Case",r+1,"Precision:", precision, "Recall:", recall, "F1 Score:", f1score)

Case 1 Precision: 0.7868020304568528 Recall: 0.775 F1 Score: 0.7808564231738037
Case 2 Precision: 0.7801047120418848 Recall: 0.745 F1 Score: 0.7621483375959079
Case 3 Precision: 0.8090452261306532 Recall: 0.805 F1 Score: 0.8070175438596492
Case 4 Precision: 0.7740384615384616 Recall: 0.805 F1 Score: 0.7892156862745099
Case 5 Precision: 0.8134715025906736 Recall: 0.785 F1 Score: 0.7989821882951654
Case 6 Precision: 0.7537688442211056 Recall: 0.75 F1 Score: 0.7518796992481204
Case 7 Precision: 0.7511961722488039 Recall: 0.785 F1 Score: 0.7677261613691931
Case 8 Precision: 0.8256410256410256 Recall: 0.805 F1 Score: 0.8151898734177216
Case 9 Precision: 0.7860696517412935 Recall: 0.79 F1 Score: 0.7880299251870324
Case 10 Precision: 0.801980198019802 Recall: 0.81 F1 Score: 0.8059701492537314
Case 11 Precision: 0.8324324324324325 Recall: 0.77 F1 Score: 0.8000000000000002
Case 12 Precision: 0.805 Recall: 0.805 F1 Score: 0.805
Case 13 Precision: 0.8256410256410256 Recall: 0.805 F1 Score: 0.8151