In [21]:
import numpy as np
import pandas as pd

def getEuclideanDistance(a, b):
    return np.linalg.norm(a - b)

def getNearestNeighbors(train, test, k):
    distances = []
    for i in range(len(train)):
        distances.append((i, getEuclideanDistance(train[i], test)))
    distances.sort(key=lambda x: x[1])
    return distances[:k]

In [10]:
df = pd.read_csv('Exam Data.csv')
df.drop('ID', axis=1, inplace=True)
df.head()

Unnamed: 0,Programming Languages,Delivered Assignments,GPA,Passed Exam
0,3,5,3.1,1.0
1,3,5,2.0,0.0
2,2,7,3.5,1.0
3,4,9,2.5,1.0
4,3,11,3.9,0.0


In [11]:
print(df.columns)

Index(['Programming Languages', 'Delivered Assignments', 'GPA', 'Passed Exam'], dtype='object')


In [28]:
features = df[['Programming Languages', 'Delivered Assignments', 'GPA']]
features_values = features.values
labels = df['Passed Exam']
labels_values = labels.values
k = 3

# Test indices
test_indices = [8, 9, 10, 11]

for idx in test_indices:
    test_instance = features_values[idx]
    neighbors = getNearestNeighbors(features_values[:8], test_instance, k)
    print(f"Student No. {idx+1}: Nearest neighbors are: {[n[0]+1 for n in neighbors]}, "
    f"their distances are: {[round(n[1], 3) for n in neighbors]}")
    neighbor_indices = [n[0] for n in neighbors]
    neighbor_labels = [labels_values[i] for i in neighbor_indices]
    predicted_label = max(set(neighbor_labels), key=neighbor_labels.count)
    if predicted_label == 1.0:
        print(f"Student No. {idx+1}: Predicted Passed Exam = Yes")
    else:
        print(f"Student No. {idx+1}: Predicted Passed Exam = No")
    print()

Student No. 9: Nearest neighbors are: [6, 7, 1], their distances are: [1.005, 1.792, 3.164]
Student No. 9: Predicted Passed Exam = No

Student No. 10: Nearest neighbors are: [6, 7, 1], their distances are: [1.487, 2.1, 2.193]
Student No. 10: Predicted Passed Exam = No

Student No. 11: Nearest neighbors are: [2, 1, 3], their distances are: [1.414, 1.792, 2.693]
Student No. 11: Predicted Passed Exam = Yes

Student No. 12: Nearest neighbors are: [1, 2, 3], their distances are: [1.077, 1.803, 2.0]
Student No. 12: Predicted Passed Exam = Yes



In [37]:
def minMaxScaler(data, scaler):
    return (data - scaler.min()) / (scaler.max() - scaler.min())

training_features = features[:8]
print("Original features:\n", training_features)

scaled_training_features = minMaxScaler(training_features, training_features)
print("Scaled features using minMaxScaler:\n", scaled_training_features)

testing_features = features[8:]
print("Original testing features:\n", testing_features)

scaled_testing_features = minMaxScaler(testing_features, training_features)
print("Scaled testing features:\n", scaled_testing_features)

Original features:
    Programming Languages  Delivered Assignments  GPA
0                      3                      5  3.1
1                      3                      5  2.0
2                      2                      7  3.5
3                      4                      9  2.5
4                      3                     11  3.9
5                      2                      3  2.9
6                      3                      3  1.9
7                      4                      9  3.2
Scaled features using minMaxScaler:
    Programming Languages  Delivered Assignments   GPA
0                    0.5                   0.25  0.60
1                    0.5                   0.25  0.05
2                    0.0                   0.50  0.80
3                    1.0                   0.75  0.30
4                    0.5                   1.00  1.00
5                    0.0                   0.00  0.50
6                    0.5                   0.00  0.00
7                    1.0          

In [46]:
scaled_training_features.head(8)

Unnamed: 0,Programming Languages,Delivered Assignments,GPA
0,0.5,0.25,0.6
1,0.5,0.25,0.05
2,0.0,0.5,0.8
3,1.0,0.75,0.3
4,0.5,1.0,1.0
5,0.0,0.0,0.5
6,0.5,0.0,0.0
7,1.0,0.75,0.65


In [45]:
scaled_testing_features.head()

Unnamed: 0,Programming Languages,Delivered Assignments,GPA
8,0.0,-0.125,0.55
9,0.5,0.0,1.05
10,1.0,0.375,0.05
11,0.0,0.25,0.8


In [47]:
for item in scaled_testing_features.values:
    neighbors = getNearestNeighbors(scaled_training_features.values, item, k)
    print(f"Nearest neighbors are: {[n[0]+1 for n in neighbors]}, "
    f"their distances are: {[round(n[1], 3) for n in neighbors]}")
    neighbor_indices = [n[0] for n in neighbors]
    neighbor_labels = [labels[i] for i in neighbor_indices]
    predicted_label = max(set(neighbor_labels), key=neighbor_labels.count)
    if predicted_label == 1.0:
        print(f"Predicted Passed Exam = Yes")
    else:
        print(f"Predicted Passed Exam = No")

Nearest neighbors are: [6, 1, 3], their distances are: [0.135, 0.627, 0.673]
Predicted Passed Exam = Yes
Nearest neighbors are: [1, 6, 3], their distances are: [0.515, 0.743, 0.75]
Predicted Passed Exam = Yes
Nearest neighbors are: [4, 2, 7], their distances are: [0.451, 0.515, 0.627]
Predicted Passed Exam = No
Nearest neighbors are: [3, 6, 1], their distances are: [0.25, 0.391, 0.539]
Predicted Passed Exam = Yes


In [54]:
import numpy as np
import pandas as pd

# Load data
q5_df = pd.read_csv('q5.csv')
features_values = q5_df[['x', 'y']].values
labels_values = q5_df['label'].values

# Define K (can be 1 or other values)
ks = [1, 3, 5, 9]

# Leave-One-Out Cross Validation (LOO-CV)
correct_predictions = 0
for k in ks:
    for i in range(len(features_values)):
        test_instance = features_values[i]

        train_features = np.delete(features_values, i, axis=0)
        train_labels = np.delete(labels_values, i, axis=0)
        
        neighbors = getNearestNeighbors(train_features, test_instance, k)
        
        neighbor_indices = [n[0] for n in neighbors]  # These indices are for the train_features array
        neighbor_labels = [train_labels[j] for j in neighbor_indices]
        
        predicted_label = max(set(neighbor_labels), key=neighbor_labels.count)
        if predicted_label == labels_values[i]:
            correct_predictions += 1

    # Calculate and print accuracy
    accuracy = correct_predictions / len(features_values) * 100
    print(f"Accuracy: {accuracy:.2f}%")


Accuracy: 10.00%
Accuracy: 30.00%
Accuracy: 70.00%
Accuracy: 130.00%
