In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## **Non-Linear Dataset**

In [61]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Assume X_train, y_train, X_test, y_test are your training and testing sets
#data = np.loadtxt('/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment1_code/Group01/NLS_Group01/class1.txt')
# Step 1: Load data from each text file
file_paths = ["/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment1_code/Group01/NLS_Group01/class1.txt",
              "/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment1_code/Group01/NLS_Group01/class2.txt",
              "/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment1_code/Group01/NLS_Group01/class3.txt"]

# Load data for each class
data = []
labels = []

for i, file_path in enumerate(file_paths):
    class_data = np.loadtxt(file_path)
    labels.extend([i] * class_data.shape[0])
    data.append(class_data)

# Combine data from all classes
X = np.vstack(data)
y = np.array(labels)

# Step 2: Split the data into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Iterate through different values of k
for k in range(1, 6):
    knn = NearestNeighbors(n_neighbors=k)

    if len(np.unique(y_train)) < 2:
        raise ValueError(f"Not enough samples for each class in training set for k={k}.")

    knn.fit(X_train)

    distances_class1, _ = knn.kneighbors(X_train[y_train == 0], n_neighbors=k)
    distances_class2, _ = knn.kneighbors(X_train[y_train == 1], n_neighbors=k)

    density_class1 = np.mean(distances_class1, axis=1)
    density_class2 = np.mean(distances_class2, axis=1)

    predictions = []

    for x_test_point in X_test:
        likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
        likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())

        posterior_class1 = likelihood_class1 * np.sum(y_train == 0) / len(y_train)
        posterior_class2 = likelihood_class2 * np.sum(y_train == 1) / len(y_train)

        predicted_class = 0 if posterior_class1 > posterior_class2 else 1
        predictions.append(predicted_class)

    print(f"Results for k={k}:")
    print(classification_report(y_test, predictions))

  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1

Results for k=1:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        80
           1       0.29      1.00      0.45       157
           2       0.00      0.00      0.00       303

    accuracy                           0.29       540
   macro avg       0.10      0.33      0.15       540
weighted avg       0.08      0.29      0.13       540

Results for k=2:
              precision    recall  f1-score   support

           0       0.15      1.00      0.26        80
           1       1.00      0.08      0.15       157
           2       0.00      0.00      0.00       303

    accuracy                           0.17       540
   macro avg       0.38      0.36      0.14       540
weighted avg       0.31      0.17      0.08       540



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Results for k=3:
              precision    recall  f1-score   support

           0       0.15      1.00      0.26        80
           1       1.00      0.06      0.12       157
           2       0.00      0.00      0.00       303

    accuracy                           0.17       540
   macro avg       0.38      0.35      0.13       540
weighted avg       0.31      0.17      0.07       540



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Results for k=4:
              precision    recall  f1-score   support

           0       0.15      1.00      0.26        80
           1       1.00      0.06      0.11       157
           2       0.00      0.00      0.00       303

    accuracy                           0.16       540
   macro avg       0.38      0.35      0.12       540
weighted avg       0.31      0.16      0.07       540



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Results for k=5:
              precision    recall  f1-score   support

           0       0.15      1.00      0.26        80
           1       1.00      0.09      0.16       157
           2       0.00      0.00      0.00       303

    accuracy                           0.17       540
   macro avg       0.38      0.36      0.14       540
weighted avg       0.31      0.17      0.09       540



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## **Linear Dataset**

In [63]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Assume X_train, y_train, X_test, y_test are your training and testing sets
#data = np.loadtxt('/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment1_code/Group01/NLS_Group01/class1.txt')
# Step 1: Load data from each text file
file_paths = ["/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment1_code/Group01/LS_Group01/Class1.txt",
              "/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment1_code/Group01/LS_Group01/Class2.txt",
              "/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment1_code/Group01/LS_Group01/Class3.txt"]

# Load data for each class
data = []
labels = []

for i, file_path in enumerate(file_paths):
    class_data = np.loadtxt(file_path)
    labels.extend([i] * class_data.shape[0])
    data.append(class_data)

# Combine data from all classes
X = np.vstack(data)
y = np.array(labels)

# Step 2: Split the data into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Iterate through different values of k
for k in range(1, 6):
    knn = NearestNeighbors(n_neighbors=k)

    if len(np.unique(y_train)) < 2:
        raise ValueError(f"Not enough samples for each class in training set for k={k}.")

    knn.fit(X_train)

    distances_class1, _ = knn.kneighbors(X_train[y_train == 0], n_neighbors=k)
    distances_class2, _ = knn.kneighbors(X_train[y_train == 1], n_neighbors=k)

    density_class1 = np.mean(distances_class1, axis=1)
    density_class2 = np.mean(distances_class2, axis=1)

    predictions = []

    for x_test_point in X_test:
        likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
        likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())

        posterior_class1 = likelihood_class1 * np.sum(y_train == 0) / len(y_train)
        posterior_class2 = likelihood_class2 * np.sum(y_train == 1) / len(y_train)

        predicted_class = 0 if posterior_class1 > posterior_class2 else 1
        predictions.append(predicted_class)

    print(f"Results for k={k}:")
    print(classification_report(y_test, predictions))

  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1

Results for k=1:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       148
           1       0.31      1.00      0.48       141
           2       0.00      0.00      0.00       161

    accuracy                           0.31       450
   macro avg       0.10      0.33      0.16       450
weighted avg       0.10      0.31      0.15       450

Results for k=2:
              precision    recall  f1-score   support

           0       0.33      1.00      0.49       148
           1       0.00      0.00      0.00       141
           2       0.00      0.00      0.00       161

    accuracy                           0.33       450
   macro avg       0.11      0.33      0.16       450
weighted avg       0.11      0.33      0.16       450



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Results for k=3:
              precision    recall  f1-score   support

           0       0.33      1.00      0.49       148
           1       0.00      0.00      0.00       141
           2       0.00      0.00      0.00       161

    accuracy                           0.33       450
   macro avg       0.11      0.33      0.16       450
weighted avg       0.11      0.33      0.16       450



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Results for k=4:
              precision    recall  f1-score   support

           0       0.33      1.00      0.49       148
           1       0.00      0.00      0.00       141
           2       0.00      0.00      0.00       161

    accuracy                           0.33       450
   macro avg       0.11      0.33      0.16       450
weighted avg       0.11      0.33      0.16       450



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Results for k=5:
              precision    recall  f1-score   support

           0       0.33      1.00      0.49       148
           1       0.00      0.00      0.00       141
           2       0.00      0.00      0.00       161

    accuracy                           0.33       450
   macro avg       0.11      0.33      0.16       450
weighted avg       0.11      0.33      0.16       450



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## **Dataset:2**

In [67]:
import os
import numpy as np

train_folder = "/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment5_code/BOVW/train"

train_data = []
train_labels = []

class_folders = os.listdir(train_folder)
for i, class_folder in enumerate(class_folders):
    class_path = os.path.join(train_folder, class_folder)
    files = os.listdir(class_path)

    for file in files:
        file_path = os.path.join(class_path, file)
        bovw_representation = np.load(file_path)
        train_data.append(bovw_representation)
        train_labels.append(i)

X_train = np.vstack(train_data)
y_train = np.array(train_labels)

In [69]:
test_folder = "/content/drive/MyDrive/PR_Assignment-2/CS699-AS1-2-3-4-5/Group01_Assignment5_code/BOVW/test"

test_data = []
test_labels = []

class_folders = os.listdir(test_folder)
for i, class_folder in enumerate(class_folders):
    class_path = os.path.join(test_folder, class_folder)
    files = os.listdir(class_path)

    for file in files:
        file_path = os.path.join(class_path, file)
        bovw_representation = np.load(file_path)
        test_data.append(bovw_representation)
        test_labels.append(i)
X_test = np.vstack(test_data)
y_test = np.array(test_labels)

In [70]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Step 2: Split the data into training and testing sets (70% training, 30% testing)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Iterate through different values of k
for k in range(1, 6):
    knn = NearestNeighbors(n_neighbors=k)

    if len(np.unique(y_train)) < 2:
        raise ValueError(f"Not enough samples for each class in training set for k={k}.")

    knn.fit(X_train)

    distances_class1, _ = knn.kneighbors(X_train[y_train == 0], n_neighbors=k)
    distances_class2, _ = knn.kneighbors(X_train[y_train == 1], n_neighbors=k)

    density_class1 = np.mean(distances_class1, axis=1)
    density_class2 = np.mean(distances_class2, axis=1)

    predictions = []

    for x_test_point in X_test:
        likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
        likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())

        posterior_class1 = likelihood_class1 * np.sum(y_train == 0) / len(y_train)
        posterior_class2 = likelihood_class2 * np.sum(y_train == 1) / len(y_train)

        predicted_class = 0 if posterior_class1 > posterior_class2 else 1
        predictions.append(predicted_class)

    print(f"Results for k={k}:")
    print(classification_report(y_test, predictions))

  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1.std())
  likelihood_class2 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class2.std())
  likelihood_class1 = np.exp(-np.mean(knn.kneighbors([x_test_point], n_neighbors=k)[0]) / density_class1

Results for k=1:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        50
           1       0.33      1.00      0.50        50
           2       0.00      0.00      0.00        50

    accuracy                           0.33       150
   macro avg       0.11      0.33      0.17       150
weighted avg       0.11      0.33      0.17       150

Results for k=2:
              precision    recall  f1-score   support

           0       0.33      1.00      0.50        50
           1       0.00      0.00      0.00        50
           2       0.00      0.00      0.00        50

    accuracy                           0.33       150
   macro avg       0.11      0.33      0.17       150
weighted avg       0.11      0.33      0.17       150

Results for k=3:
              precision    recall  f1-score   support

           0       0.33      1.00      0.50        50
           1       0.00      0.00      0.00        50
           2       0.00 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Results for k=4:
              precision    recall  f1-score   support

           0       0.33      1.00      0.50        50
           1       0.00      0.00      0.00        50
           2       0.00      0.00      0.00        50

    accuracy                           0.33       150
   macro avg       0.11      0.33      0.17       150
weighted avg       0.11      0.33      0.17       150

Results for k=5:
              precision    recall  f1-score   support

           0       0.33      1.00      0.50        50
           1       0.00      0.00      0.00        50
           2       0.00      0.00      0.00        50

    accuracy                           0.33       150
   macro avg       0.11      0.33      0.17       150
weighted avg       0.11      0.33      0.17       150



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
