### Loading the Data

In [None]:
from keras.datasets import cifar10
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import timeit
from sklearn.metrics import classification_report

# Fetch CIFAR-10 as its predefined train/test partitions
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Display name for each integer class id (list index == class id)
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Stack both partitions into a single pool of images and labels
x_combined = np.concatenate([x_train, x_test], axis=0)
y_combined = np.concatenate([y_train, y_test], axis=0)

# Report the pooled array shapes
print("Shape of x_combined:", x_combined.shape)
print("Shape of y_combined:", y_combined.shape)

# Show the first image found for every class on a 2x5 grid
labels_flat = y_combined.flatten()  # flatten once so labels compare element-wise
plt.figure(figsize=(10, 5))
for class_id, class_name in enumerate(class_names):
    first_match = np.where(labels_flat == class_id)[0][0]
    plt.subplot(2, 5, class_id + 1)
    plt.imshow(x_combined[first_match])
    plt.title(class_name)
    plt.axis('off')
plt.show()


In [None]:
# Normalize pixel values to [0, 1].
# The scaling is applied only while the data still has an integer dtype, so
# re-running this cell does not divide already-normalized floats by 255 again.
if x_combined.dtype.kind in ('u', 'i'):
    x_combined = x_combined.astype('float32') / 255.0

# Flatten every image into one feature vector per sample
# (a no-op when the array is already 2-D, e.g. on a re-run)
x_combined = x_combined.reshape(x_combined.shape[0], -1)

# Print the shape of the data
print("Shape of x_combined:", x_combined.shape)
print("Shape of y_combined:", y_combined.shape)

# Split the data into train, validation, and test sets using stratified sampling:
# 80% train, then the remaining 20% is halved into validation and test.
# A fixed random_state keeps the split reproducible.
x_train, x_temp, y_train, y_temp = train_test_split(x_combined, y_combined, test_size=0.2, random_state=42, stratify=y_combined)
x_val, x_test, y_val, y_test = train_test_split(x_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# Flatten the target variables to 1-D label vectors
y_train = y_train.ravel()
y_val = y_val.ravel()
y_test = y_test.ravel()

### Performing KNN classification on Raw Data

In [None]:
# 1. KNN classification for Raw Data

start_time1 = timeit.default_timer()

# Build the 20-neighbour classifier and fit it in one step (fit returns the estimator)
knn = KNeighborsClassifier(n_neighbors=20).fit(x_train, y_train)

# Validation split: predict, then score with accuracy and macro-averaged F1
y_val_pred = knn.predict(x_val)
val_accuracy, val_f1_score = (
    accuracy_score(y_val, y_val_pred),
    f1_score(y_val, y_val_pred, average='macro'),
)
print("Validation Accuracy: {:.2f}%".format(val_accuracy * 100))
print("Validation F1 Score: {:.2f}".format(val_f1_score))

# Test split: same procedure
y_test_pred = knn.predict(x_test)
test_accuracy, test_f1_score = (
    accuracy_score(y_test, y_test_pred),
    f1_score(y_test, y_test_pred, average='macro'),
)
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))
print("Test F1 Score: {:.2f}".format(test_f1_score))

# Elapsed wall-clock time covers fitting, both predictions and the metric computations
rawdata_time = timeit.default_timer() - start_time1
print("Execution time for Raw data: {:.2f} seconds".format(rawdata_time))

In [None]:
# Class-wise accuracy
# For each true class, the share of its samples that received the correct label.

# Validation set: select each class's samples with a boolean mask
class_accuracy_val = [
    accuracy_score(y_val[y_val == class_id], y_val_pred[y_val == class_id])
    for class_id in range(len(class_names))
]

print("Class-wise Accuracy on Validation Set:")
for class_name, class_acc in zip(class_names, class_accuracy_val):
    print("{}: {:.2f}%".format(class_name, class_acc * 100))

# Test set: same computation
class_accuracy_test = [
    accuracy_score(y_test[y_test == class_id], y_test_pred[y_test == class_id])
    for class_id in range(len(class_names))
]

print("\nClass-wise Accuracy on Test Set:")
for class_name, class_acc in zip(class_names, class_accuracy_test):
    print("{}: {:.2f}%".format(class_name, class_acc * 100))

# Full per-class precision/recall/F1 tables for both splits
print("Classification Report for Validation Set:")
print(classification_report(y_val, y_val_pred, target_names=class_names))

print("Classification Report for Test Set:")
print(classification_report(y_test, y_test_pred, target_names=class_names))


### Finding the best `n_components` for PCA and kPCA

In [None]:
# Finding optimal n_components
# Fit PCA with 3072 components on the training split to obtain the variance spectrum
pca = PCA(n_components=3072)
pca.fit(x_train)

# Running total of the per-component explained-variance ratios
cumulative_variance_ratio = np.cumsum(pca.explained_variance_ratio_)

# One entry per figure: (first component, last component, figure title).
# Component counts are 1-based, so component k lives at array index k - 1.
variance_views = [
    (1, len(cumulative_variance_ratio),
     'Cumulative Explained Variance Ratio vs. Number of Components'),
    (100, 400,
     'Cumulative Explained Variance Ratio vs. Number of Components (Components 100-400)'),
    (200, 250,
     'Cumulative Explained Variance Ratio vs. Number of Components (Components 200-250)'),
]

# Draw the full curve first, then the two zoomed-in ranges
for first_comp, last_comp, view_title in variance_views:
    plt.figure(figsize=(10, 6))
    plt.plot(
        range(first_comp, last_comp + 1),
        cumulative_variance_ratio[first_comp - 1:last_comp],
        marker='o',
        linestyle='-',
    )
    plt.xlabel('Number of Components')
    plt.ylabel('Cumulative Explained Variance Ratio')
    plt.title(view_title)
    plt.grid(True)
    plt.show()

In [None]:
# Plot for Accuracy vs Number of components
# For components from 200-250: refit PCA + KNN for every candidate size and
# record the validation accuracy, so the best n_components can be read off the plot.
components_range = range(200, 251)

accuracy_values = []

for n_components in components_range:
    # Apply PCA with the current number of components.
    # random_state is fixed because scikit-learn may pick a randomized SVD solver
    # for this many components; without a seed the accuracy curve (and the
    # n_components chosen from it) would differ between runs.
    pca = PCA(n_components=n_components, random_state=42)
    x_train_pca = pca.fit_transform(x_train)
    x_val_pca = pca.transform(x_val)

    # Initialize and train the KNN classifier
    knn = KNeighborsClassifier(n_neighbors=20)
    knn.fit(x_train_pca, y_train)

    # Predict on the validation set
    y_val_pred = knn.predict(x_val_pca)

    # Calculate accuracy and append
    accuracy = accuracy_score(y_val, y_val_pred)
    accuracy_values.append(accuracy)

# Plot accuracy vs. number of components
plt.figure(figsize=(10, 6))
plt.plot(components_range, accuracy_values, marker='o', linestyle='-')
plt.xlabel('Number of Components')
plt.ylabel('Accuracy')
plt.title('Accuracy vs. Number of Components (Components 200-250)')
plt.grid(True)
plt.show()


### 2-D representation of Raw data

In [None]:
# Plot 2-D representation of raw data
# Only the first two columns of the flattened feature matrix are shown. These are
# raw pixel features, not principal components, so the axes are labelled accordingly.
plt.figure(figsize=(8, 6))
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, cmap='viridis', s=10)
plt.title('2-D Representation of Raw CIFAR-10 Data')
plt.xlabel('Raw Feature 1 (flattened pixel value)')
plt.ylabel('Raw Feature 2 (flattened pixel value)')
plt.colorbar(label='Class')
plt.grid(True)
plt.show()

### Performing KNN classification on PCA transformed data

In [None]:
# 2. KNN classification with PCA

# Apply PCA (timing starts here so the projection cost is included)
start_time2 = timeit.default_timer()
# 227 components; random_state is fixed so results are reproducible when
# scikit-learn selects a randomized SVD solver
pca = PCA(n_components=227, random_state=42)
x_train_pca = pca.fit_transform(x_train)  # fit on the training split only
x_val_pca = pca.transform(x_val)
x_test_pca = pca.transform(x_test)

# Initialize KNN classifier
knn = KNeighborsClassifier(n_neighbors=20)

# Train the classifier
knn.fit(x_train_pca, y_train)

# Predict on validation set
y_val_pred = knn.predict(x_val_pca)

# Calculate accuracy and F1 score on validation set
val_accuracy = accuracy_score(y_val, y_val_pred)
val_f1_score = f1_score(y_val, y_val_pred, average='macro')
print("Validation Accuracy: {:.2f}%".format(val_accuracy * 100))
# Report the validation F1 computed just above (previously this line printed a
# stale test F1 left over from an earlier cell under a "Test" label)
print("Validation F1 Score: {:.2f}".format(val_f1_score))

# Predict on test set
y_test_pred = knn.predict(x_test_pca)

# Calculate accuracy and F1 score on test set
test_accuracy = accuracy_score(y_test, y_test_pred)
test_f1_score = f1_score(y_test, y_test_pred, average='macro')
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))
print("Test F1 Score: {:.2f}".format(test_f1_score))

pcadata_time = timeit.default_timer() - start_time2
print("Execution time for PCA data: {:.2f} seconds".format(pcadata_time))


In [None]:
# Class-wise accuracy
# For each true class, the share of its samples that received the correct label.

# Validation set: select each class's samples with a boolean mask
class_accuracy_val = [
    accuracy_score(y_val[y_val == class_id], y_val_pred[y_val == class_id])
    for class_id in range(len(class_names))
]

print("Class-wise Accuracy on Validation Set:")
for class_name, class_acc in zip(class_names, class_accuracy_val):
    print("{}: {:.2f}%".format(class_name, class_acc * 100))

# Test set: same computation
class_accuracy_test = [
    accuracy_score(y_test[y_test == class_id], y_test_pred[y_test == class_id])
    for class_id in range(len(class_names))
]

print("\nClass-wise Accuracy on Test Set:")
for class_name, class_acc in zip(class_names, class_accuracy_test):
    print("{}: {:.2f}%".format(class_name, class_acc * 100))

# Full per-class precision/recall/F1 tables for both splits
print("Classification Report for Validation Set:")
print(classification_report(y_val, y_val_pred, target_names=class_names))

print("Classification Report for Test Set:")
print(classification_report(y_test, y_test_pred, target_names=class_names))


### 2-D Representation for PCA data

In [None]:
# Plot 2-D representation of PCA data
# Scatter of the first two PCA columns of the training split, coloured by class id
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(x_train_pca[:, 0], x_train_pca[:, 1], c=y_train, cmap='viridis', s=10)
ax.set_title('2-D Representation of PCA Data')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
fig.colorbar(points, ax=ax, label='Class')
ax.grid(True)
plt.show()

### Performing KNN classification on kPCA transformed data

In [None]:
# 3. KNN classification with kPCA

# Timing starts before the projection so its cost is included
start_time3 = timeit.default_timer()

# RBF-kernel PCA down to 227 components, fitted on the training split only
# NOTE(review): KernelPCA works on an n_samples x n_samples kernel matrix -
# confirm this fits in memory for the full training split.
kpca = KernelPCA(n_components=227, kernel='rbf', gamma=0.01)
x_train_kpca = kpca.fit_transform(x_train)
x_val_kpca, x_test_kpca = kpca.transform(x_val), kpca.transform(x_test)

# Build the 20-neighbour classifier and fit it in one step (fit returns the estimator)
knn = KNeighborsClassifier(n_neighbors=20).fit(x_train_kpca, y_train)

# Validation split: predict, then score with accuracy and macro-averaged F1
y_val_pred = knn.predict(x_val_kpca)
val_accuracy, val_f1_score = (
    accuracy_score(y_val, y_val_pred),
    f1_score(y_val, y_val_pred, average='macro'),
)
print("Validation Accuracy: {:.2f}%".format(val_accuracy * 100))
print("Validation F1 Score: {:.2f}".format(val_f1_score))

# Test split: same procedure
y_test_pred = knn.predict(x_test_kpca)
test_accuracy, test_f1_score = (
    accuracy_score(y_test, y_test_pred),
    f1_score(y_test, y_test_pred, average='macro'),
)
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))
print("Test F1 Score: {:.2f}".format(test_f1_score))

kpcadata_time = timeit.default_timer() - start_time3
print("Execution time for kPCA data: {:.2f} seconds".format(kpcadata_time))

In [None]:
# Class-wise accuracy
# For each true class, the share of its samples that received the correct label.

# For validation set
class_accuracy_val = []
for i in range(len(class_names)):
    class_indices_val = np.where(y_val == i)[0]  # positions of samples whose true label is i
    class_accuracy_val.append(accuracy_score(y_val[class_indices_val], y_val_pred[class_indices_val]))

# Print class-wise accuracy on validation set
print("Class-wise Accuracy on Validation Set:")
for i, class_name in enumerate(class_names):
    print("{}: {:.2f}%".format(class_name, class_accuracy_val[i] * 100))

# For test set
class_accuracy_test = []
for i in range(len(class_names)):
    class_indices_test = np.where(y_test == i)[0]
    class_accuracy_test.append(accuracy_score(y_test[class_indices_test], y_test_pred[class_indices_test]))

# Print class-wise accuracy on test set
print("\nClass-wise Accuracy on Test Set:")
for i, class_name in enumerate(class_names):
    print("{}: {:.2f}%".format(class_name, class_accuracy_test[i] * 100))

# Print classification report for validation set.
# target_names is passed so rows show class names instead of bare integer ids,
# matching the reports printed for the other feature representations.
print("Classification Report for Validation Set:")
print(classification_report(y_val, y_val_pred, target_names=class_names))

# Print classification report for test set
print("\nClassification Report for Test Set:")
print(classification_report(y_test, y_test_pred, target_names=class_names))

### 2-D Representation for kPCA data

In [None]:
# 2D Representation of kPCA-transformed Data
# Scatter of the first two kernel-PCA columns of the training split, coloured by class id
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(x_train_kpca[:, 0], x_train_kpca[:, 1], c=y_train, cmap='viridis', alpha=0.7)
ax.set_title('2D Representation of kPCA-transformed Data')
ax.set_xlabel('Kernel Component 1')
ax.set_ylabel('Kernel Component 2')
fig.colorbar(points, ax=ax, label='Class')
ax.grid(True)
plt.show()

### Performing KNN classification on LLE transformed data

In [None]:
# 4. KNN classification with LLE

# Apply LLE for dimensionality reduction (timing starts here so the embedding cost is included)
start_time4 = timeit.default_timer()
lle = LocallyLinearEmbedding(n_components=227, n_neighbors=20, random_state=42)
x_train_lle = lle.fit_transform(x_train)  # fit on the training split only
x_val_lle = lle.transform(x_val)
x_test_lle = lle.transform(x_test)

# Initialize KNN classifier
knn = KNeighborsClassifier(n_neighbors=20)

# Train the classifier
knn.fit(x_train_lle, y_train)

# Predict on validation set
y_val_pred = knn.predict(x_val_lle)

# Calculate accuracy and F1 score on validation set
val_accuracy = accuracy_score(y_val, y_val_pred)
val_f1_score = f1_score(y_val, y_val_pred, average='macro')
print("Validation Accuracy: {:.2f}%".format(val_accuracy * 100))
print("Validation F1 Score: {:.2f}".format(val_f1_score))

# Predict on test set
y_test_pred = knn.predict(x_test_lle)

# Calculate accuracy and F1 score on test set
test_accuracy = accuracy_score(y_test, y_test_pred)
test_f1_score = f1_score(y_test, y_test_pred, average='macro')
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))
print("Test F1 Score: {:.2f}".format(test_f1_score))

lledata_time = timeit.default_timer() - start_time4
# Message now names the method correctly ("LLE", previously misspelled "LEE")
print("Execution time for LLE data: {:.2f} seconds".format(lledata_time))

In [None]:
# Class-wise accuracy
# For each true class, the share of its samples that received the correct label.

# Validation set: select each class's samples with a boolean mask
class_accuracy_val = [
    accuracy_score(y_val[y_val == class_id], y_val_pred[y_val == class_id])
    for class_id in range(len(class_names))
]

print("Class-wise Accuracy on Validation Set:")
for class_name, class_acc in zip(class_names, class_accuracy_val):
    print("{}: {:.2f}%".format(class_name, class_acc * 100))

# Test set: same computation
class_accuracy_test = [
    accuracy_score(y_test[y_test == class_id], y_test_pred[y_test == class_id])
    for class_id in range(len(class_names))
]

print("\nClass-wise Accuracy on Test Set:")
for class_name, class_acc in zip(class_names, class_accuracy_test):
    print("{}: {:.2f}%".format(class_name, class_acc * 100))

# Full per-class precision/recall/F1 tables for both splits
print("Classification Report for Validation Set:")
print(classification_report(y_val, y_val_pred, target_names=class_names))

print("Classification Report for Test Set:")
print(classification_report(y_test, y_test_pred, target_names=class_names))


### 2-D Representation for LLE data

In [None]:
# 2-D Representation for LLE-transformed data
# Scatter of the first two LLE columns of the training split, coloured by class id
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(x_train_lle[:, 0], x_train_lle[:, 1], c=y_train, cmap='viridis', alpha=0.7)
ax.set_title('2D Representation of LLE-transformed Data')
ax.set_xlabel('LLE Component 1')
ax.set_ylabel('LLE Component 2')
fig.colorbar(points, ax=ax, label='Class')
ax.grid(True)
plt.show()