# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the MNIST dataset from OpenML.
# `version=1` pins the dataset revision instead of relying on whichever one is
# currently marked active, and `as_frame=True` guarantees pandas objects,
# which the `.iloc` indexing below requires.
mnist = fetch_openml("mnist_784", version=1, as_frame=True)

mnist_df = pd.DataFrame(mnist.data, columns=mnist.feature_names)

# Feature table (one row per image) and the labels cast to integers
data = mnist.data
target = mnist.target.astype(int)

# Dimensions of the preview grid
grid_width = 10
grid_height = 10

# squeeze=False keeps `axes` two-dimensional even when a grid dimension is 1,
# so the axes[i, j] indexing below is always valid.
fig, axes = plt.subplots(
    grid_height, grid_width, figsize=(10, 10), squeeze=False
)

# Display the first grid_height * grid_width images in row-major order
for i in range(grid_height):
    for j in range(grid_width):
        # Position of the current image within the dataset
        index = i * grid_width + j

        # Each row is reshaped into a 28x28 image for display
        image_data = data.iloc[index].to_numpy().reshape(28, 28)
        # Positional lookup so the label always matches the row read above,
        # independent of how the Series index is labelled
        label = target.iloc[index]

        ax = axes[i, j]
        ax.imshow(image_data, cmap='gray')
        ax.axis('off')  # Hide ticks and frame around each digit
        ax.set_title(f"Label: {label}")

# Adjust spacing and display the plot
plt.tight_layout()
plt.show()

# ---- PCA with 4 components + Gaussian Naive Bayes ----

# Project the data onto four principal components
n_components = 4
pca = PCA(n_components=n_components)
data_pca = pca.fit_transform(data)

# Tabulate the projection (columns PC1..PC4) and attach the labels so the
# pairplot can colour points by class
pc_columns = [f'PC{i+1}' for i in range(n_components)]
pca_df = pd.DataFrame(data_pca, columns=pc_columns)
pca_df['target'] = target

# Pairwise scatter plots of the principal components, coloured by label
sns.pairplot(data=pca_df, hue='target')
plt.show()

# Hold out 20% of the projected samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    data_pca,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit the classifier; `fit` returns the estimator itself
gnb = GaussianNB().fit(X_train, y_train)

# Score the training split
train_predictions = gnb.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)

# Score the held-out split
test_predictions = gnb.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)

print(f"Accuracy on Training Dataset (PCA): {train_accuracy:.2f}")
print(f"Accuracy on Test Dataset (PCA): {test_accuracy:.2f}")

# ---- PCA with 2 components + Gaussian Naive Bayes ----

# Project the data onto two principal components
n_components_2 = 2
pca2 = PCA(n_components=n_components_2)
data_pca_2 = pca2.fit_transform(data)

# Tabulate the projection (columns PC1, PC2) and attach the labels so the
# pairplot can colour points by class
pc_columns_2 = [f'PC{i+1}' for i in range(n_components_2)]
pca_2_df = pd.DataFrame(data_pca_2, columns=pc_columns_2)
pca_2_df['target'] = target

# Pairwise scatter plots of the two components, coloured by label
sns.pairplot(data=pca_2_df, hue='target')
plt.show()

# Hold out 20% of the projected samples for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    data_pca_2,
    target,
    test_size=0.2,
    random_state=42,
)

# Refit the existing `gnb` estimator on the 2-component training split
gnb.fit(X_train, y_train)

# Score the training split
train_predictions = gnb.predict(X_train)
train_accuracy = accuracy_score(y_train, train_predictions)

# Score the held-out split
test_predictions = gnb.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)

print(f"Accuracy on Training Dataset (PCA 2 Components): {train_accuracy:.2f}")
print(f"Accuracy on Test Dataset (PCA 2 Components): {test_accuracy:.2f}")

# ---- Isomap on the 2-component PCA projection + Gaussian Naive Bayes ----

# Apply Isomap to the PCA-transformed data.
# NOTE(review): Isomap works from distances between all pairs of samples, so
# its memory use grows quadratically with the number of rows. Fitting on the
# full dataset may exhaust RAM -- consider fitting on a subsample; confirm
# against the available hardware.
isomap = Isomap(n_components=2)
data_isomap = isomap.fit_transform(data_pca_2)

# Create a DataFrame for the Isomap-transformed data
isomap_df = pd.DataFrame(data_isomap, columns=['Component 1', 'Component 2'])

# Attach the labels as a column: pairplot's `hue` must be the NAME of a
# column in the frame, not a separate Series (same pattern as the PCA plots)
isomap_df['target'] = target

# Pairplot for the Isomap-transformed data, coloured by label
sns.pairplot(isomap_df, hue='target')
# suptitle labels the whole figure; plt.title would only title the last
# subplot of the grid. y > 1 keeps the title clear of the top row.
plt.suptitle('Pair Plot for Isomap (PCA 2 Components)', y=1.02)
plt.show()

# Split the data into training and testing sets for Isomap
X_train, X_test, y_train, y_test = train_test_split(
    data_isomap, target, test_size=0.2, random_state=42
)

# Refit the existing `gnb` estimator on the Isomap training split
gnb.fit(X_train, y_train)

# Predict on the training and test data for Isomap
train_predictions = gnb.predict(X_train)
test_predictions = gnb.predict(X_test)

# Calculate and report accuracy for Isomap-transformed data
train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)

print(f"Accuracy on Training Dataset (Isomap): {train_accuracy:.2f}")
print(f"Accuracy on Test Dataset (Isomap): {test_accuracy:.2f}")


# [truncated cell output]   warn(
