In [5]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np


In [8]:
# Load the CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Flatten images to apply PCA (original size: 32x32x3)
x_train_flat = x_train.reshape(x_train.shape[0], -1)
x_test_flat = x_test.reshape(x_test.shape[0], -1)

# Standardize the data to zero mean and unit variance
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_flat)
x_test_scaled = scaler.transform(x_test_flat)


In [3]:
# Flatten images to apply PCA (original size: 32x32x3)
x_train_flat = x_train.reshape(x_train.shape[0], -1)
x_test_flat = x_test.reshape(x_test.shape[0], -1)

In [6]:
# Standardize the data to zero mean and unit variance
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_flat)

In [9]:
# Apply PCA with 150 components
pca = PCA(n_components=150)
x_train_pca = pca.fit_transform(x_train_scaled)
x_test_pca = pca.transform(x_test_scaled)

# Check variance explained by each component
explained_variance_ratio = np.sum(pca.explained_variance_ratio_)
print(f"Total Explained Variance with 150 components: {explained_variance_ratio:.4f}")


Total Explained Variance with 150 components: 0.9271


In [10]:
def build_model(input_dim):
    model = Sequential([
        Dense(512, activation='relu', input_shape=(input_dim,)),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')  # 10 classes for CIFAR-10
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


In [11]:
# Build and train the model on original dataset
model_original = build_model(x_train_flat.shape[1])
history_original = model_original.fit(x_train_flat, y_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate on test data
test_loss_orig, test_accuracy_orig = model_original.evaluate(x_test_flat, y_test)
print(f"Original Data Test Accuracy: {test_accuracy_orig:.4f}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Original Data Test Accuracy: 0.3315


In [12]:
# Build and train the model on PCA-reduced dataset
model_pca = build_model(x_train_pca.shape[1])
history_pca = model_pca.fit(x_train_pca, y_train, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate on test data
test_loss_pca, test_accuracy_pca = model_pca.evaluate(x_test_pca, y_test)
print(f"PCA Reduced Data Test Accuracy: {test_accuracy_pca:.4f}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
PCA Reduced Data Test Accuracy: 0.5110


In [13]:
# Calculate and display trade-off results
accuracy_difference = (test_accuracy_orig - test_accuracy_pca) * 100
print("\n--- Dimensionality Reduction Analysis ---")
print(f"Original Dataset Dimensionality: {x_train_flat.shape[1]}")
print(f"Reduced Dataset Dimensionality: {x_train_pca.shape[1]}")
print(f"Accuracy with Original Dataset: {test_accuracy_orig:.4f}")
print(f"Accuracy with PCA Reduced Dataset: {test_accuracy_pca:.4f}")
print(f"Accuracy Difference: {accuracy_difference:.2f}%")



--- Dimensionality Reduction Analysis ---
Original Dataset Dimensionality: 3072
Reduced Dataset Dimensionality: 150
Accuracy with Original Dataset: 0.3315
Accuracy with PCA Reduced Dataset: 0.5110
Accuracy Difference: -17.95%
