# Code 1: MNIST Classification

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
import xgboost as xgb
import pandas as pd
import numpy as np

In [None]:
# Step 1: Fetch MNIST from OpenML (dataset 'mnist_784', version 1)
mnist = fetch_openml('mnist_784', version=1)
X = mnist['data'].astype(np.float32)  # pixel features cast to float32
y = mnist['target'].astype(int)       # digit labels cast to integers

In [None]:
# Step 2: Preprocess the data
# Scale the pixel values by 1/255 into a NEW variable rather than dividing X
# in place, so re-running this cell cannot rescale the pixels a second time.
X_scaled = X / 255.0

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42)

In [None]:
# Step 3: Fit an XGBoost classifier on the training split
# NOTE(review): use_label_encoder is reportedly deprecated in newer XGBoost
# releases - confirm against the installed version.
model = xgb.XGBClassifier(
    eval_metric='mlogloss',
    use_label_encoder=False,
)
model.fit(X_train, y_train)

In [None]:
# Step 4: Predict on the held-out split and report the fraction classified correctly
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Test accuracy: {accuracy:.4f}')

Test accuracy: 0.9781


# Code 2: MNIST Classification with PCA

In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score

In [None]:
# Step 1: Fetch MNIST from OpenML (dataset 'mnist_784', version 1)
mnist = fetch_openml('mnist_784', version=1)
X = mnist['data'].astype(np.float32)  # pixel features cast to float32
y = mnist['target'].astype(int)       # digit labels cast to integers


In [None]:
# Step 2: Hold out 20% of the samples as a test set (seeded for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# (a) Feature Construction: Project images onto a set of principal components (50 components)
# random_state is fixed so the projection is repeatable even when PCA selects
# its randomized SVD solver; the seed matches the one used for the split.
pca = PCA(n_components=50, random_state=42)
X_train_pca = pca.fit_transform(X_train)  # Fit PCA on the training data only, then project it
X_test_pca = pca.transform(X_test)        # Project the test data with the already-fitted PCA

In [None]:
# Fit an XGBoost classifier on the PCA-projected training features
# NOTE(review): use_label_encoder is reportedly deprecated in newer XGBoost
# releases - confirm against the installed version.
model_pca = xgb.XGBClassifier(
    eval_metric='mlogloss',
    use_label_encoder=False,
)
model_pca.fit(X_train_pca, y_train)

In [None]:
# Predict on the PCA-projected test features and report the accuracy
y_pred_pca = model_pca.predict(X_test_pca)
accuracy_pca = accuracy_score(y_true=y_test, y_pred=y_pred_pca)
print(f'Test accuracy with PCA (50 components): {accuracy_pca:.4f}')

# (b) Feature Construction: Project images onto per-class principal components (500 components)
# Initialise the train/test containers as empty lists
X_train_per_class, X_test_per_class = [], []

Test accuracy with PCA (50 components): 0.9631


In [None]:
# Calculate per-class PCA components
for digit in range(10):
    X_class_train = X_train[y_train == digit]  # Select training data for the current digit
    X_class_test = X_test[y_test == digit]      # Select testing data for the current digit

    pca_class = PCA(n_components=50)
    X_class_train_pca = pca_class.fit_transform(X_class_train)  # PCA for current class
    X_class_test_pca = pca_class.transform(X_class_test)        # Transform testing data for current class

    # Append the PCA features for the current class
    X_train_per_class.append(X_class_train_pca)
    X_test_per_class.append(X_class_test_pca)

In [None]:
# Concatenate PCA features for all classes
X_train_per_class = np.vstack(X_train_per_class)
X_test_per_class = np.vstack(X_test_per_class)

In [None]:
# Create labels for the new training and testing sets
y_train_per_class = np.repeat(
    np.arange(10), [X_train[y_train == digit].shape[0] for digit in range(10)])
y_test_per_class = np.repeat(
    np.arange(10), [X_test[y_test == digit].shape[0] for digit in range(10)])

In [None]:
# Fit an XGBoost classifier on the per-class PCA feature matrix
# NOTE(review): use_label_encoder is reportedly deprecated in newer XGBoost
# releases - confirm against the installed version.
model_per_class = xgb.XGBClassifier(
    eval_metric='mlogloss',
    use_label_encoder=False,
)
model_per_class.fit(X_train_per_class, y_train_per_class)

In [None]:
# Predict on the per-class PCA test features and report the accuracy
y_pred_per_class = model_per_class.predict(X_test_per_class)
accuracy_per_class = accuracy_score(
    y_true=y_test_per_class,
    y_pred=y_pred_per_class,
)
print(f'Test accuracy with per-class PCA (500 components): {accuracy_per_class:.4f}')

Test accuracy with per-class PCA (500 components): 0.7840


# Code 3: CIFAR-10 Classification

In [None]:
import numpy as np
import xgboost as xgb
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Step 1: Load the CIFAR-10 dataset
def load_cifar10():
    """Load CIFAR-10 through Keras and return flattened, rescaled arrays.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``. Each image array is
        reshaped to rows of 3072 values, cast to float32 and divided by 255;
        each label array is flattened to one dimension.
    """
    # The loader comes from tensorflow.keras.datasets and is imported inside
    # the function, so TensorFlow is only imported when this function runs.
    from tensorflow.keras.datasets import cifar10

    def _as_scaled_rows(images):
        # One row of 3072 values per image, as float32 divided by 255.
        return images.reshape(-1, 3072).astype('float32') / 255.0

    (train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
    return (
        _as_scaled_rows(train_images),
        train_labels.flatten(),
        _as_scaled_rows(test_images),
        test_labels.flatten(),
    )

In [None]:
# Load the dataset: unpack the four arrays returned by load_cifar10()
# (training features, training labels, test features, test labels)
X_train, y_train, X_test, y_test = load_cifar10()

In [None]:
# Step 2: No additional split is needed - CIFAR-10 is loaded with separate training and test sets,
# so the arrays returned by load_cifar10() are used directly.

In [None]:
# Step 3: Fit an XGBoost classifier on the flattened CIFAR-10 training images
# NOTE(review): use_label_encoder is reportedly deprecated in newer XGBoost
# releases - confirm against the installed version.
model = xgb.XGBClassifier(
    eval_metric='mlogloss',
    use_label_encoder=False,
)
model.fit(X_train, y_train)

In [None]:
# Step 4: Predict on the CIFAR-10 test set and report the fraction classified correctly
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f'Test accuracy on CIFAR-10: {accuracy:.4f}')

# Code 4: CIFAR-10 Classification with PCA

In [None]:
import numpy as np
import xgboost as xgb
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import cifar10

In [None]:
# Step 1: Load the CIFAR-10 dataset
def load_cifar10():
    """Load CIFAR-10 through Keras and return flattened, rescaled arrays.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``. Each image array is
        reshaped to rows of 3072 values, cast to float32 and divided by 255;
        each label array is flattened to one dimension.
    """
    arrays = []
    # load_data() yields the (images, labels) pair for train, then for test.
    for images, labels in cifar10.load_data():
        arrays.append(images.reshape(-1, 3072).astype('float32') / 255.0)
        arrays.append(labels.flatten())
    return tuple(arrays)

In [None]:
# Load the dataset: unpack the four arrays returned by load_cifar10()
# (training features, training labels, test features, test labels)
X_train, y_train, X_test, y_test = load_cifar10()

In [None]:
# (a) Feature Construction: Project images onto a set of principal components (50 components)
# random_state is fixed so the projection is repeatable even when PCA selects
# its randomized SVD solver.
pca = PCA(n_components=50, random_state=42)
X_train_pca = pca.fit_transform(X_train)  # Fit PCA on the training data only, then project it
X_test_pca = pca.transform(X_test)        # Project the test data with the already-fitted PCA

In [None]:
# Fit an XGBoost classifier on the PCA-projected training features
# NOTE(review): use_label_encoder is reportedly deprecated in newer XGBoost
# releases - confirm against the installed version.
model_pca = xgb.XGBClassifier(
    eval_metric='mlogloss',
    use_label_encoder=False,
)
model_pca.fit(X_train_pca, y_train)

In [None]:
# Predict on the PCA-projected test features and report the accuracy
y_pred_pca = model_pca.predict(X_test_pca)
accuracy_pca = accuracy_score(y_true=y_test, y_pred=y_pred_pca)
print(f'Test accuracy with PCA (50 components): {accuracy_pca:.4f}')

In [None]:
# (b) Feature Construction: Project images onto per-class principal components (500 components)
# Initialise the train/test containers as empty lists
X_train_per_class, X_test_per_class = [], []

In [None]:
# Calculate per-class PCA components
for digit in range(10):
    X_class_train = X_train[y_train == digit]  # Select training data for the current digit
    X_class_test = X_test[y_test == digit]      # Select testing data for the current digit

    pca_class = PCA(n_components=50)
    X_class_train_pca = pca_class.fit_transform(X_class_train)  # PCA for current class
    X_class_test_pca = pca_class.transform(X_class_test)        # Transform testing data for current class

    # Append the PCA features for the current class
    X_train_per_class.append(X_class_train_pca)
    X_test_per_class.append(X_class_test_pca)


In [None]:
# Concatenate PCA features for all classes
X_train_per_class = np.vstack(X_train_per_class)
X_test_per_class = np.vstack(X_test_per_class)

In [None]:
# Create labels for the new training and testing sets
y_train_per_class = np.repeat(
    np.arange(10), [X_train[y_train == digit].shape[0] for digit in range(10)])
y_test_per_class = np.repeat(
    np.arange(10), [X_test[y_test == digit].shape[0] for digit in range(10)])

In [None]:
# Fit an XGBoost classifier on the per-class PCA feature matrix
# NOTE(review): use_label_encoder is reportedly deprecated in newer XGBoost
# releases - confirm against the installed version.
model_per_class = xgb.XGBClassifier(
    eval_metric='mlogloss',
    use_label_encoder=False,
)
model_per_class.fit(X_train_per_class, y_train_per_class)

In [None]:
# Predict on the per-class PCA test features and report the accuracy
y_pred_per_class = model_per_class.predict(X_test_per_class)
accuracy_per_class = accuracy_score(
    y_true=y_test_per_class,
    y_pred=y_pred_per_class,
)
print(f'Test accuracy with per-class PCA (500 components): {accuracy_per_class:.4f}')