# Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier
import tensorflow as tf
from tensorflow import keras

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
469/469 - 2s - loss: 0.3275 - accuracy: 0.9087 - 2s/epoch - 5ms/step
Epoch 2/5
469/469 - 2s - loss: 0.1321 - accuracy: 0.9616 - 2s/epoch - 4ms/step
Epoch 3/5
469/469 - 2s - loss: 0.0914 - accuracy: 0.9721 - 2s/epoch - 4ms/step
Epoch 4/5
469/469 - 2s - loss: 0.0688 - accuracy: 0.9793 - 2s/epoch - 4ms/step
Epoch 5/5
469/469 - 2s - loss: 0.0559 - accuracy: 0.9829 - 2s/epoch - 4ms/step
[Baseline NN] Test accuracy: 0.9741

=== Baseline NN ===
Accuracy: 0.9741
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.97      0.97      1032
           3       0.95      0.99      0.97      1010
           4       0.98      0.97      0.97       982
           5       0.98      0.96      0.97       892
           6       0.98      0.98    

[WinError 2] The system cannot find the file specified
  File "c:\Users\noams\anaconda3\envs\mnist_env\lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
  File "c:\Users\noams\anaconda3\envs\mnist_env\lib\subprocess.py", line 505, in run
    with Popen(*popenargs, **kwargs) as process:
  File "c:\Users\noams\anaconda3\envs\mnist_env\lib\subprocess.py", line 951, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\noams\anaconda3\envs\mnist_env\lib\subprocess.py", line 1436, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,


[KNN] k=3, Accuracy: 0.9705

=== KNN ===
Accuracy: 0.9705
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.96      1.00      0.98      1135
           2       0.98      0.97      0.97      1032
           3       0.96      0.97      0.96      1010
           4       0.98      0.97      0.97       982
           5       0.97      0.96      0.96       892
           6       0.98      0.99      0.98       958
           7       0.96      0.96      0.96      1028
           8       0.99      0.94      0.96       974
           9       0.96      0.96      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

Confusion Matrix:
[[ 974    1    1    0    0    1    2    1    0    0]
 [   0 1133    2    0    0    0    0    0    0    0]
 [  10    9  996    2    0    0    0

TypeError: __init__() got an unexpected keyword argument 'base_estimator'

# Function Definitions

In [2]:
###############################################################################
# 1. DATA LOADING & PREPROCESSING
###############################################################################
def load_and_preprocess_data(use_pca=False, pca_variance=0.95):
    """
    Fetch MNIST through Keras and return it as flat, scaled feature matrices.

    Every 28x28 image is reshaped into a 784-element row, cast to float32 and
    divided by 255. When `use_pca` is set, a PCA is fitted on the training
    split only and then applied to both splits, so the test data never
    influences the projection.

    Arguments:
        use_pca (bool): Apply PCA after scaling when True.
        pca_variance (float): Handed to PCA as `n_components`; ignored when
            `use_pca` is False. E.g., 0.95.

    Returns:
        X_train, X_test: Feature arrays (train/test).
        y_train, y_test: Label arrays (train/test), returned as loaded.
    """
    pixels_per_image = 28 * 28

    train_split, test_split = keras.datasets.mnist.load_data()
    train_images, y_train = train_split
    test_images, y_test = test_split

    # Flatten -> float32 -> scale, one chained expression per split.
    X_train = train_images.reshape((train_images.shape[0], pixels_per_image)).astype('float32') / 255.0
    X_test = test_images.reshape((test_images.shape[0], pixels_per_image)).astype('float32') / 255.0

    if not use_pca:
        return X_train, X_test, y_train, y_test

    reducer = PCA(n_components=pca_variance)
    X_train = reducer.fit_transform(X_train)
    X_test = reducer.transform(X_test)
    return X_train, X_test, y_train, y_test

###############################################################################
# 2. BASELINE MODEL: SIMPLE NEURAL NETWORK
###############################################################################
def build_baseline_neural_network(input_dim, num_classes=10):
    """
    Builds a simple feed-forward neural network using Keras Sequential API.

    The network is two ReLU hidden layers (128 and 64 units) followed by a
    softmax output layer, compiled with the Adam optimizer and sparse
    categorical cross-entropy, so labels are expected as integer class ids
    rather than one-hot vectors.

    Arguments:
        input_dim (int): Dimensionality of input features (784 for raw MNIST, or fewer if PCA is used).
        num_classes (int): Number of output classes. Default is 10 for digits [0..9].

    Returns:
        A compiled tf.keras Sequential model.
    """
    model = keras.Sequential([
        # keras.Input(shape=...) declares the input without relying on the
        # `input_shape` keyword of InputLayer, which newer Keras releases
        # deprecate in favour of `shape`.
        keras.Input(shape=(input_dim,)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )
    return model

def train_baseline_nn(X_train, y_train, X_test, y_test, epochs=5, batch_size=128):
    """
    Fit the baseline network on the training split and score it on the test split.

    The input width is read from the second dimension of `X_train`, so the
    same function works with raw pixels or PCA-reduced features. The model is
    always built with 10 output classes.

    Arguments:
        X_train, y_train: Training data and labels.
        X_test, y_test: Test data and labels.
        epochs (int): Number of training epochs.
        batch_size (int): Batch size for training.

    Returns:
        model: The trained neural network.
        test_accuracy (float): Accuracy on the test set.
    """
    n_features = X_train.shape[1]
    network = build_baseline_neural_network(input_dim=n_features, num_classes=10)

    # verbose=2 prints one summary line per epoch.
    network.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2)

    # evaluate() yields (loss, accuracy); only the accuracy is reported.
    _, test_accuracy = network.evaluate(X_test, y_test, verbose=0)
    print(f"[Baseline NN] Test accuracy: {test_accuracy:.4f}")

    return network, test_accuracy

###############################################################################
# 3. CLASSICAL MACHINE LEARNING MODELS
###############################################################################

# 3.1 KNN
def train_knn(X_train, y_train, X_test, y_test, k=3):
    """
    Fit a K-Nearest Neighbors classifier and report its test accuracy.

    Arguments:
        X_train, y_train: Training data and labels.
        X_test, y_test: Test data and labels.
        k (int): Number of neighbors.

    Returns:
        The fitted classifier and its accuracy on the test set.
    """
    classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    accuracy = accuracy_score(y_test, classifier.predict(X_test))
    print(f"[KNN] k={k}, Accuracy: {accuracy:.4f}")
    return classifier, accuracy

# 3.2 SVM
def train_svm(X_train, y_train, X_test, y_test, kernel='rbf'):
    """
    Fit a Support Vector Machine classifier and report its test accuracy.

    Only the kernel is configurable; every other SVC setting is left at its
    library default.

    Arguments:
        X_train, y_train: Training data and labels.
        X_test, y_test: Test data and labels.
        kernel (str): Kernel type ('linear', 'rbf', 'poly', etc.).

    Returns:
        The fitted classifier and its accuracy on the test set.
    """
    classifier = SVC(kernel=kernel)
    classifier.fit(X_train, y_train)

    test_predictions = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, test_predictions)
    print(f"[SVM] kernel={kernel}, Accuracy: {accuracy:.4f}")

    return classifier, accuracy

# 3.3 Decision Tree
def train_decision_tree(X_train, y_train, X_test, y_test, max_depth=None):
    """
    Fit a Decision Tree classifier and report its test accuracy.

    The tree is built with random_state=42.

    Arguments:
        X_train, y_train: Training data and labels.
        X_test, y_test: Test data and labels.
        max_depth (int or None): The maximum depth of the tree. If None, no maximum.

    Returns:
        The fitted classifier and its accuracy on the test set.
    """
    tree = DecisionTreeClassifier(max_depth=max_depth, random_state=42).fit(X_train, y_train)
    accuracy = accuracy_score(y_test, tree.predict(X_test))
    print(f"[DecisionTree] max_depth={max_depth}, Accuracy: {accuracy:.4f}")
    return tree, accuracy

# 3.4 AdaBoost
def train_adaboost(X_train, y_train, X_test, y_test, n_estimators=50):
    """
    Fit an AdaBoost classifier over depth-1 decision trees and report its test accuracy.

    The ensemble is built with random_state=42.

    Arguments:
        X_train, y_train: Training data and labels.
        X_test, y_test: Test data and labels.
        n_estimators (int): Number of weak learners.

    Returns:
        The fitted classifier and its accuracy on the test set.
    """
    # Each weak learner is a single-split tree.
    stump = DecisionTreeClassifier(max_depth=1)
    booster = AdaBoostClassifier(estimator=stump, n_estimators=n_estimators, random_state=42)
    booster.fit(X_train, y_train)

    test_predictions = booster.predict(X_test)
    accuracy = accuracy_score(y_test, test_predictions)
    print(f"[AdaBoost] n_estimators={n_estimators}, Accuracy: {accuracy:.4f}")

    return booster, accuracy

# 3.5 Random Forest
def train_random_forest(X_train, y_train, X_test, y_test, n_estimators=100):
    """
    Fit a Random Forest classifier and report its test accuracy.

    The forest is built with random_state=42.

    Arguments:
        X_train, y_train: Training data and labels.
        X_test, y_test: Test data and labels.
        n_estimators (int): Number of trees in the forest.

    Returns:
        The fitted classifier and its accuracy on the test set.
    """
    forest = RandomForestClassifier(n_estimators=n_estimators, random_state=42).fit(X_train, y_train)
    accuracy = accuracy_score(y_test, forest.predict(X_test))
    print(f"[RandomForest] n_estimators={n_estimators}, Accuracy: {accuracy:.4f}")
    return forest, accuracy

###############################################################################
# 4. OPTIONAL ENSEMBLE
###############################################################################
def train_ensemble(models, X_test, y_test):
    """
    Combine already-trained classifiers by majority vote and score the result.

    Nothing is fitted here: each model is only asked to predict on `X_test`.
    A model may be listed more than once to give it extra votes; its
    predictions are computed once and reused for every occurrence.

    If a model's `predict` returns a 2-D array with more than one column
    (per-class scores, as a Keras softmax model does), the column index of
    the largest score is used as the predicted label. That is only a valid
    label when classes are numbered 0..n_classes-1.

    Ties are resolved in favour of the smallest label value.

    Arguments:
        models: Iterable of (model_name, model) tuples; each model must
            expose `predict(X)`.
        X_test: Test features passed to every model.
        y_test: True labels for `X_test`.

    Returns:
        final_preds (np.ndarray): Voted label per sample.
        ensemble_acc (float): Fraction of samples whose voted label equals `y_test`.

    Raises:
        ValueError: If `models` is empty or `y_test` does not have one label
            per predicted sample.
    """
    # Collect one column of label predictions per listed model, running each
    # distinct model object only once (predict can be expensive, e.g. SVM/KNN).
    predictions_by_model = {}
    columns = []
    for _name, model in models:
        key = id(model)
        if key not in predictions_by_model:
            raw = np.asarray(model.predict(X_test))
            if raw.ndim == 2 and raw.shape[1] > 1:
                # Per-class scores -> index of the best-scoring class.
                raw = raw.argmax(axis=1)
            predictions_by_model[key] = raw.reshape(-1)
        columns.append(predictions_by_model[key])

    if not columns:
        raise ValueError("train_ensemble needs at least one (name, model) pair.")

    # Shape (num_samples, num_models)
    predictions = np.stack(columns, axis=1)

    # Majority vote: count, for every sample, how many models chose each label.
    # `labels` is sorted, and argmax returns the first maximum, so a tie goes
    # to the smallest label.
    labels, inverse = np.unique(predictions, return_inverse=True)
    inverse = inverse.reshape(predictions.shape)
    counts = np.empty((predictions.shape[0], labels.size), dtype=np.intp)
    for label_idx in range(labels.size):
        counts[:, label_idx] = np.count_nonzero(inverse == label_idx, axis=1)
    final_preds = labels[counts.argmax(axis=1)]

    # Evaluate: for 1-D label arrays the accuracy is the fraction of matches.
    y_true = np.asarray(y_test).reshape(-1)
    if y_true.shape[0] != final_preds.shape[0]:
        raise ValueError(
            f"y_test has {y_true.shape[0]} labels but the models produced "
            f"{final_preds.shape[0]} predictions."
        )
    ensemble_acc = float(np.mean(final_preds == y_true))
    print(f"[Ensemble] Majority Voting Accuracy: {ensemble_acc:.4f}")

    # Return final predictions if needed
    return final_preds, ensemble_acc

###############################################################################
# 5. EVALUATION FUNCTIONS
###############################################################################
def print_metrics_and_confusion_matrix(model_name, y_true, y_pred):
    """
    Print a titled evaluation summary for one model.

    The summary is, in order: a header with `model_name`, the accuracy to
    four decimals, the classification report, and the confusion matrix.
    Nothing is returned.
    """
    overall_accuracy = accuracy_score(y_true, y_pred)
    print(f"\n=== {model_name} ===")
    print(f"Accuracy: {overall_accuracy:.4f}")

    # Each section is a title line followed by the metric's printed form.
    sections = (
        ("Classification Report:", classification_report),
        ("Confusion Matrix:", confusion_matrix),
    )
    for title, metric_fn in sections:
        print(title)
        print(metric_fn(y_true, y_pred))


In [3]:
# --- Load data with optional PCA ---
# PCA is switched off here, so pca_variance has no effect and the arrays keep
# all 784 pixel features. Set use_pca=True to reduce dimensionality to 95% variance.
# The four arrays become notebook globals used by every cell below.
X_train, X_test, y_train, y_test = load_and_preprocess_data(use_pca=False, pca_variance=0.95)

In [4]:
# --- Baseline Neural Network ---
# Train for 5 epochs; the helper prints per-epoch progress and the test accuracy.
nn_model, nn_acc = train_baseline_nn(X_train, y_train, X_test, y_test, epochs=5, batch_size=128)
# The network outputs one score per class for each sample; argmax over axis 1
# turns those scores into a single predicted digit per sample.
y_pred_nn = np.argmax(nn_model.predict(X_test), axis=1)
print_metrics_and_confusion_matrix("Baseline NN", y_test, y_pred_nn)


Epoch 1/5
469/469 - 3s - loss: 0.3373 - accuracy: 0.9051 - 3s/epoch - 6ms/step
Epoch 2/5
469/469 - 2s - loss: 0.1349 - accuracy: 0.9603 - 2s/epoch - 5ms/step
Epoch 3/5
469/469 - 2s - loss: 0.0933 - accuracy: 0.9723 - 2s/epoch - 5ms/step
Epoch 4/5
469/469 - 2s - loss: 0.0716 - accuracy: 0.9783 - 2s/epoch - 5ms/step
Epoch 5/5
469/469 - 2s - loss: 0.0563 - accuracy: 0.9829 - 2s/epoch - 4ms/step
[Baseline NN] Test accuracy: 0.9743

=== Baseline NN ===
Accuracy: 0.9743
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.99      0.98      0.99      1135
           2       0.96      0.98      0.97      1032
           3       0.98      0.98      0.98      1010
           4       0.97      0.97      0.97       982
           5       0.99      0.96      0.97       892
           6       0.97      0.97      0.97       958
           7       0.97      0.98      0.97      1028
           8       0.

In [5]:

# --- KNN ---
# train_knn returns only the fitted model and its accuracy, so the test set is
# predicted again here to obtain the labels needed for the detailed report.
knn_model, knn_acc = train_knn(X_train, y_train, X_test, y_test, k=3)
y_pred_knn = knn_model.predict(X_test)
print_metrics_and_confusion_matrix("KNN", y_test, y_pred_knn)


[KNN] k=3, Accuracy: 0.9705

=== KNN ===
Accuracy: 0.9705
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.96      1.00      0.98      1135
           2       0.98      0.97      0.97      1032
           3       0.96      0.97      0.96      1010
           4       0.98      0.97      0.97       982
           5       0.97      0.96      0.96       892
           6       0.98      0.99      0.98       958
           7       0.96      0.96      0.96      1028
           8       0.99      0.94      0.96       974
           9       0.96      0.96      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

Confusion Matrix:
[[ 974    1    1    0    0    1    2    1    0    0]
 [   0 1133    2    0    0    0    0    0    0    0]
 [  10    9  996    2    0    0    0

In [6]:

# --- SVM ---
# train_svm returns only the fitted model and its accuracy, so the test set is
# predicted again here to obtain the labels needed for the detailed report.
svm_model, svm_acc = train_svm(X_train, y_train, X_test, y_test, kernel='rbf')
y_pred_svm = svm_model.predict(X_test)
print_metrics_and_confusion_matrix("SVM (RBF)", y_test, y_pred_svm)


[SVM] kernel=rbf, Accuracy: 0.9792

=== SVM (RBF) ===
Accuracy: 0.9792
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       980
           1       0.99      0.99      0.99      1135
           2       0.98      0.97      0.98      1032
           3       0.97      0.99      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.99      0.98      0.98       892
           6       0.99      0.99      0.99       958
           7       0.98      0.97      0.97      1028
           8       0.97      0.98      0.97       974
           9       0.97      0.96      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000

Confusion Matrix:
[[ 973    0    1    0    0    2    1    1    2    0]
 [   0 1126    3    1    0    1    1    1    2    0]
 [   6    1 1006    2  

In [7]:

# --- Decision Tree ---
# max_depth=None lets the tree grow without a depth limit.
# The test set is predicted again here to obtain labels for the detailed report.
dt_model, dt_acc = train_decision_tree(X_train, y_train, X_test, y_test, max_depth=None)
y_pred_dt = dt_model.predict(X_test)
print_metrics_and_confusion_matrix("Decision Tree", y_test, y_pred_dt)


[DecisionTree] max_depth=None, Accuracy: 0.8754

=== Decision Tree ===
Accuracy: 0.8754
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.93      0.92       980
           1       0.95      0.96      0.95      1135
           2       0.86      0.86      0.86      1032
           3       0.83      0.85      0.84      1010
           4       0.86      0.87      0.87       982
           5       0.85      0.83      0.84       892
           6       0.90      0.88      0.89       958
           7       0.91      0.90      0.91      1028
           8       0.82      0.81      0.81       974
           9       0.85      0.85      0.85      1009

    accuracy                           0.88     10000
   macro avg       0.87      0.87      0.87     10000
weighted avg       0.88      0.88      0.88     10000

Confusion Matrix:
[[ 914    1    7    4    6    9   16    5    8   10]
 [   0 1084    9    8    2    9    5    3   14    1]
 [  13

In [8]:

# --- AdaBoost ---
# Boosts 50 depth-1 trees (the base learner is fixed inside train_adaboost).
# The test set is predicted again here to obtain labels for the detailed report.
ada_model, ada_acc = train_adaboost(X_train, y_train, X_test, y_test, n_estimators=50)
y_pred_ada = ada_model.predict(X_test)
print_metrics_and_confusion_matrix("AdaBoost", y_test, y_pred_ada)


[AdaBoost] n_estimators=50, Accuracy: 0.5756

=== AdaBoost ===
Accuracy: 0.5756
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.60      0.72       980
           1       0.90      0.32      0.48      1135
           2       0.56      0.67      0.61      1032
           3       0.44      0.65      0.53      1010
           4       0.50      0.51      0.50       982
           5       0.46      0.50      0.48       892
           6       0.82      0.70      0.76       958
           7       0.69      0.60      0.64      1028
           8       0.52      0.75      0.62       974
           9       0.43      0.47      0.45      1009

    accuracy                           0.58     10000
   macro avg       0.62      0.58      0.58     10000
weighted avg       0.63      0.58      0.58     10000

Confusion Matrix:
[[584   2  22   7  12 285  15   7  45   1]
 [  0 368 248 446   1   6   3   8  53   2]
 [ 10  12 696  38  19  12  79  22 

In [9]:

# --- Random Forest ---
# Trains a forest of 100 trees.
# The test set is predicted again here to obtain labels for the detailed report.
rf_model, rf_acc = train_random_forest(X_train, y_train, X_test, y_test, n_estimators=100)
y_pred_rf = rf_model.predict(X_test)
print_metrics_and_confusion_matrix("Random Forest", y_test, y_pred_rf)


[RandomForest] n_estimators=100, Accuracy: 0.9704

=== Random Forest ===
Accuracy: 0.9704
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.96      0.97      0.97      1032
           3       0.96      0.96      0.96      1010
           4       0.97      0.97      0.97       982
           5       0.98      0.96      0.97       892
           6       0.98      0.98      0.98       958
           7       0.97      0.96      0.97      1028
           8       0.96      0.95      0.96       974
           9       0.96      0.95      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000

Confusion Matrix:
[[ 971    0    0    0    0    2    3    1    3    0]
 [   0 1127    2    2    0    1    2    0    1    0]
 [  

In [None]:

# --- Ensemble (Majority Voting) ---
# Combine the classifiers trained in the cells above; train_ensemble only
# calls .predict on them, so nothing is refit here.
# You can also combine fewer or more, or use VotingClassifier from sklearn
models_list = [
    ("KNN", knn_model),
    # NOTE(review): SVM is listed twice, so it casts two of the four votes.
    # Confirm this weighting is intentional rather than a copy-paste slip.
    ("SVM", svm_model),
    ("SVM", svm_model),
    # ("DT", dt_model),
    # ("Ada", ada_model),
    ("RF", rf_model)
    # If desired, you can also integrate the NN by converting its predictions
    # to a scikit-learn style predictor, but that requires a small wrapper.
]
ensemble_preds, ensemble_acc = train_ensemble(models_list, X_test, y_test)
print_metrics_and_confusion_matrix("Ensemble Majority Voting", y_test, ensemble_preds)


[Ensemble] Majority Voting Accuracy: 0.9785

=== Ensemble Majority Voting ===
Accuracy: 0.9785
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.98      0.99      0.99      1135
           2       0.97      0.98      0.98      1032
           3       0.97      0.98      0.98      1010
           4       0.98      0.98      0.98       982
           5       0.99      0.97      0.98       892
           6       0.99      0.99      0.99       958
           7       0.97      0.97      0.97      1028
           8       0.98      0.97      0.97       974
           9       0.98      0.96      0.97      1009

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000

Confusion Matrix:
[[ 973    0    1    0    0    1    2    1    2    0]
 [   0 1129    2    1    0    1    1    0    1    0]

In [None]:

# --- Ensemble (Majority Voting) ---
# Combine the classifiers trained in the cells above; train_ensemble only
# calls .predict on them, so nothing is refit here.
# You can also combine fewer or more, or use VotingClassifier from sklearn
models_list = [
    ("KNN", knn_model),
    # NOTE(review): SVM is listed twice, so it casts two of the four votes.
    # Confirm this weighting is intentional rather than a copy-paste slip.
    ("SVM", svm_model),
    ("SVM", svm_model),
    # ("DT", dt_model),
    # ("Ada", ada_model),
    ("RF", rf_model)
    # If desired, you can also integrate the NN by converting its predictions
    # to a scikit-learn style predictor, but that requires a small wrapper.
]
ensemble_preds, ensemble_acc = train_ensemble(models_list, X_test, y_test)
print_metrics_and_confusion_matrix("Ensemble Majority Voting", y_test, ensemble_preds)
