In [1]:
import functools
import time

import numpy as np
from sklearn.datasets import fetch_openml, load_digits
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from utils import pca_np


In [None]:
# Value of score()'s pca_type argument that selects scikit-learn's PCA.
PCA_SKLEARN = 'pca_sklearn'
# Value of score()'s pca_type argument that selects the pca_np helper from utils.
PCA_OWN = 'pca_own'
# Passed as max_iter to LogisticRegression in score().
MAX_ITER = 50000
# Passed as test_size to train_test_split in score().
TEST_SIZE = 0.3

def log(func):
    """Decorate ``func`` to print its keyword arguments and elapsed run time.

    Before the call the wrapper prints the keyword arguments it received
    (positional arguments are not shown); after the call returns it prints
    the elapsed time in seconds. The wrapped function's return value is
    passed through unchanged.
    """
    # functools.wraps copies func's __name__/__doc__ onto the wrapper so the
    # decorated function stays recognisable in tracebacks and help().
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        print(f'Starting with {kwargs}')
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(f'Finished. Time elapsed {elapsed:0.2f}s\n')
        return result
    return wrapper

@log
def score(pca_type=None, n_components=None):
    """Train a logistic regression on the global ``data`` and report accuracy.

    The samples are split into train and test parts, standardised,
    optionally projected with PCA, and then used to fit and evaluate a
    ``LogisticRegression`` classifier.

    Args:
        pca_type: ``PCA_SKLEARN`` to reduce with scikit-learn's ``PCA``,
            ``PCA_OWN`` to reduce with the ``pca_np`` helper, or ``None``
            to skip dimensionality reduction.
        n_components: Number of principal components to keep; ``None``
            keeps every available component.

    Returns:
        The classifier's score on the held-out test split (also printed).

    Raises:
        ValueError: If ``pca_type`` is not one of the values listed above.
    """
    if pca_type not in (None, PCA_SKLEARN, PCA_OWN):
        # Fail loudly: silently skipping PCA would produce a result that
        # looks like a PCA run but is not.
        raise ValueError(f'Unknown pca_type: {pca_type!r}')

    # Split before any fitting so the scaler and PCA only ever see training
    # data; fitting them on the full set leaks test statistics into the model.
    images_train, images_test, labels_train, labels_test = train_test_split(
        data.data, data.target, test_size=TEST_SIZE, random_state=0)

    scaler = StandardScaler()
    images_train = scaler.fit_transform(images_train)
    images_test = scaler.transform(images_test)

    if pca_type == PCA_SKLEARN:
        pca = PCA(n_components=n_components)
        pca.fit(images_train)
        images_train = pca.transform(images_train)
        images_test = pca.transform(images_test)
    elif pca_type == PCA_OWN:
        # NOTE(review): assumes pca_np returns eigenvectors as columns,
        # ordered by decreasing eigenvalue — confirm against utils.pca_np.
        _, data_evecs, data_mean = pca_np(images_train)
        data_evecs_n = data_evecs[:, :n_components]
        # Centre both splits with the training mean before projecting.
        images_train = np.dot(images_train - data_mean, data_evecs_n)
        images_test = np.dot(images_test - data_mean, data_evecs_n)

    logistic_regression = LogisticRegression(max_iter=MAX_ITER, solver='lbfgs')
    logistic_regression.fit(images_train, labels_train)

    accuracy = logistic_regression.score(images_test, labels_test)

    print(f'Score: {accuracy}')
    return accuracy

# Fetch the 'mnist_784' dataset from OpenML once; score() reads it through
# the module-level name `data`.
data = fetch_openml('mnist_784')

# Run the pca_np-based reduction at increasing component counts, then the
# scikit-learn PCA at 200 components, then a run without any PCA.
for component_count in (10, 50, 100, 200):
    score(n_components=component_count, pca_type=PCA_OWN)
score(n_components=200, pca_type=PCA_SKLEARN)
score(pca_type=None)