# Full data experiments

In [5]:
from prutils import *
from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA
from sklearn.decomposition import KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Alternative configuration that also includes the image-features dataset;
# swap it in for the single-dataset lists below to evaluate both datasets.
# train_datasets = [RAW_PIXELS_DATASET, IM_FEATURES_DATASET]
# test_datasets = [RAW_PIXELS_TEST, IM_FEATURES_TEST]
# pca_n = [40, 15]
# ica_n = [30, 15]
# kpca_n = [27, 16]

# Parallel lists: index i of every list belongs to the same experiment.
train_datasets = [RAW_PIXELS_DATASET]
test_datasets = [RAW_PIXELS_TEST]
pca_n = [40]   # n_components passed to PCA, per dataset
ica_n = [30]   # n_components passed to FastICA, per dataset
kpca_n = [27]  # n_components passed to KernelPCA, per dataset

for i, train_dataset in enumerate(train_datasets):
    test_dataset = test_datasets[i]
    train_X, train_y = get_full_data(train_dataset, split_validation=False)

    # Baseline: classifier fitted on the untransformed training data.
    # The printed label says "feature reduction" although none is applied here;
    # it is kept unchanged so the output matches previously recorded runs.
    lda = LDA().fit(train_X, train_y)
    performance = estimate_classifier_performance_normal(lda, test_dataset)
    print("{} - feature reduction: {}%".format(test_dataset, performance))

    # PCA: fit the reducer on the training data, train on the reduced data.
    # The fitted reducer is handed to the evaluation helper as well
    # (presumably so the test data is projected the same way -- confirm in prutils).
    pca = PCA(n_components=pca_n[i])
    lda = LDA().fit(pca.fit_transform(train_X), train_y)
    performance = estimate_classifier_performance_transform(lda, test_dataset, pca)
    print("{} - PCA: {}%".format(test_dataset, performance))

    # ICA: same procedure with FastICA as the reducer.
    ica = FastICA(n_components=ica_n[i])
    lda = LDA().fit(ica.fit_transform(train_X), train_y)
    performance = estimate_classifier_performance_transform(lda, test_dataset, ica)
    print("{} - ICA: {}%".format(test_dataset, performance))

    # Kernel PCA: same procedure with KernelPCA (default kernel) as the reducer.
    kpca = KernelPCA(n_components=kpca_n[i])
    lda = LDA().fit(kpca.fit_transform(train_X), train_y)
    performance = estimate_classifier_performance_transform(lda, test_dataset, kpca)
    print("{} - KPCA: {}%".format(test_dataset, performance))

data/preprocessed_test_nist_data.csv - feature reduction: 89.4%
data/preprocessed_test_nist_data.csv - PCA: 90.5%
data/preprocessed_test_nist_data.csv - ICA: 90.0%
data/preprocessed_test_nist_data.csv - KPCA: 90.0%


# Batched data experiments

In [6]:
from prutils import *
from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA
from sklearn.decomposition import KernelPCA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as LDA 
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Alternative configuration that also includes the image-features dataset;
# swap it in for the single-dataset lists below to evaluate both datasets.
# train_datasets = [RAW_PIXELS_DATASET, IM_FEATURES_DATASET]
# test_datasets = [RAW_PIXELS_TEST, IM_FEATURES_TEST]
# pca_n = [30, 15]
# ica_n = [25, 15]
# kpca_n = [23, 16]

# Parallel lists: index i of every list belongs to the same experiment.
train_datasets = [RAW_PIXELS_DATASET]
test_datasets = [RAW_PIXELS_TEST]
pca_n = [30]   # n_components passed to PCA, per dataset
ica_n = [25]   # n_components passed to FastICA, per dataset
kpca_n = [23]  # n_components passed to KernelPCA, per dataset

# Number of random batches drawn per dataset; reported scores are averages.
runs = 50

# NOTE(review): in this cell the name `LDA` is bound to
# QuadraticDiscriminantAnalysis by the import above, so every `LDA()` below
# builds a QDA classifier -- confirm that this is intended.
for i, train_dataset in enumerate(train_datasets):
    test_dataset = test_datasets[i]

    # Running sums of the per-run scores, one accumulator per method.
    normal_performance = 0
    pca_performance = 0
    ica_performance = 0
    kpca_performance = 0

    for _ in range(runs):
        train_X, train_y = get_random_batch(train_dataset, split_validation=False)

        # Baseline: classifier fitted on the untransformed batch.
        lda = LDA().fit(train_X, train_y)
        normal_performance += estimate_classifier_performance_normal(lda, test_dataset)

        # PCA: fit the reducer on the batch, train on the reduced batch.
        pca = PCA(n_components=pca_n[i])
        lda = LDA().fit(pca.fit_transform(train_X), train_y)
        pca_performance += estimate_classifier_performance_transform(lda, test_dataset, pca)

        # ICA: same procedure with FastICA as the reducer.
        ica = FastICA(n_components=ica_n[i])
        lda = LDA().fit(ica.fit_transform(train_X), train_y)
        ica_performance += estimate_classifier_performance_transform(lda, test_dataset, ica)

        # Kernel PCA: same procedure with KernelPCA (default kernel) as the reducer.
        kpca = KernelPCA(n_components=kpca_n[i])
        lda = LDA().fit(kpca.fit_transform(train_X), train_y)
        kpca_performance += estimate_classifier_performance_transform(lda, test_dataset, kpca)

    # Report the mean score over all runs. float() keeps true division on Python 2.
    # The first label says "feature reduction" although it reports the baseline
    # without any reduction; it is kept unchanged to match recorded output.
    print("{} - feature reduction: {}%".format(test_dataset, normal_performance / float(runs)))
    print("{} - PCA: {}%".format(test_dataset, pca_performance / float(runs)))
    print("{} - ICA: {}%".format(test_dataset, ica_performance / float(runs)))
    print("{} - KPCA: {}%".format(test_dataset, kpca_performance / float(runs)))

data/preprocessed_test_nist_data.csv - feature reduction: 12.874%
data/preprocessed_test_nist_data.csv - PCA: 29.724%
data/preprocessed_test_nist_data.csv - ICA: 22.188%
data/preprocessed_test_nist_data.csv - KPCA: 29.65%


### PCA experiments

In [1]:
from prutils import *
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA 
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Run the PCA experiment with an LDA classifier on both datasets, once with
# batch=False and once with batch=True, then hand all four results to
# plot_performance.
# NOTE(review): the meaning of max_components and batch is defined in prutils --
# presumably the upper bound on components tried and full vs. batched training
# data; confirm there.
pca_full_raw = run_PCA_experiment(
    LDA(), RAW_PIXELS_DATASET, max_components=50, batch=False)
pca_batch_raw = run_PCA_experiment(
    LDA(), RAW_PIXELS_DATASET, max_components=50, batch=True)

pca_full_features = run_PCA_experiment(
    LDA(), IM_FEATURES_DATASET, max_components=50, batch=False)
pca_batch_features = run_PCA_experiment(
    LDA(), IM_FEATURES_DATASET, max_components=50, batch=True)

plot_performance(
    [pca_full_raw, pca_batch_raw, pca_full_features, pca_batch_features],
    show_results=False,
    save_to_file="lda_pca")

### ICA experiments

In [1]:
from prutils import *
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Run the ICA experiment with an LDA classifier on both datasets, once with
# batch=False and once with batch=True, then hand all four results to
# plot_performance.
# NOTE(review): the meaning of max_components and batch is defined in prutils --
# presumably the upper bound on components tried and full vs. batched training
# data; confirm there.
ica_full_raw = run_ICA_experiment(
    LDA(), RAW_PIXELS_DATASET, max_components=50, batch=False)
ica_batch_raw = run_ICA_experiment(
    LDA(), RAW_PIXELS_DATASET, max_components=50, batch=True)

ica_full_features = run_ICA_experiment(
    LDA(), IM_FEATURES_DATASET, max_components=50, batch=False)
ica_batch_features = run_ICA_experiment(
    LDA(), IM_FEATURES_DATASET, max_components=50, batch=True)

plot_performance(
    [ica_full_raw, ica_batch_raw, ica_full_features, ica_batch_features],
    show_results=False,
    save_to_file="lda_ica")

### Kernel PCA experiments

In [None]:
from prutils import *
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


# Run the Kernel PCA experiment with an LDA classifier on both datasets and
# hand all four results to plot_performance.
# NOTE(review): the "full" and "batch" calls below are identical -- unlike the
# PCA and ICA cells, no batch=False / batch=True argument is passed. If
# run_KPCA_experiment accepts a `batch` parameter, these calls probably need
# it; confirm against its definition in prutils before relying on the plot.
kpca_full_raw = run_KPCA_experiment(
    LDA(), RAW_PIXELS_DATASET, max_components=50)
kpca_batch_raw = run_KPCA_experiment(
    LDA(), RAW_PIXELS_DATASET, max_components=50)

kpca_full_features = run_KPCA_experiment(
    LDA(), IM_FEATURES_DATASET, max_components=50)
kpca_batch_features = run_KPCA_experiment(
    LDA(), IM_FEATURES_DATASET, max_components=50)

plot_performance(
    [kpca_full_raw, kpca_batch_raw, kpca_full_features, kpca_batch_features],
    show_results=False,
    save_to_file="lda_kpca")