In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [2]:
# Datasets to analyse, keyed by the human-readable name that is later used in
# figure titles and in the printed accuracy reports. Insertion order is the
# order in which the datasets are processed.
raw_datasets = dict(zip(
    ('Iris Dataset', 'Handwritten Digits Dataset', 'Wine Dataset'),
    (datasets.load_iris(), datasets.load_digits(), datasets.load_wine()),
))

In [None]:
def _scatter_by_class(points, labels, classes, class_names, xlabel, ylabel, title):
    """Show a 2-D scatter plot of ``points`` with one legend entry per class.

    Parameters
    ----------
    points : array indexed as ``points[mask, 0]`` / ``points[mask, 1]``
        Sample coordinates; only the first two columns are drawn.
    labels : array
        Class label of each row of ``points`` (compared element-wise to each
        entry of ``classes``).
    classes : sequence
        Distinct label values, one scatter series is drawn per value.
    class_names : sequence
        Legend text for each entry of ``classes`` (paired by position).
    xlabel, ylabel, title : str
        Axis labels and figure title.
    """
    fig, ax = plt.subplots()
    for name, c in zip(class_names, classes):
        # A boolean mask selects all rows of this class in one vectorised step.
        mask = labels == c
        ax.scatter(points[mask, 0], points[mask, 1], label=name)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    plt.show()
    # Release the figure explicitly; several figures are created per dataset.
    plt.close(fig)


for raw_label, raw_data in raw_datasets.items():
    target = np.unique(raw_data.target).tolist()
    target_names = raw_data.target_names.tolist()

    # Scatter plot of the first two dimensions of the original feature space.
    _scatter_by_class(raw_data.data, raw_data.target, target, target_names,
                      'First Dimension', 'Second Dimension',
                      'Original space (%s)' % raw_label)

    # PCA dimensionality reduction (fitted on the full dataset, for plotting only).
    pca_reduced = PCA(n_components=2).fit_transform(raw_data.data)
    # Scatter plot of the samples in the PCA subspace.
    _scatter_by_class(pca_reduced, raw_data.target, target, target_names,
                      'First Component', 'Second Component',
                      'PCA subspace (%s)' % raw_label)

    # LDA dimensionality reduction (supervised, so the labels are passed too).
    lda_reduced = LinearDiscriminantAnalysis(n_components=2).fit_transform(raw_data.data, raw_data.target)
    # Scatter plot of the samples in the LDA subspace.
    _scatter_by_class(lda_reduced, raw_data.target, target, target_names,
                      'First Dimension', 'Second Dimension',
                      'LDA subspace (%s)' % raw_label)

    # Classifiers to evaluate: LDF, QDF and 1-NN.
    clf_models = [LinearDiscriminantAnalysis(), QuadraticDiscriminantAnalysis(),
                  KNeighborsClassifier(n_neighbors=1)]
    clf_names = ['LDF', 'QDF', '1-NN']

    X_train, X_test, y_train, y_test = train_test_split(raw_data.data, raw_data.target, test_size=0.4,
                                                        random_state=0)

    # The reducers depend only on the training split, not on the classifier,
    # so fit them once per dataset; every classifier is then scored on the
    # same projection. They are fitted on the training data only and merely
    # applied to the test data.
    pca_model = PCA(n_components=2)
    pca_model.fit(X_train)
    pca_X_train = pca_model.transform(X_train)
    pca_X_test = pca_model.transform(X_test)

    lda_model = LinearDiscriminantAnalysis(n_components=2)
    lda_model.fit(X_train, y_train)
    lda_X_train = lda_model.transform(X_train)
    lda_X_test = lda_model.transform(X_test)

    reduced_splits = [
        ('PCA', pca_X_train, pca_X_test),
        ('LDA', lda_X_train, lda_X_test),
    ]

    for clf_name, clf in zip(clf_names, clf_models):
        # The same estimator instance is re-fitted for each subspace; the
        # score is taken right after each fit, before the next re-fit.
        for reducer_name, reduced_train, reduced_test in reduced_splits:
            clf.fit(reduced_train, y_train)
            score = clf.score(reduced_test, y_test)
            print('%s, %s reduced, %s mean accuracy: %s. \n' % (raw_label, reducer_name, clf_name, score))