In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import (KNeighborsClassifier,
                               NeighborhoodComponentsAnalysis)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the feature-selected dataset from the parent directory and preview
# its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='../feature_selection.csv')
df.head(n=5)

In [None]:
# n is the exclusive upper bound of the k sweep; random_state is the seed
# shared by every seeded step in this notebook.
n, random_state = 50, 42

# Separate the class label column from the predictor columns.
y = df["koi_disposition"]
X = df.drop(columns=["koi_disposition"])

In [None]:
# Hold out 30% of the rows for testing; the split is stratified on the label
# and seeded so that it is reproducible.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X, y,
    test_size=0.3,
    random_state=random_state,
    stratify=y,
)

In [None]:
# Compare PCA, LDA and NCA as a preprocessing step for k-nearest-neighbours,
# sweeping k over range(1, n) and recording the test accuracy of every
# (method, k) pair in `res` (method name -> list of accuracies, ordered by k).
#
# None of the projections depends on k, so each pipeline is fitted exactly
# once and its train/test embeddings are cached. Only the KNN classifier is
# refitted inside the sweep. Every estimator is either deterministic or
# seeded with `random_state`, so the scores match a refit-per-k loop.
#
# NOTE(review): both k and the projection method are chosen by their score on
# the test split, so the "best" accuracy is an optimistic estimate. Consider
# selecting on a validation split or with cross-validation instead.
pca = make_pipeline(StandardScaler(),
                    PCA(random_state=random_state))
lda = make_pipeline(StandardScaler(),
                    LinearDiscriminantAnalysis())
nca = make_pipeline(StandardScaler(),
                    NeighborhoodComponentsAnalysis(random_state=random_state))
# Make a list of the methods to be compared
dim_reduction_methods = [('PCA', pca), ('LDA', lda), ('NCA', nca)]

# Fit each projection once and cache the projected train/test matrices.
embeddings = {}
for name, model in dim_reduction_methods:
    model.fit(X_train, y_train)
    embeddings[name] = (model.transform(X_train), model.transform(X_test))

best = 0
bestn = 1
bestm = ""
res = {name: [] for name, _ in dim_reduction_methods}
for n_neighbors in range(1, n):
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    # Methods are visited in list order for each k, and only a strictly
    # better score replaces the incumbent, so ties keep the earliest pair.
    for name, _ in dim_reduction_methods:
        Z_train, Z_test = embeddings[name]
        knn.fit(Z_train, y_train)
        acc_knn = knn.score(Z_test, y_test)
        if acc_knn > best:
            best = acc_knn
            bestn = n_neighbors
            bestm = name
        res[name].append(acc_knn)
print("The best value was found with ", bestm, " and parameter k = ", bestn)

In [None]:
# Plot the accuracies stored in `res` against k, one curve per
# dimensionality-reduction method, and save the figure as a PDF.
xaxis = range(1, n)
fig, ax = plt.subplots()
# Explicit method order keeps the legend and line colours stable.
for method in ('PCA', 'LDA', 'NCA'):
    ax.plot(xaxis, res[method], label=method)
ax.set_xlabel("K value")
# The y-axis label and title let the saved figure be read on its own.
ax.set_ylabel("Accuracy")
ax.set_title("KNN accuracy by dimensionality-reduction method")
ax.grid(True)
ax.legend()
fig.savefig("../figures/knn.pdf")