**Evaluation of Different Dimensionality Reduction Methods**

In [None]:
# compare PCA, truncated SVD and Isomap as dimensionality reduction steps before logistic regression
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.manifold import Isomap
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import time

In [None]:
# Synthetic classification dataset: 1000 samples with 500 features, of which
# 20 are informative and the other 480 are redundant.
X, y = make_classification(
    n_samples=1000,
    n_features=500,
    n_informative=20,
    n_redundant=480,
    random_state=7,
)

# Hold out 20% of the rows for testing, splitting without shuffling.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.2,
)

In [None]:
# Reduce the 500 input features to 20 principal components.
# random_state is fixed because PCA's default svd_solver='auto' may select a
# randomized solver, which would otherwise make the projection vary between runs.
trans = PCA(n_components=20, random_state=7)
# Fit on the training rows only: fitting on all of X would let the held-out
# rows shape the projection and leak test information into the evaluation.
X_trainPCA = trans.fit_transform(X_train)
# The test rows are only projected with the train-fitted transform.
X_testPCA = trans.transform(X_test)
# Full reduced matrix (train-fitted projection applied to every row), kept
# available for inspection, e.g. X_PCA[:3, :].
X_PCA = trans.transform(X)

In [None]:
# Reduce the 500 input features to 20 components with truncated SVD.
# random_state is fixed so the solver's random initialisation is repeatable.
trans = TruncatedSVD(n_components=20, random_state=7)
# Fit on the training rows only: fitting on all of X would let the held-out
# rows shape the projection and leak test information into the evaluation.
X_trainSVD = trans.fit_transform(X_train)
# The test rows are only projected with the train-fitted transform.
X_testSVD = trans.transform(X_test)
# Full reduced matrix (train-fitted projection applied to every row), kept
# available for inspection, e.g. X_SVD[:3, :].
X_SVD = trans.transform(X)

In [None]:
# Embed the 500 input features into 20 dimensions with Isomap.
trans = Isomap(n_components=20)
# Fit on the training rows only: fitting on all of X would let the held-out
# rows shape the embedding and leak test information into the evaluation.
X_trainIso = trans.fit_transform(X_train)
# The test rows are only mapped into the train-fitted embedding.
X_testIso = trans.transform(X_test)
# Full reduced matrix (train-fitted embedding applied to every row), kept
# available for inspection, e.g. X_Iso[:3, :].
X_Iso = trans.transform(X)

In [None]:
# Baseline: fit logistic regression on the unreduced training features and
# print how long construction plus fitting took.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
# multi_class is left at its default: passing it explicitly is deprecated in
# recent scikit-learn releases, and the default behaves like 'auto'.
model = LogisticRegression(solver='liblinear')
NoRedu_Model = model.fit(X_train, y_train)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

In [None]:
# Fit logistic regression on the PCA-reduced training features and print how
# long construction plus fitting took.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
# multi_class is left at its default: passing it explicitly is deprecated in
# recent scikit-learn releases, and the default behaves like 'auto'.
model = LogisticRegression(solver='liblinear')
PCA_Model = model.fit(X_trainPCA, y_train)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

In [None]:
# Fit logistic regression on the SVD-reduced training features and print how
# long construction plus fitting took.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
# multi_class is left at its default: passing it explicitly is deprecated in
# recent scikit-learn releases, and the default behaves like 'auto'.
model = LogisticRegression(solver='liblinear')
SVD_Model = model.fit(X_trainSVD, y_train)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

In [None]:
# Fit logistic regression on the Isomap-reduced training features and print
# how long construction plus fitting took.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
# multi_class is left at its default: passing it explicitly is deprecated in
# recent scikit-learn releases, and the default behaves like 'auto'.
model = LogisticRegression(solver='liblinear')
Iso_Model = model.fit(X_trainIso, y_train)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

In [None]:
# Score every fitted model on its matching held-out feature matrix and print
# the accuracy as a percentage, in the order: no reduction, PCA, SVD, Isomap.
evaluations = (
    ("NoRedu", NoRedu_Model, X_test),
    ("PCA", PCA_Model, X_testPCA),
    ("SVD", SVD_Model, X_testSVD),
    ("Isomap", Iso_Model, X_testIso),
)

scores = {}
for label, fitted, features in evaluations:
    yhat = fitted.predict(features)
    accuracy = accuracy_score(y_test, yhat)
    scores[label] = accuracy * 100.0
    print('>%s: %.3f' % (label, scores[label]))

# Expose the individual percentages under their per-method names.
NoRedu_Acc = scores["NoRedu"]
PCA_Acc = scores["PCA"]
SVD_Acc = scores["SVD"]
Iso_Acc = scores["Isomap"]