**Chapter 8 – Dimensionality Reduction**: Locally Linear Embedding

In this notebook, we build a small 3-dimensional Swiss roll dataset and apply Locally Linear Embedding (LLE) to reduce it to 2 dimensions.

# Manifold learning

# LLE: Locally Linear Embedding

In [None]:
from sklearn.datasets import make_swiss_roll, make_circles, make_moons

# Sample a noisy 3-D swiss roll; `t` is the position of each point along the roll.
X, t = make_swiss_roll(
    n_samples=1000,
    noise=0.2,
    random_state=41,
)
# Threshold the continuous position so it becomes a binary classification label.
t = t > 10

In [None]:
# Feature matrix dimensions: (n_samples, n_features)
X.shape

(1000, 3)

In [None]:
# Label vector dimensions: one boolean label per sample
t.shape

(1000,)

# You can now build a classifier of your choice (SGD, logistic regression, or SVC) and measure its performance

In [None]:
# Your code here to train a model

# Evaluate your model

In [None]:
# Your code to evaluate model goes here

In [None]:
#@title <font color="green">Xuan's solution</font> { display-mode: "form" }
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
# Baseline: scale the raw 3-D features to [0, 1], then fit a logistic regression.
baseline_model = make_pipeline(MinMaxScaler(), LogisticRegression())
# Mean F1 over 3 cross-validation folds; the bare last expression is the cell output.
baseline_f1_scores = cross_val_score(baseline_model, X, t, cv=3, scoring="f1")
baseline_f1_scores.mean()

# Now let's try LLE and see how reducing the dimensions affects model performance

In [None]:
from sklearn.manifold import LocallyLinearEmbedding

# Configure LLE to unroll the data into 2 dimensions using 10 neighbours per point.
lle = LocallyLinearEmbedding(
    n_components=2,
    n_neighbors=10,
    random_state=42,
)
# Fit on the 3-D points and keep the resulting embedding.
X_reduced = lle.fit_transform(X)

In [None]:
# Report the shape before and after the LLE reduction.
for label, data in (
    ("Dimensions of the original data is:", X),
    ("Dimensions of the reduced data is:", X_reduced),
):
    print(label, data.shape)


Dimensions of the original data is: (1000, 3)
Dimensions of the reduced data is: (1000, 2)


# Now retrain the same model that you built, this time on the reduced data, and evaluate its performance. What is the impact?

Keep in mind that this is a toy dataset: depending on the noise level, you will get different results.

In [None]:
#@title <font color="green">Xuan's solution</font> { display-mode: "form" }
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.manifold import LocallyLinearEmbedding
# Same classifier as the baseline, with an LLE step inserted after scaling.
# Keeping LLE inside the pipeline means it is re-fitted on each training fold.
lle_model = make_pipeline(
    MinMaxScaler(),
    LocallyLinearEmbedding(n_components=2, n_neighbors=10, random_state=42),
    LogisticRegression(),
)
# Mean F1 over 3 cross-validation folds; the bare last expression is the cell output.
lle_f1_scores = cross_val_score(lle_model, X, t, cv=3, scoring="f1")
lle_f1_scores.mean()

# Below are a few more manifold learning techniques for dimensionality reduction.

# MDS, Isomap and t-SNE

In [None]:
from sklearn.manifold import MDS

# Multidimensional scaling of the swiss roll down to 2 dimensions (seeded).
mds = MDS(
    n_components=2,
    random_state=42,
)
X_reduced_mds = mds.fit_transform(X)

In [None]:
from sklearn.manifold import Isomap

# Isomap embedding of the swiss roll into 2 dimensions.
isomap = Isomap(
    n_components=2,
)
X_reduced_isomap = isomap.fit_transform(X)

In [None]:
from sklearn.manifold import TSNE

# t-SNE embedding of the swiss roll into 2 dimensions (seeded).
tsne = TSNE(
    n_components=2,
    random_state=42,
)
X_reduced_tsne = tsne.fit_transform(X)

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Load MNIST explicitly: no cell above defines `mnist`, so without this line the
# cell raises NameError after Restart Kernel -> Run All.
# The first call downloads the dataset from OpenML; later calls use the local cache.
# The swiss-roll data cannot be used here: LDA yields at most n_classes - 1
# components, and the binary label `t` would only allow a single component.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)

lda = LinearDiscriminantAnalysis(n_components=2)
X_mnist = mnist["data"]
y_mnist = mnist["target"]
# LDA is supervised: it needs the digit labels to find the discriminant axes.
lda.fit(X_mnist, y_mnist)
X_reduced_lda = lda.transform(X_mnist)

In [None]:
# Plot the three 2-D embeddings side by side, coloured by the binary label `t`.
# matplotlib is imported here because no cell above brings `plt` into scope,
# so the cell would otherwise raise NameError on a fresh kernel.
import matplotlib.pyplot as plt

titles = ["MDS", "Isomap", "t-SNE"]
embeddings = (X_reduced_mds, X_reduced_isomap, X_reduced_tsne)

plt.figure(figsize=(11,4))

# The loop variable is deliberately not named `X_reduced`: that name holds the
# LLE embedding computed earlier and must not be overwritten by this cell.
for subplot, title, X_embedded in zip((131, 132, 133), titles, embeddings):
    plt.subplot(subplot)
    plt.title(title, fontsize=14)
    plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=t, cmap=plt.cm.hot)
    plt.xlabel("$z_1$", fontsize=18)
    if subplot == 131:
        # Only the left-most panel carries the y-axis label.
        plt.ylabel("$z_2$", fontsize=18, rotation=0)
    plt.grid(True)

# No `save_fig` helper is defined in this notebook, so the figure is written
# with matplotlib directly, into the current working directory.
plt.tight_layout()
plt.savefig("other_dim_reduction_plot.png", dpi=300)
plt.show()

In [None]:
def learned_parameters(model):
    """Return the names of `model`'s public attributes that end with an underscore.

    Names are taken from ``dir(model)`` and returned in that order. A name is
    kept when it ends with ``"_"`` and does not start with ``"_"``, which
    excludes private and dunder attributes. scikit-learn uses the trailing
    underscore for attributes set while fitting an estimator.
    """
    names = []
    for attr in dir(model):
        if attr.startswith("_"):
            # Private or dunder attribute: never reported.
            continue
        if attr.endswith("_"):
            names.append(attr)
    return names