In [15]:
import numpy as np
import plotly.graph_objects as go
from sklearn.datasets import make_circles
from sklearn.pipeline import make_pipeline
from sklearn.naive_bayes import BernoulliNB
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier

In [4]:
# Generate the synthetic make_circles dataset; the seed is fixed so the
# same points are produced on every run.
X, y = make_circles(
    noise=0.05,
    factor=0.5,
    random_state=0,
)

In [None]:
# Fit a RandomTreesEmbedding (10 trees, depth 3, fixed seed) on X and keep
# the transformed representation of the training points.
hasher = RandomTreesEmbedding(
    max_depth=3,
    n_estimators=10,
    random_state=0,
)
X_transformed = hasher.fit_transform(X)

In [None]:
# Reduce the tree embedding to 2 components with truncated SVD.
# Nothing is drawn in this cell; it only computes X_reduced.
# random_state is pinned, like every other estimator in this notebook, so
# that re-running the cell yields the same X_reduced.
svd = TruncatedSVD(n_components=2, random_state=0)
X_reduced = svd.fit_transform(X_transformed)

In [None]:
# Train a Bernoulli Naive Bayes classifier on the embedded features
# (X_transformed) against the labels y.
nb = BernoulliNB()
nb.fit(X=X_transformed, y=y)

In [None]:
# Comparison model: an ExtraTreesClassifier (10 trees, depth 3, fixed seed)
# trained directly on the raw coordinates X rather than on the embedding.
trees = ExtraTreesClassifier(
    n_estimators=10,
    max_depth=3,
    random_state=0,
)
trees.fit(X=X, y=y)

In [11]:
def plot_scatter(X, y, title="Original Data"):
    """Build a scatter figure of the first two columns of ``X``, coloured by ``y``.

    Parameters
    ----------
    X : array supporting ``X[:, 0]`` and ``X[:, 1]`` indexing
        Column 0 is used for the x coordinates and column 1 for the y
        coordinates.
    y : sequence
        Per-point values passed as the marker ``color`` and mapped through
        the Viridis colorscale.
    title : str, default "Original Data"
        Figure title. The default keeps the previous hard-coded title, so
        existing ``plot_scatter(X, y)`` calls are unaffected.

    Returns
    -------
    The ``go.Figure`` that was built. It is not shown here; the caller
    decides when to call ``.show()``.
    """
    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=X[:, 0],
            y=X[:, 1],
            mode="markers",
            marker=dict(color=y, size=10, colorscale="Viridis", line=dict(width=1)),
        )
    )

    # Tick labels are hidden on both axes; only the point layout is shown.
    fig.update_layout(
        title=title,
        xaxis=dict(showticklabels=False),
        yaxis=dict(showticklabels=False),
    )

    return fig

In [12]:
# Build the scatter figure for the dataset and render it.
fig = plot_scatter(X=X, y=y)
fig.show()

# ChatGPT

In [19]:
import numpy as np
import plotly.graph_objs as go

from sklearn.datasets import make_circles
from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
from sklearn.decomposition import TruncatedSVD
from sklearn.naive_bayes import BernoulliNB

# make a synthetic dataset
X, y = make_circles(factor=0.5, random_state=0, noise=0.05)

# Pipeline: RandomTreesEmbedding followed by Bernoulli Naive Bayes.
hasher = RandomTreesEmbedding(n_estimators=10, random_state=0, max_depth=3)
nb = BernoulliNB()
pipeline = make_pipeline(hasher, nb)
pipeline.fit(X, y)

# Compute the embedding here, from the hasher used in the pipeline, instead
# of relying on an X_transformed variable left over from an earlier cell.
# (The grid code further down already calls hasher.transform the same way.)
X_transformed = hasher.transform(X)

# Reduce the embedding to 2 components with truncated SVD for plotting.
# random_state is pinned, like the other estimators, so X_reduced is
# reproducible across runs.
svd = TruncatedSVD(n_components=2, random_state=0)
X_reduced = svd.fit_transform(X_transformed)

# Scatter plots of the original and the SVD-reduced data.
# The two point clouds live in unrelated coordinate systems, so they are
# drawn in side-by-side panels rather than on one shared pair of axes:
#   left panel  (default axes x/y): original 2-d data
#   right panel (axes x2/y2):       truncated-SVD projection
# Traces added later without explicit xaxis/yaxis use the default axes,
# i.e. they land in the left panel on top of the original data.
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=X[:, 0],
        y=X[:, 1],
        mode="markers",
        marker=dict(color=y, size=10, colorscale="Viridis", line=dict(width=1)),
        name="Original Data (2d)"
    )
)

fig.add_trace(
    go.Scatter(
        x=X_reduced[:, 0],
        y=X_reduced[:, 1],
        mode="markers",
        marker=dict(color=y, size=10, colorscale="Viridis", line=dict(width=1)),
        name="Truncated SVD reduction (2d) of transformed data (%dd)" % X_transformed.shape[1],
        xaxis="x2",
        yaxis="y2",
    )
)

# Split the horizontal space between the two panels; y2 is anchored to x2 so
# the right panel gets its own independent vertical axis.
fig.update_layout(
    xaxis=dict(domain=[0.0, 0.47]),
    xaxis2=dict(domain=[0.53, 1.0], anchor="y2"),
    yaxis2=dict(anchor="x2"),
)

# Decision surface in the original input space: evaluate the model on a
# regular mesh covering the data range padded by 0.5 on every side, with
# grid step h.
h = 0.01
x_min = X[:, 0].min() - 0.5
x_max = X[:, 0].max() + 0.5
y_min = X[:, 1].min() - 0.5
y_max = X[:, 1].max() + 0.5
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)

# Embed every mesh point with the fitted hasher, then take the second
# predict_proba column of the Naive Bayes model as the surface value.
transformed_grid = hasher.transform(np.column_stack((xx.ravel(), yy.ravel())))
y_grid_pred = nb.predict_proba(transformed_grid)[:, 1]

# Semi-transparent heatmap of the mesh predictions. The axis vectors are
# read from the mesh itself: the first row of xx holds the x grid values and
# the first column of yy holds the y grid values.
fig.add_trace(
    go.Heatmap(
        z=y_grid_pred.reshape(xx.shape),
        x=xx[0, :],
        y=yy[:, 0],
        name="Naive Bayes on Transformed data",
        colorscale="Viridis",
        showscale=False,
        opacity=0.8,
    )
)

# Overlay the data points, coloured by label, then render the figure.
fig.add_trace(
    go.Scatter(
        name="Naive Bayes on Transformed data",
        mode="markers",
        x=X[:, 0],
        y=X[:, 1],
        marker=dict(
            color=y,
            size=10,
            colorscale="Viridis",
            line=dict(width=1),
        ),
    )
)
fig.show()

# Evaluate the ExtraTreesClassifier (`trees`) on the same mesh points and keep
# the second predict_proba column. This rebinds y_grid_pred, replacing the
# Naive Bayes values computed earlier in this cell.
y_grid_pred = trees.predict_proba(np.column_stack((xx.ravel(), yy.ravel())))[:, 1]

# fig.add_trace(
#     go.Heatmap(
#         x=np.arange(x_min, x_max
