In [1]:
import numpy as np
from bokeh.io import export_png, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10_10
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [2]:
# Load the pre-computed feature matrices: the train/test features from the
# electricity_nips directory and the train features stored under
# electricity_nips_alternative (referred to as "transformed" below).
train_features = np.load("../data/electricity_nips/train_features.npy")
test_features = np.load("../data/electricity_nips/test_features.npy")
transformed_features = np.load("../data/electricity_nips_alternative/train_features.npy")

# Standardise using statistics fitted on the training features only, then apply
# that same fitted scaler to the other two sets so all three share one scale.
scaler = StandardScaler()
norm_train_features = scaler.fit_transform(train_features)
norm_test_features = scaler.transform(test_features)
norm_transformed_features = scaler.transform(transformed_features)

# Fit a 2-component PCA on the scaled training features and project every set
# into that same 2-D space so the point clouds are directly comparable.
# random_state is pinned because the default svd_solver="auto" may choose the
# randomized SVD depending on the matrix shape; fixing the seed keeps the
# projection (and the exported figure) reproducible on Restart & Run All.
pca = PCA(n_components=2, random_state=0)
train_pca_data = pca.fit_transform(norm_train_features)
test_pca_data = pca.transform(norm_test_features)
transformed_pca_data = pca.transform(norm_transformed_features)

In [3]:
# Pair every legend label with its 2-D PCA projection. The order of this list
# determines both the row order in the data source and the factor order that
# the colour mapper receives.
projections = [
    ("train data", train_pca_data),
    ("test data", test_pca_data),
    ("transformed data", transformed_pca_data),
]
group_names = [name for name, _points in projections]

# Stack all projected points into one array, and build the matching per-row
# metadata: the position of each row within its own dataset and its label.
source_array = np.vstack([points for _name, points in projections])
index_array = np.concatenate(
    [np.arange(len(points)) for _name, points in projections]
)
labels = [name for name, points in projections for _row in range(len(points))]

source = ColumnDataSource(
    data=dict(
        comp1=source_array[:, 0],
        comp2=source_array[:, 1],
        ts_index=index_array,
        label=labels,
    )
)

# Tooltip rows: which dataset/row the point comes from, plus its coordinates.
tooltips = [
    ("index", "@label at index @ts_index"),
    ("x val", "@comp1"),
    ("y val", "@comp2"),
]

# No toolbar tools are requested explicitly; per the Bokeh figure docs, passing
# `tooltips` causes a hover tool to be created when none is listed in `tools`.
# The axis ranges are fixed rather than derived from the data.
fig = figure(
    width=800,
    height=400,
    x_range=(-3, 7),
    y_range=(-3, 8),
    x_axis_label="component 0",
    y_axis_label="component 1",
    tools=[],
    tooltips=tooltips,
)

# Colour each point by its dataset label, using the palette in factor order.
label_colors = factor_cmap(
    field_name="label",
    palette=Category10_10,
    factors=group_names,
)

fig.circle(
    x="comp1",
    y="comp2",
    source=source,
    color=label_colors,
    fill_alpha=0.5,
    line_alpha=0.5,
    selection_color=Category10_10[3],
    legend_field="label",
)

# Transparent figure background and border, and no toolbar or Bokeh logo.
fig.background_fill_color = None
fig.border_fill_color = None
fig.toolbar.logo = None
fig.toolbar_location = None

# Frameless, transparent legend placed in the top-left corner.
fig.legend.location = "top_left"
fig.legend.background_fill_alpha = 0
fig.legend.border_line_alpha = 0

# Write the static PNG first, then display the figure in the notebook.
export_png(fig, filename="../figures/transformed-data.png")
show(fig)

