# Questions from Lab 03

## Imports

In [None]:
# If you're running this on Colab, uncomment the command below to install the
# pmlb library (`%pip` installs into the environment of the running kernel).
# %pip install pmlb

In [None]:
import altair as alt
import numpy as np
import pandas as pd
import pmlb
from itertools import product

from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA

In [None]:
# If you're running this code locally, then you can uncomment this to automatically
# save the chart data in files, rather than including the data in the spec. 

# !mkdir -p data
# alt.data_transformers.enable('json', prefix='data/altair-data')

## Setup

In [None]:
# One seed shared by every stochastic step below (subsampling, train/test
# split, t-SNE with random init, PCA) so that a fresh "Restart & Run All"
# reproduces the same data frame and therefore the same charts.
SEED = 42

# Fetch MNIST through pmlb and keep a random 5,000-row subsample.
mnist = pmlb.fetch_data('mnist')
mnist_small = mnist.sample(n=5000, random_state=SEED)

# Separate the pixel columns from the label column.
X = mnist_small.drop(columns=['target'])
y = mnist_small['target'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=SEED
)

# Two different 2-D projections of the training half: t-SNE and PCA.
tsne = TSNE(
    n_components=2,
    learning_rate='auto',
    init='random',
    perplexity=3,
    random_state=SEED,
)
X_train_embedded = tsne.fit_transform(X_train)

pca = PCA(n_components=2, random_state=SEED)
X_train_pca = pca.fit_transform(X_train)

# Collect both projections and the labels in one frame for plotting.
# `y_train` is a plain NumPy array (it comes from `.values`), so it is
# assigned by position rather than aligned on the shuffled index of `X_train`.
df_train = pd.DataFrame(X_train_embedded, columns=['x-tsne', 'y-tsne'])
df_train['target'] = y_train

df_train['x-pca'] = X_train_pca[:,0]
df_train['y-pca'] = X_train_pca[:,1]

## Is there a better way to have brushing plus panning and zooming?

Interval selections don't work nicely with `.interactive()`, because the brush and the pan/zoom behaviour both respond to the same click-and-drag gesture:

In [None]:
# A default interval selection (brush) combined with `.interactive()`, to show
# how the two behave when attached to the same chart.
brush = alt.selection_interval()

# Points inside the brush are coloured by their target; all others are grey.
brush_color = alt.condition(brush, 'target:N', alt.value('#dddddd'))

base = (
    alt.Chart(df_train)
    .mark_circle()
    .encode(color=brush_color)
    .add_selection(brush)
    .properties(width=350, height=350)
    .interactive()
)

# t-SNE view on the left, PCA view on the right.
alt.hconcat(
    base.encode(x='x-tsne', y='y-tsne'),
    base.encode(x='x-pca', y='y-pca'),
)

In the version below, you can hold down the shift key to use the brush. This example is based on this [StackOverflow answer](https://stackoverflow.com/a/65006769/5016634) from the author of Altair. For additional resources, see the [Altair docs on interaction](https://altair-viz.github.io/user_guide/interactions.html#) and the [Vega docs on event stream selectors](https://vega.github.io/vega/docs/event-streams/#selector).

In [None]:
# Brush: only reacts while the shift key is held. Drawing (`on`), dragging
# (`translate`) and wheel-resizing (`zoom`) are all filtered on
# `event.shiftKey`, so none of them fire during ordinary panning/zooming.
brush = alt.selection_interval(
    on='[mousedown[event.shiftKey], mouseup] > mousemove',
    translate='[mousedown[event.shiftKey], mouseup] > mousemove!',
    zoom='wheel![event.shiftKey]'
)

# Pan/zoom: an interval selection bound to the scales that only reacts while
# the shift key is NOT held. Gating `zoom` as well (as in the StackOverflow
# answer this cell is based on) keeps a wheel scroll over the brush from
# zooming the axes and resizing the brush at the same time.
pan_zoom = alt.selection_interval(
    on='[mousedown[!event.shiftKey], mouseup] > mousemove',
    translate='[mousedown[!event.shiftKey], mouseup] > mousemove!',
    zoom='wheel![!event.shiftKey]',
    bind='scales'
)

# Points inside the brush are coloured by their target; all others are grey.
base = alt.Chart(df_train).mark_circle().encode(
    color=alt.condition(brush, 'target:N', alt.value('#dddddd')),
).properties(
    width=350,
    height=350
).add_selection(
    pan_zoom, brush
)

# t-SNE view on the left, PCA view on the right.
alt.hconcat(base.encode(x='x-tsne', y='y-tsne'), base.encode(x='x-pca', y='y-pca'))

## Can you have multiple brushes?

The [docs](https://altair-viz.github.io/user_guide/interactions.html#composing-multiple-selections) give an example of having two brushes. One is created by holding down the shift key and the other is created by holding down the alt (or option) key. I'm not sure of a way to have an arbitrary number of interval selections.

In [None]:
# Two independent brushes, told apart by the modifier key held on mousedown:
# shift for the first, alt/option for the second.
brush1 = alt.selection_interval(on='[mousedown[event.shiftKey], mouseup] > mousemove')
brush2 = alt.selection_interval(on='[mousedown[event.altKey], mouseup] > mousemove')

# A point keeps its target colour if it lies inside either brush.
in_either_brush = brush1 | brush2
either_color = alt.condition(in_either_brush, 'target:N', alt.value('#dddddd'))

base = (
    alt.Chart(df_train)
    .mark_circle()
    .encode(color=either_color)
    .properties(width=350, height=350)
    .add_selection(brush1, brush2)
)

# t-SNE view on the left, PCA view on the right.
base.encode(x='x-tsne', y='y-tsne') | base.encode(x='x-pca', y='y-pca')

Alternatively, you can select multiple items individually. For example, below you can click on a circle to select it. To select multiple circles, you can shift-click.

In [None]:
# Multi-selection with its default trigger.
select = alt.selection_multi()

# Selected points are coloured by their target; all others are grey.
click_color = alt.condition(select, 'target:N', alt.value('#dddddd'))

base = (
    alt.Chart(df_train)
    .mark_circle()
    .encode(color=click_color)
    .properties(width=350, height=350)
    .add_selection(select)
)

# t-SNE view on the left, PCA view on the right.
base.encode(x='x-tsne', y='y-tsne') | base.encode(x='x-pca', y='y-pca')

In this example, you can select multiple points by holding down shift and hovering over them.

In [None]:
# Multi-selection triggered by hovering (`mouseover`) instead of the default.
select = alt.selection_multi(on='mouseover')

# Selected points are coloured by their target; all others are grey.
hover_color = alt.condition(select, 'target:N', alt.value('#dddddd'))

base = (
    alt.Chart(df_train)
    .mark_circle()
    .encode(color=hover_color)
    .properties(width=350, height=350)
    .add_selection(select)
)

# t-SNE view on the left, PCA view on the right.
base.encode(x='x-tsne', y='y-tsne') | base.encode(x='x-pca', y='y-pca')

## Other interaction

So far we've used selections to highlight the same data point in other views. With the `fields` attribute, we can specify which columns we want to consider when determining what data points are selected. In the below example, when we hover over a data point, we select all data points that have the same target. Note that this does not work with brushes, only `selection_single` and `selection_multi`.

For more examples, see the [docs](https://altair-viz.github.io/user_guide/interactions.html#selection-targets-fields-and-encodings).

In [None]:
# Single selection on hover, projected onto the `target` column via `fields`.
select = alt.selection_single(on='mouseover', fields=['target'])

# Selected points are coloured by their target; all others are grey.
target_color = alt.condition(select, 'target:N', alt.value('#dddddd'))

base = (
    alt.Chart(df_train)
    .mark_circle()
    .encode(color=target_color)
    .properties(width=350, height=350)
    .add_selection(select)
)

# t-SNE view on the left, PCA view on the right.
base.encode(x='x-tsne', y='y-tsne') | base.encode(x='x-pca', y='y-pca')