In [1]:
import altair as alt
import pandas as pd

source = pd.DataFrame({
    'category': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I'],
    'value': [28, 55, 43, 91, 81, 53, 19, 87, 52]})
alt.Chart(source).mark_bar().encode(
    x='category',
    y='value'
)

In [2]:
import altair as alt
from vega_datasets import data

source = data.seattle_weather()
#print(source)
brush = alt.selection(type='interval', encodings=['x'])

bars = alt.Chart().mark_bar().encode(
    x='month(date):O',
    y='mean(precipitation):Q',
    opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7)),
).add_selection(
    brush
)

line = alt.Chart().mark_rule(color='firebrick').encode(
    y='mean(precipitation):Q',
    size=alt.SizeValue(3)
).transform_filter(
    brush
)

alt.layer(bars, line, data=source)
# alt.layer(bars, line, data=source)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.


In [3]:
import altair as alt
from vega_datasets import data

source = data.cars()
#print(source)
brush = alt.selection(type='interval')

alt.Chart(source).mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q',
    color=alt.condition(brush, 'Cylinders:O', alt.value('grey')),
).add_selection(brush)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.


cross filter

In [4]:
import altair as alt
from vega_datasets import data

source = alt.UrlData(
    data.flights_2k.url,
    format={'parse': {'date': 'date'}}
)

print(source)

brush = alt.selection(type='interval', encodings=['x'])

# Define the base chart, with the common parts of the
# background and highlights
base = alt.Chart().mark_bar().encode(
    x=alt.X(alt.repeat('column'), type='quantitative', bin=alt.Bin(maxbins=20)),
    y='count()'
).properties(
    width=160,
    height=130
)

# gray background with selection
background = base.encode(
    color=alt.value('#ddd')
).add_selection(brush)

# blue highlights on the transformed data
highlight = base.transform_filter(brush)

# layer the two charts & repeat
alt.layer(
    background,
    highlight,
    data=source
).transform_calculate(
    "time",
    "hours(datum.date)"
).repeat(column=["distance", "delay", "time"])

UrlData({
  format: {'parse': {'date': 'date'}},
  url: 'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/flights-2k.json'
})


highlight

In [5]:
import altair as alt
import pandas as pd
import numpy as np

x = np.random.normal(size=100)
y = np.random.normal(size=100)

m = np.random.normal(15, 1, size=100)

source = pd.DataFrame({"x": x, "y":y, "m":m})

# interval selection in the scatter plot
pts = alt.selection(type="interval", encodings=["x"])

# left panel: scatter plot
points = alt.Chart().mark_point(filled=True, color="black").encode(
    x='x',
    y='y'
).transform_filter(
    pts
).properties(
    width=300,
    height=300
)

# right panel: histogram
mag = alt.Chart().mark_bar().encode(
    x='mbin:N',
    y="count()",
    color=alt.condition(pts, alt.value("black"), alt.value("lightgray"))
).properties(
    width=300,
    height=300
).add_selection(pts)

# build the chart:
alt.hconcat(
    points,
    mag,
    data=source
).transform_bin(
    "mbin",
    field="m",
    bin=alt.Bin(maxbins=20)
)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.


Case Study: Seattle Weather

In [6]:
import altair as alt
from vega_datasets import data


df = data.seattle_weather()

source = data.seattle_weather()

scale = alt.Scale(domain=['sun', 'fog', 'drizzle', 'rain', 'snow'],
                  range=['#e7ba52', '#a7a7a7', '#aec7e8', '#1f77b4', '#9467bd'])
color = alt.Color('weather:N', scale=scale)

brush = alt.selection(type='interval')

points = alt.Chart().mark_point().encode(
    alt.X('temp_max:Q', title='Maximum Daily Temperature (C)'),
    alt.Y('temp_range:Q', title='Daily Temperature Range (C)'),
    color=alt.condition(brush, 'weather:N', alt.value('lightgray'), scale=scale),
    size=alt.Size('precipitation:Q', scale=alt.Scale(range=[1, 200]))
).transform_calculate(
    "temp_range", "datum.temp_max - datum.temp_min"
).properties(
    width=600,
    height=400
).add_selection(
    brush
)

bars = alt.Chart().mark_bar().encode(
    x='count()',
    y='weather:N',
    color=alt.Color('weather:N', scale=scale),
).transform_calculate(
    "temp_range", "datum.temp_max - datum.temp_min"
).transform_filter(
    brush
).properties(
    width=600
)

alt.vconcat(points, bars, data=df)

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.


Case Study: IMDB vs. Altair

In [7]:
import altair as alt
from vega_datasets import data

source = data.movies.url

pts = alt.selection(type="single", encodings=['x'])

rect = alt.Chart(data.movies.url).mark_rect().encode(
    alt.X('IMDB_Rating:Q', bin=True),
    alt.Y('Rotten_Tomatoes_Rating:Q', bin=True),
    alt.Color('count()',
        scale=alt.Scale(scheme='greenblue'),
        legend=alt.Legend(title='Total Records')
    )
)

circ = rect.mark_point().encode(
    alt.ColorValue('grey'),
    alt.Size('count()',
        legend=alt.Legend(title='Records in Selection')
    )
).transform_filter(
    pts
)

bar = alt.Chart(source).mark_bar().encode(
    x='Major_Genre:N',
    y='count()',
    color=alt.condition(pts, alt.ColorValue("steelblue"), alt.ColorValue("grey"))
).properties(
    width=550,
    height=200
).add_selection(pts)

alt.vconcat(
    rect + circ,
    bar
).resolve_legend(
    color="independent",
    size="independent"
)

        combined and should be specified using "selection_point()".


In [8]:
import altair as alt
from vega_datasets import data

# Since these data are each more than 5,000 rows we'll import from the URLs
airports = data.airports.url
flights_airport = data.flights_airport.url

states = alt.topo_feature(data.us_10m.url, feature="states")

# Create mouseover selection
select_city = alt.selection_single(
    on="mouseover", nearest=True, fields=["origin"], empty="none"
)

# Define which attributes to lookup from airports.csv
lookup_data = alt.LookupData(
    airports, key="iata", fields=["state", "latitude", "longitude"]
)

background = alt.Chart(states).mark_geoshape(
    fill="lightgray",
    stroke="white"
).properties(
    width=750,
    height=500
).project("albersUsa")

connections = alt.Chart(flights_airport).mark_rule(opacity=0.35).encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    latitude2="lat2:Q",
    longitude2="lon2:Q"
).transform_lookup(
    lookup="origin",
    from_=lookup_data
).transform_lookup(
    lookup="destination",
    from_=lookup_data,
    as_=["state", "lat2", "lon2"]
).transform_filter(
    select_city
)

points = alt.Chart(flights_airport).mark_circle().encode(
    latitude="latitude:Q",
    longitude="longitude:Q",
    size=alt.Size("routes:Q", scale=alt.Scale(range=[0, 1000]), legend=None),
    order=alt.Order("routes:Q", sort="descending"),
    tooltip=["origin:N", "routes:Q"]
).transform_aggregate(
    routes="count()",
    groupby=["origin"]
).transform_lookup(
    lookup="origin",
    from_=lookup_data
).transform_filter(
    (alt.datum.state != "PR") & (alt.datum.state != "VI")
).add_selection(
    select_city
)

(background + connections + points).configure_view(stroke=None)



Customized and large data

In [9]:
#!/usr/bin/env python3
import altair as alt
import pandas as pd

#https://data.oecd.org/price/housing-prices.htm

data = pd.read_csv("https://raw.githubusercontent.com/qisun0/vis_interactivity/main/DP_LIVE.csv")
#print(data)
data["LOCATION"] = data["LOCATION"].astype("category")
data["SUBJECT"] = data["SUBJECT"].astype("category")
data["FREQUENCY"] = data["FREQUENCY"].astype("category")
data["TIME"] = pd.to_datetime(data["TIME"])

#print(data.dtypes)
#print(data.size)

data = data.loc[data["FREQUENCY"] == "Q",
        ["LOCATION", "SUBJECT", "TIME", "Value"]]


base = alt.Chart(data)\
        .encode(x="TIME:T", detail="LOCATION:N")

lines = alt.Chart(data).mark_line()\
        .encode(x="TIME:T", y="Value:Q", color="LOCATION:N")

#alt.layer(bars, lines)
#lines
lines.save("line.html")


In [10]:
#!/usr/bin/env python3
import altair as alt
import pandas as pd

data = pd.read_csv("https://raw.githubusercontent.com/qisun0/vis_interactivity/main/DP_LIVE.csv")

data["LOCATION"] = data["LOCATION"].astype("category")
data["SUBJECT"] = data["SUBJECT"].astype("category")
data["FREQUENCY"] = data["FREQUENCY"].astype("category")
data["TIME"] = pd.to_datetime(data["TIME"])

print(data.dtypes)

data = data.loc[data["FREQUENCY"] == "Q",
        ["LOCATION", "SUBJECT", "TIME", "Value"]]

base = alt.Chart(data)\
        .encode(x="TIME:T", detail="LOCATION")\
        .transform_filter(alt.datum.SUBJECT=="NOMINAL")

bars = base.mark_circle()\
        .encode(y="LOCATION:N", size="Value:Q",
                color=alt.Color("Value:Q", scale=alt.Scale(scheme="viridis")))
bars
#bars.save("bar.html")

LOCATION            category
INDICATOR             object
SUBJECT             category
MEASURE               object
FREQUENCY           category
TIME          datetime64[ns]
Value                float64
Flag Codes           float64
dtype: object


In [11]:
#!/usr/bin/env python3
import altair as alt
import pandas as pd

data = pd.read_csv("https://raw.githubusercontent.com/qisun0/vis_interactivity/main/DP_LIVE.csv")

data["LOCATION"] = data["LOCATION"].astype("category")
data["SUBJECT"] = data["SUBJECT"].astype("category")
data["FREQUENCY"] = data["FREQUENCY"].astype("category")
data["TIME"] = pd.to_datetime(data["TIME"])

# print(data.dtypes)

data = data.loc[data["FREQUENCY"] == "Q",["LOCATION", "SUBJECT", "TIME", "Value"]]

base = alt.Chart(data)\
        .encode(x="TIME:T", detail="LOCATION")\
        .transform_filter(alt.datum.SUBJECT=="NOMINAL")

lines = base.mark_line()\
        .encode(y="Value:Q", color="LOCATION:N")

bars = base.mark_circle()\
        .encode(y="LOCATION:N", size="Value:Q",
                color=alt.Color("Value:Q", scale=alt.Scale(scheme="viridis")))

chart = bars.properties(width=200, height=800) |\
        lines.properties(width=400, height=800)

chart
#chart.save("chart.html")

In [12]:
#!/usr/bin/env python3

import altair as alt
import pandas as pd

# # Load the data
data = pd.read_csv("https://raw.githubusercontent.com/qisun0/vis_interactivity/main/DP_LIVE.csv")

data["LOCATION"] = data["LOCATION"].astype("category")
data["SUBJECT"] = data["SUBJECT"].astype("category")
data["FREQUENCY"] = data["FREQUENCY"].astype("category")
data["TIME"] = pd.to_datetime(data["TIME"])

data = data.loc[data["FREQUENCY"] == "Q",
        ["LOCATION", "SUBJECT", "TIME", "Value"]]


# Plot!
location_selection = alt.selection(type="multi",
        on="click", fields=["LOCATION"], nearest=True)

year_selection = alt.selection(type="single",
        on="mouseover", fields=["TIME"], nearest=True, empty="none")

subject_dropdown = alt.binding_select(
        options=pd.unique(data["SUBJECT"]).tolist(),
        name="Subject: ")
subject_selection = alt.selection(type="single",
        bind=subject_dropdown, init={"SUBJECT": "NOMINAL"})

base = alt.Chart(data)\
        .encode(x="TIME:T", detail="LOCATION")\
        .transform_filter(subject_selection)

ghost = base.mark_circle()\
        .encode(y="Value:Q", opacity=alt.value(0))\
        .add_selection(location_selection)\
        .add_selection(year_selection)\
        .add_selection(subject_selection)

lines = base.mark_line()\
        .encode(y="Value:Q", color=alt.value("lightgray"), size=alt.value(1))

highlight_lines = base.mark_line()\
        .encode(y="Value:Q", color="LOCATION:N", size=alt.value(3))\
        .transform_filter(location_selection)

vline = base.mark_rule()\
        .encode(color=alt.value("gray"))\
        .transform_filter(year_selection)
text = base.mark_text(dx=5, dy=-5, align="left", size=16)\
        .encode(y="Value:Q", text=alt.Text("Value", format=",.0f"))\
        .transform_filter(year_selection)\
        .transform_filter(location_selection)

bar_scale = alt.Scale(scheme="viridis")
bar_size = alt.Size("Value:Q", legend=None)

bars = base.mark_circle()\
        .encode(y="LOCATION:N", size=bar_size,
                color=alt.value("lightgray"))\
        .add_selection(location_selection)

highlight_bars = base.mark_circle()\
        .encode(y="LOCATION:N",
                color=alt.Color("Value:Q", scale=bar_scale, legend=None),
                size=bar_size)\
        .transform_filter(location_selection)

chart = (bars.properties(width=400, height=800) + highlight_bars) |\
        (lines.properties(width=1000, height=800)
                + ghost + highlight_lines + vline + text)
#chart
chart.save("example.html")


   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


TypeError: altair.vegalite.v5.schema.core.SelectionParameter() got multiple values for keyword argument 'value'