In [81]:
import altair as alt
import pandas as pd
from vega_datasets import data as vega_data

In [82]:
# Bind the vega_datasets loader to the shorter name `data`.
data = vega_data

In [87]:
# Scatter plot of the cars dataset: horsepower vs. fuel economy, colored by
# country of origin.
# The URL is read from `vega_data` itself rather than through the `data`
# alias, so the cell keeps working even when `data` has been rebound to
# something else by the time it is (re-)run.
chart = alt.Chart(vega_data.cars.url).mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q',
    color='Origin:N'
)
chart

In [43]:
# Export the scatter plot to chart.html in the current working directory.
chart.save('chart.html')

In [44]:
# TODO: figure out how to integrate our own dataset

In [45]:
# well, we can just pass in a dataframe
import altair as alt
import pandas as pd

# Small inline dataset: one categorical column (`x`) and one numeric column (`y`).
# It gets its own name so the `data` alias for the vega_datasets loader is not
# overwritten with a DataFrame.
bar_data = pd.DataFrame({'x': ['A', 'B', 'C', 'D', 'E'],
                         'y': [5, 3, 6, 7, 2]})

# A DataFrame can be handed to alt.Chart directly; field types are inferred.
chart2 = alt.Chart(bar_data).mark_bar().encode(
    x='x',
    y='y',
)

In [46]:
# Load our own cleaned video-game sales table; the path is relative to the
# notebook's working directory (one level up).
# NOTE(review): the preview below shows an `Unnamed: 0` column, which looks
# like a row index that was written into the CSV; passing index_col=0 would
# presumably drop it - confirm against how the file was produced.
games = pd.read_csv('../vgsales_clean.csv')
games.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [47]:
# Distinct publisher names present in the sales table.
games["Publisher"].unique()

array(['Nintendo', 'Microsoft Game Studios', 'Take-Two Interactive',
       'Sony Computer Entertainment', 'Activision', 'Ubisoft',
       'Bethesda Softworks', 'Electronic Arts', 'Sega', 'SquareSoft',
       'Atari', '505 Games', 'Capcom', 'GT Interactive',
       'Konami Digital Entertainment',
       'Sony Computer Entertainment Europe', 'Square Enix', 'LucasArts',
       'Virgin Interactive', 'Warner Bros. Interactive Entertainment',
       'Universal Interactive', 'Eidos Interactive', 'RedOctane',
       'Vivendi Games', 'Enix Corporation', 'Namco Bandai Games',
       'Palcom', 'Hasbro Interactive', 'THQ', 'Fox Interactive',
       'Acclaim Entertainment', 'MTV Games', 'Disney Interactive Studios',
       'Majesco Entertainment', 'Codemasters', 'Red Orb', 'Level 5',
       'Arena Entertainment', 'Midway Games', 'JVC', 'Deep Silver',
       '989 Studios', 'NCSoft', 'UEP Systems', 'Parker Bros.', 'Maxis',
       'Imagic', 'Tecmo Koei', 'Valve Software', 'ASCII Entertainment',
     

In [48]:
# The cleaned data is also hosted as JSON on GitHub, so Altair can load it by
# URL instead of embedding the whole table in the chart spec.
data_url = 'https://raw.githubusercontent.com/6859-sp21/a4-video-game-sales/main/vgsales_clean.json'

# Sanity check: total global sales per release year as a bar chart.
chart_test = (
    alt.Chart(data_url)
    .mark_bar()
    .encode(
        x=alt.X('Year:O'),
        y=alt.Y('sum(Global_Sales):Q'),
    )
)
chart_test

In [49]:
# Stacked area chart: summed global sales per year, one colored band per genre.
chart_stacked = (
    alt.Chart(data_url)
    .mark_area()
    .encode(
        x=alt.X('Year:O'),
        y=alt.Y('sum(Global_Sales):Q'),
        color=alt.Color('Genre:N'),
    )
)
chart_stacked

In [50]:
# URL of the vega_datasets cars dataset; the interaction examples below pass
# this URL to alt.Chart.
cars = vega_data.cars.url

In [79]:
# Interval ("brush") selection: points matched by the brush keep their Origin
# color, every other point is drawn in light gray.
brush = alt.selection_interval()
chart_select = (
    alt.Chart(cars)
    .mark_point()
    .encode(
        x=alt.X('Miles_per_Gallon:Q'),
        y=alt.Y('Horsepower:Q'),
        color=alt.condition(brush, 'Origin:N', alt.value('lightgray')),
    )
    .add_selection(brush)
)
chart_select

In [52]:
# Export the brushable scatter plot to interactive_test.html.
chart_select.save('interactive_test.html')

In [53]:
# Linked views: a base chart without an x encoding is specialised twice and
# placed side by side. Both panels carry the same `brush` selection.
chart = (
    alt.Chart(cars)
    .mark_point()
    .encode(
        y=alt.Y('Horsepower:Q'),
        color=alt.condition(brush, 'Origin:N', alt.value('lightgray')),
    )
    .properties(width=250, height=250)
    .add_selection(brush)
)

chart.encode(x=alt.X('Acceleration:Q')) | chart.encode(x=alt.X('Miles_per_Gallon:Q'))

In [54]:
# only care about x direction
# The interval selection is restricted to the x encoding, so the brush is
# defined by a horizontal range only. A separate name is used so the
# unrestricted `brush` from the earlier cells is left untouched.
brush_x = alt.selection_interval(encodings=['x'])

chart = alt.Chart(cars).mark_point().encode(
    y='Horsepower:Q',
    color=alt.condition(brush_x, 'Origin:N', alt.value('lightgray'))
).properties(
    width=250,
    height=250
).add_selection(
    brush_x
)

chart.encode(x='Acceleration:Q') | chart.encode(x='Miles_per_Gallon:Q')


In [55]:
def make_example(selector):
    """Build a Cylinders-by-Origin heatmap of the cars dataset driven by `selector`.

    Rectangles matched by `selector` are colored by record count; all other
    rectangles are light gray. The selection is attached to the returned
    chart, which is sized 300x180.

    Parameters
    ----------
    selector : an Altair selection object (e.g. from alt.selection_interval()).

    Returns
    -------
    The configured chart.
    """
    source_url = vega_data.cars.url

    heatmap = alt.Chart(source_url).mark_rect().encode(
        x=alt.X("Cylinders:O"),
        y=alt.Y("Origin:N"),
        color=alt.condition(selector, 'count()', alt.value('lightgray')),
    )
    return heatmap.properties(width=300, height=180).add_selection(selector)

In [56]:
# Default interval selection (no encodings restriction), shown on the heatmap.
interval = alt.selection_interval()
make_example(interval)

In [57]:
# Interval selection limited to the x encoding. With empty='none', an empty
# selection matches nothing, so the heatmap starts out entirely gray.
interval_x = alt.selection_interval(encodings=['x'], empty='none')
make_example(interval_x)

In [58]:
# Binding an interval selection to the scales turns it into pan/zoom control
# of the axes instead of a visible brush.
scales = alt.selection_interval(bind='scales')

alt.Chart(cars).mark_point().encode(
    x=alt.X('Horsepower:Q'),
    y=alt.Y('Miles_per_Gallon:Q'),
    color=alt.Color('Origin:N'),
).add_selection(scales)


In [59]:
# Single selection with default settings, shown on the heatmap.
single = alt.selection_single()
make_example(single)

In [60]:
# mouseover!
# The selection is triggered by hovering instead of clicking; nearest=True
# asks for the mark closest to the cursor to be selected.
single_nearest = alt.selection_single(on='mouseover', nearest=True)
make_example(single_nearest)

In [61]:
# Multi selection with default settings: hold the shift key while clicking to
# add further marks to the selection.
multi = alt.selection_multi()
make_example(multi)

In [62]:
# Paint the chart with mouseover: the multi selection is triggered by hover,
# toggle=False is passed so hovered marks are not toggled back off, and
# empty='none' makes the chart start with nothing selected.
multi_mouseover = alt.selection_multi(on='mouseover', toggle=False, empty='none')
make_example(multi_mouseover)

In [63]:
# Two independent brushes on one chart, told apart by modifier key:
# `alex` reacts to drags started with Alt held, `morgan` to drags started with
# Shift held (and is drawn with a custom brush style). A rectangle is colored
# by count when it falls inside either brush, otherwise it is grey.
alex = alt.selection_interval(
    name='alex',
    on="[mousedown[event.altKey], mouseup] > mousemove",
)
morgan = alt.selection_interval(
    name='morgan',
    on="[mousedown[event.shiftKey], mouseup] > mousemove",
    mark=alt.BrushConfig(stroke="#e34a33", fill="#fdbb84", fillOpacity=0.5),
)

alt.Chart(cars).mark_rect().encode(
    x=alt.X('Cylinders:O'),
    y=alt.Y('Origin:O'),
    color=alt.condition(alex | morgan, 'count()', alt.ColorValue("grey")),
).add_selection(alex, morgan).properties(width=300, height=180)


In [64]:
# Clickable legend: a second chart listing the Origin values stands in for the
# legend and owns the multi selection; the scatter plot only reads that
# selection through the shared `color` condition.
selection = alt.selection_multi(fields=['Origin'])
color = alt.condition(
    selection,
    alt.Color('Origin:N', legend=None),  # built-in legend is switched off
    alt.value('lightgray'),
)

scatter = alt.Chart(cars).mark_point().encode(
    x=alt.X('Horsepower:Q'),
    y=alt.Y('Miles_per_Gallon:Q'),
    color=color,
    tooltip=alt.Tooltip('Name:N'),
)

legend = alt.Chart(cars).mark_point().encode(
    y=alt.Y('Origin:N', axis=alt.Axis(orient='right')),
    color=color,
).add_selection(selection)

scatter | legend

In [65]:
# A single selection can be bound to an HTML dropdown: choosing a country
# highlights its points and grays out the rest.
input_dropdown = alt.binding_select(options=['Europe', 'Japan', 'USA'])
selection = alt.selection_single(
    name='Country of ',
    fields=['Origin'],
    bind=input_dropdown,
)
color = alt.condition(
    selection,
    alt.Color('Origin:N', legend=None),
    alt.value('lightgray'),
)

alt.Chart(cars).mark_point().encode(
    x=alt.X('Horsepower:Q'),
    y=alt.Y('Miles_per_Gallon:Q'),
    color=color,
    tooltip=alt.Tooltip('Name:N'),
).add_selection(selection)

In [66]:
# Same dropdown as before, but the selection is applied as a data filter, so
# non-matching points are removed instead of grayed out.
input_dropdown = alt.binding_select(options=['Europe', 'Japan', 'USA'])
selection = alt.selection_single(
    name='Country of ',
    fields=['Origin'],
    bind=input_dropdown,
)
# Note: `color` is rebuilt here but the chart below colors by 'Origin:N'
# directly and does not use it.
color = alt.condition(
    selection,
    alt.Color('Origin:N', legend=None),
    alt.value('lightgray'),
)

alt.Chart(cars).mark_point().encode(
    x=alt.X('Horsepower:Q'),
    y=alt.Y('Miles_per_Gallon:Q'),
    color=alt.Color('Origin:N'),
    tooltip=alt.Tooltip('Name:N'),
).add_selection(selection).transform_filter(selection)


In [67]:
# Pan/zoom: an interval selection bound to the scales, on a scatter plot that
# also shows the car name as a tooltip.
selection = alt.selection_interval(bind='scales')

alt.Chart(cars).mark_point().encode(
    x=alt.X('Horsepower:Q'),
    y=alt.Y('Miles_per_Gallon:Q'),
    color=alt.Color('Origin:N'),
    tooltip=alt.Tooltip('Name:N'),
).add_selection(selection)

In [68]:
# selection values can also be referenced inside other expressions (e.g. a condition)

import altair as alt
import pandas as pd
import numpy as np

# Seeded generator so the random walk below is the same on every run.
rand = np.random.RandomState(42)

# 100 points: x is the step index, y is a cumulative sum of normal draws.
df = pd.DataFrame({
    'xval': range(100),
    'yval': np.cumsum(rand.randn(100)),
})

# A slider (0-100, step 1) drives a single selection whose only field is
# `cutoff`, initialised to 50.
slider = alt.binding_range(name='cutoff:', min=0, max=100, step=1)
selector = alt.selection_single(
    name="SelectorName",
    fields=['cutoff'],
    init={'cutoff': 50},
    bind=slider,
)

# Points whose xval lies below the slider value are red, the rest blue.
alt.Chart(df).mark_point().encode(
    x=alt.X('xval'),
    y=alt.Y('yval'),
    color=alt.condition(
        alt.datum.xval < selector.cutoff,
        alt.value('red'),
        alt.value('blue'),
    ),
).add_selection(selector)

In [88]:
# Back to our own data. Starting point for the click interaction: the stacked
# area chart of summed global sales per year, colored by genre.
chart_stacked = (
    alt.Chart(data_url)
    .mark_area()
    .encode(
        x=alt.X('Year:O'),
        y=alt.Y('sum(Global_Sales):Q'),
        color=alt.Color('Genre:N'),
    )
)
chart_stacked

In [223]:
def opacity(x):
    """Return an opacity condition for selection `x`.

    Marks matched by `x` are drawn fully opaque (1); all others at 0.2.
    """
    return alt.condition(x, alt.value(1), alt.value(.2))


# slider = alt.binding_range(min=0, max=100, step=1, name='cutoff:')
# selector = alt.selection_single(name="SelectorName", fields=['cutoff'], bind=slider, init={'cutoff': 50})

stacked_chart_title = 'Global Sales by Genre Per Year'

# Selectors
# empty='none': nothing counts as selected until the user clicks a mark.
genre_selector = alt.selection_single(fields=['Genre'], empty='none')
publisher_selector = alt.selection_single(fields=['Publisher'], empty='none')
# Only referenced by the commented-out chart_publisher below.
time_selector = alt.selection_interval(encodings=['x'])

# Text Visualizations for values selected
text_genre_selected = alt.Chart(data_url).mark_text(dx=0, dy=-20, size=20).encode(
    text='Genre:N',
    # Condition on the genre selection defined in this cell. This used to
    # reference `selector`, a name this cell never defines, so it depended on
    # whatever an unrelated cell had last bound to it.
    color=alt.condition(genre_selector, 'Genre:N', alt.value('white'))
).transform_filter(
    genre_selector
)
# text_publisher_selected = alt.Chart(data_url).mark_text(dx=0, dy=0, size=20).encode(
#     text='Publisher:N',
# #     color=alt.condition(selector, 'Publisher:N', alt.value('white'))
# ).transform_filter(
#     publisher_selector
# )

# Stacked area of summed global sales per year; clicking a band selects its genre.
chart_stacked = alt.Chart(data_url, title=stacked_chart_title).mark_area(dx=-50).encode(
    x='Year:O',
    y='sum(Global_Sales):Q',
    color=alt.Color("Genre:N",scale=alt.Scale(scheme='tableau20')),
    opacity = opacity(genre_selector)
).add_selection(
    genre_selector
)

# Publishers of the selected genre, ranked by summed global sales.
# rank() numbers rows from 1, so `rank < 10` keeps ranks 1 through 9.
chart_genre = alt.Chart(data_url).transform_filter(
    genre_selector
).transform_aggregate(
    sum_of_global_sales='sum(Global_Sales)',
    groupby=['Publisher']
).transform_window(
    rank='rank(sum_of_global_sales)',
    sort=[alt.SortField('sum_of_global_sales', order='descending')]
).transform_filter(
    alt.datum.rank < 10
).mark_bar().encode(
    y=alt.Y('Publisher:N', sort = '-x'), 
    x='sum_of_global_sales:Q',
#     color= alt.Color("Publisher:N",legend = None),
    opacity = opacity(publisher_selector)
).add_selection(
    publisher_selector
)

# chart_publisher = alt.Chart(data_url).mark_trail().encode(
#     x='Year:O',
#     y='sum(Global_Sales):Q',
# #     color="Publisher:N"
# ).transform_filter(
#     genre_selector
# ).transform_filter(
#     publisher_selector
# ).add_selection(
#     time_selector
# )

# Stack the three views vertically, with extra padding on the left and bottom.
x = alt.vconcat(chart_stacked, text_genre_selected, chart_genre).configure(padding={"left": 300, "top": 5, "right": 5, "bottom": 200})
# x = alt.vconcat(chart_stacked, text_genre_selected, chart_genre, text_publisher_selected, chart_publisher).configure(padding={"left": 300, "top": 5, "right": 5, "bottom": 200})
x

In [201]:
# Export the combined chart `x` to x.html.
x.save('x.html')

In [175]:
# Drill-down dashboard: genre -> publisher -> time range -> game -> platform.
# Each chart is filtered by the selections made in the charts before it.
selector = alt.selection_single(fields=['Genre'])
# slider = alt.binding_range(min=0, max=100, step=1, name='cutoff:')
# selector = alt.selection_single(name="SelectorName", fields=['cutoff'], bind=slider, init={'cutoff': 50})


def opacity(x):
    """Return an opacity condition for selection `x`.

    Marks matched by `x` are drawn fully opaque (1); all others at 0.2.
    """
    return alt.condition(x, alt.value(1), alt.value(.2))


stacked_chart_title = 'Global Sales by Genre Per Year'
# This title was referenced by chart_genre but never defined anywhere in the
# notebook, so the cell raised NameError on a fresh kernel.
publisher_plot_title = 'Top Publishers by Global Sales'

# Stacked area of summed global sales per year; clicking a band selects its genre.
chart_stacked = alt.Chart(data_url, title=stacked_chart_title).mark_area().encode(
    x='Year:O',
    y='sum(Global_Sales):Q',
    color=alt.Color("Genre:N",scale=alt.Scale(scheme='tableau20')),
    opacity = opacity(selector)
).add_selection(
    selector
)

# Clickable genre legend. The layout at the bottom of this cell used the name
# `legend`, which existed here only as commented-out code; it therefore
# resolved to the cars `Origin` legend built in an earlier cell. The genre
# legend is defined under its own name and uses the same color scheme as the
# area chart.
genre_legend = alt.Chart(data_url).mark_point().encode(
    y=alt.Y('Genre:N', axis=alt.Axis(orient='right')),
    color=alt.Color("Genre:N", scale=alt.Scale(scheme='tableau20')),
).add_selection(
    selector
)

selector2 = alt.selection_single(fields=['Publisher'])

# Publishers of the selected genre ranked by summed global sales.
# rank() numbers rows from 1, so `rank < 10` keeps ranks 1 through 9.
chart_genre = alt.Chart(data_url, title=publisher_plot_title).transform_filter(
    selector
).transform_aggregate(
    sum_of_global_sales='sum(Global_Sales)',
    groupby=['Publisher']
).transform_window(
    rank='rank(sum_of_global_sales)',
    sort=[alt.SortField('sum_of_global_sales', order='descending')]
).transform_filter(
    alt.datum.rank < 10
).mark_bar().encode(
    y=alt.Y('Publisher:N', sort = '-x'), 
    x='sum_of_global_sales:Q',
#     color= alt.Color("Publisher:N",legend = None),
    opacity = opacity(selector2)
).add_selection(
    selector2
)

# Horizontal brush over years, drawn on the publisher trail chart.
time_selector = alt.selection_interval(encodings=['x'])

# Sales per year for the selected genre and publisher.
chart_publisher = alt.Chart(data_url).mark_trail().encode(
    x='Year:O',
    y='sum(Global_Sales):Q',
#     color="Publisher:N"
).transform_filter(
    selector
).transform_filter(
    selector2
).add_selection(
    time_selector
)

selector3 = alt.selection_single(fields=['Name'])

# Games matching genre, publisher and time range, ranked by summed global
# sales (again ranks 1 through 9).
chart_name = alt.Chart(data_url).transform_filter(
    selector
).transform_filter(
    selector2
).transform_filter(
    time_selector
).transform_aggregate(
    sum_of_global_sales='sum(Global_Sales)',
    groupby=['Name']
).transform_window(
    rank='rank(sum_of_global_sales)',
    sort=[alt.SortField('sum_of_global_sales', order='descending')]
).transform_filter(
    alt.datum.rank < 10
).mark_bar().encode(
    y=alt.Y('Name:N', sort='-x'),
    x='sum_of_global_sales:Q',
#     color= "Name:N",
    opacity = opacity(selector3)
).add_selection(
    selector3
)

# Per-platform sales of whatever passes all four selections.
chart_platform = alt.Chart(data_url).transform_filter(
    selector
).transform_filter(
    selector2
).transform_filter(
    time_selector
).transform_filter(
    selector3
).mark_bar().encode(
    y='Platform:N',
    x='sum(Global_Sales):Q',
    color="Name:N"
)

x = ((chart_stacked | genre_legend) & chart_genre) & (chart_publisher & chart_name & chart_platform)
x

In [113]:
# Export the combined chart `x` to x.html (overwrites any earlier export of the same name).
x.save('x.html')

In [72]:
# Top 10 movies by IMDB rating
alt.Chart(
    vega_data.movies.url,
).mark_bar().encode(
    x=alt.X('Title:N', sort='-y'),
    y=alt.Y('IMDB_Rating:Q'),
    color=alt.Color('IMDB_Rating:Q')

).transform_window(
    rank='rank(IMDB_Rating)',
    sort=[alt.SortField('IMDB_Rating', order='descending')]
).transform_filter(
    # rank() numbers rows starting at 1, so the previous `rank < 10` kept only
    # ranks 1-9. `<= 10` keeps ranks 1-10; tied ratings share a rank, so ties
    # can yield more than ten bars.
    (alt.datum.rank <= 10)
)

In [73]:
import altair as alt
import pandas as pd
import numpy as np

# Public-domain excerpt from "A Tale of Two Cities".
text = """
It was the best of times, it was the worst of times, it was the age of wisdom,
it was the age of foolishness, it was the epoch of belief, it was the epoch of
incredulity, it was the season of Light, it was the season of Darkness, it was
the spring of hope, it was the winter of despair, we had everything before us,
we had nothing before us, we were all going direct to Heaven, we were all going
direct the other way - in short, the period was so far like the present period,
that some of its noisiest authorities insisted on its being received, for good
or for evil, in the superlative degree of comparison only.
"""

# One row per alphabetic character of the excerpt, in reading order.
# Whitespace and punctuation are dropped.
source = pd.DataFrame(
    {'letters': np.array(list(filter(str.isalpha, text)))}
)

# Publishers ranked by total global sales across the whole dataset.
# rank() numbers rows from 1, so the `rank < 10` filter keeps ranks 1 through 9.
alt.Chart(data_url).transform_aggregate(
    groupby=['Publisher'],
    sum='sum(Global_Sales)',
).transform_window(
    sort=[alt.SortField('sum', order='descending')],
    rank='rank(sum)',
).transform_filter(
    alt.datum.rank < 10
).mark_bar().encode(
    x=alt.X('sum:Q'),
    y=alt.Y('Publisher:N', sort='-x'),
)
