# Gallery
---------

## Apply Style

In [1]:
from statworx_theme import apply_style_altair

# Call the theme helper once, before any chart in this notebook is built.
# NOTE(review): presumably this registers and enables the statworx Altair theme
# globally — confirm against the statworx_theme documentation.
apply_style_altair()

## Distributions

In [46]:
import altair as alt
from vega_datasets import data

# Cars dataset; only the Miles_per_Gallon column is used in this cell.
dataset = data.cars()

# Density transform over Miles_per_Gallon; its two output fields are named
# 'Miles_per_Gallon' and 'density'.
mpg_density = alt.Chart(dataset).transform_density(
    'Miles_per_Gallon',
    as_=['Miles_per_Gallon', 'density'],
)

# Draw the estimate as a filled area; the bare expression renders the chart.
mpg_density.mark_area().encode(
    x="Miles_per_Gallon:Q",
    y='density:Q',
).properties(
    title="Kernel density estimation Miles per Gallon",
    width=800,
    height=500,
)

In [3]:
# Source: https://altair-viz.github.io/gallery/index.html
dataset = data.iris()

# The four iris columns that get folded into long form (Measure, value).
iris_measures = ['petalWidth', 'petalLength', 'sepalWidth', 'sepalLength']

# Fold -> per-measure density -> stacked areas coloured by measure.
stacked_density = alt.Chart(dataset).mark_area().encode(
    alt.X('value:Q'),
    alt.Y('density:Q', stack='zero'),
    alt.Color('Measure:N'),
).transform_fold(
    iris_measures,
    as_=['Measure', 'value'],
).transform_density(
    density='value',
    groupby=['Measure'],
    extent=[-1, 9],
    steps=1000,
)

stacked_density.properties(
    title="Stacked density estimation - iris dataset",
    width=1000,
    height=500,
)

In [4]:
# Violin plot: one centre-stacked density per iris measure, one facet column each.
# Reads `dataset` (iris) as bound by the preceding cell.
violin_measures = ['petalWidth', 'petalLength', 'sepalWidth', 'sepalLength']

# Density axis shows a single tick at 0 and nothing else.
density_axis = alt.Axis(labels=False, values=[0], grid=False, ticks=True)

# Facet labels go underneath each violin.
bottom_header = alt.Header(
    titleOrient='bottom',
    labelOrient='bottom',
    labelPadding=0,
)

violins = alt.Chart(dataset).transform_fold(
    violin_measures,
    as_=['Measure', 'value'],
).transform_density(
    density='value',
    groupby=['Measure'],
    extent=[-1, 9],
    steps=1000,
).mark_area(orient='horizontal').encode(
    y='value:Q',
    color='Measure:N',
    x=alt.X('density:Q', stack='center', title=None, axis=density_axis),
    column=alt.Column('Measure:N', header=bottom_header),
).properties(
    width=250,
    height=500,
)

# Remove facet spacing and the view border so the violins sit side by side.
violins.configure_facet(spacing=0).configure_view(stroke=None)

In [25]:
# Source: https://altair-viz.github.io/gallery/index.html
source = data.seattle_weather.url

step = 34
overlap = 1

# Data pipeline: month of each date, mean max-temperature per month, binned
# max-temperature counts per month, with missing bins imputed as 0.
monthly_counts = alt.Chart(source, height=step, width=800).transform_timeunit(
    Month='month(date)'
).transform_joinaggregate(
    mean_temp='mean(temp_max)', groupby=['Month']
).transform_bin(
    ['bin_max', 'bin_min'], 'temp_max'
).transform_aggregate(
    value='count()', groupby=['Month', 'mean_temp', 'bin_min', 'bin_max']
).transform_impute(
    impute='value', groupby=['Month', 'mean_temp'], key='bin_min', value=0
)

# The y range ends at a negative pixel value, so an area may extend above its
# own row by `step * overlap` pixels.
ridge_y = alt.Y(
    'value:Q',
    scale=alt.Scale(range=[step, -step * overlap]),
    axis=None,
)
ridge_fill = alt.Fill(
    'mean_temp:Q',
    legend=None,
    scale=alt.Scale(domain=[30, 5], scheme='redyellowblue'),
)

ridges = monthly_counts.mark_area(
    interpolate='monotone',
    fillOpacity=0.8,
    stroke='lightgray',
    strokeWidth=0.5,
).encode(
    alt.X('bin_min:Q', bin='binned', title='Maximum Daily Temperature (C)'),
    ridge_y,
    ridge_fill,
)

# One row per month, labelled with the full month name.
month_rows = alt.Row(
    'Month:T',
    title=None,
    header=alt.Header(labelAngle=0, labelAlign='right', format='%B'),
)

ridges.facet(row=month_rows).properties(
    title='Seattle Weather',
    bounds='flush',
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
).configure_title(
    anchor='end'
)

## Timeseries

In [26]:
source = data.stocks()

# One line per stock symbol: price over date.
stock_lines = alt.Chart(source).mark_line().encode(
    color='symbol',
    x='date',
    y='price',
)

stock_lines.properties(width=800, height=500)

In [34]:
# NOTE(review): `data` is imported from vega_datasets in an earlier cell and is used
# as a dataset loader elsewhere (e.g. `data.stocks()`), whereas the saved output of
# this cell is a table with columns index/A/B/C/D. This cell appears to depend on
# stale kernel state in which `data` was a DataFrame — confirm, then remove it or
# point it at the frame built in the next cell.
data.reset_index()

Unnamed: 0,index,A,B,C,D
0,2016-01-01,,,,
1,2016-01-02,,,,
2,2016-01-03,,,,
3,2016-01-04,,,,
4,2016-01-05,,,,
...,...,...,...,...,...
360,2016-12-26,-18.482826,10.330142,-12.108625,14.878444
361,2016-12-27,-18.693797,10.391382,-12.020502,15.376387
362,2016-12-28,-18.752957,10.062616,-11.685921,15.996722
363,2016-12-29,-18.918042,9.957435,-11.244617,16.669990


In [45]:
# numpy / pandas are not imported anywhere else in this notebook, so bring them
# into scope here; without this the cell fails on a fresh kernel.
import numpy as np
import pandas as pd

# Four seeded random walks over 365 daily steps (fixed seed -> same values on
# every run).
rs = np.random.RandomState(365)
values = rs.randn(365, 4).cumsum(axis=0)
dates = pd.date_range("1 1 2016", periods=365, freq="D")

# Smooth with a 7-day rolling mean (the first 6 rows are NaN as a result), then
# expose the date index as a regular `date` column so Altair can encode it.
dataset = (
    pd.DataFrame(values, dates, columns=["A", "B", "C", "D"])
    .rolling(7)
    .mean()
    .reset_index()
    .rename(columns={"index": "date"})
)

# Fold the four series into long form and draw one line per series,
# distinguished by both colour and dash pattern.
alt.Chart(dataset).mark_line(size=3).transform_fold(
    ['A',
     'B',
     'C',
     'D'],
    as_ = ['Measure', 'value']
).encode(
    x='date:T',
    y='value:Q',
    color='Measure:N',
    strokeDash="Measure:N"
).properties(
    width=800,
    height=500
)

In [48]:
# Source: https://altair-viz.github.io/gallery/index.html
source = data.stocks()

# Single selection driven by mouseover on the nearest point, keyed on `symbol`.
highlight = alt.selection(
    type='single',
    on='mouseover',
    fields=['symbol'],
    nearest=True,
)

# Shared encodings for both layers.
base = alt.Chart(source).encode(
    x='date:T',
    y='price:Q',
    color='symbol:N',
)

# Fully transparent circles: they only carry the selection.
points = base.mark_circle().encode(
    opacity=alt.value(0),
).add_selection(
    highlight,
).properties(width=800, height=500)

# Line width 3 for the selected symbol, 1 for all others.
lines = base.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(3)),
)

alt.layer(points, lines)

## Relationship

In [53]:
# Source: https://altair-viz.github.io/gallery/index.html
source = data.iris()

# Neither axis is forced to include zero.
sepal_length_x = alt.X('sepalLength', scale=alt.Scale(zero=False))
sepal_width_y = alt.Y('sepalWidth', scale=alt.Scale(zero=False, padding=1))

# Bubble scatter: colour by species, bubble size by petal width.
iris_bubbles = alt.Chart(source).mark_circle().encode(
    sepal_length_x,
    sepal_width_y,
    color='species',
    size='petalWidth',
)

iris_bubbles.properties(width=800, height=500)

In [55]:
dataset = data.seattle_weather()

# Fields for the grid rows; the grid columns use the same fields in reverse order.
matrix_rows = ['precipitation', 'temp_max', 'temp_min']
matrix_columns = ['temp_min', 'temp_max', 'precipitation']

# Template panel: x/y are filled in per grid cell by `repeat`.
# `dataset[1:]` passes every row except the first.
panel = alt.Chart(dataset[1:]).mark_circle().encode(
    alt.X(alt.repeat("column"), type='quantitative'),
    alt.Y(alt.repeat("row"), type='quantitative'),
    color='weather:N',
).properties(width=200, height=200)

panel.repeat(row=matrix_rows, column=matrix_columns).interactive()

In [200]:
# Source: https://altair-viz.github.io/gallery/dot_dash_plot.html

source = data.cars()

# Interval brush shared by all three panels.
brush = alt.selection(type='interval')
base = alt.Chart(source).add_selection(brush)

# Axis with labels, domain line and ticks all switched off (used by the tick panels).
tick_axis = alt.Axis(labels=False, domain=False, ticks=False)

# Central scatter: points outside the brush turn grey.
points = base.mark_point().encode(
    x=alt.X('Miles_per_Gallon', title=''),
    y=alt.Y('Horsepower', title=''),
    color=alt.condition(brush, 'Origin', alt.value('grey')),
).properties(
    title="Scatter - Tick Plot",
    width=800,
    height=500,
)

# Tick strip along x (Miles_per_Gallon), one row per Origin.
x_ticks = base.mark_tick().encode(
    alt.X('Miles_per_Gallon', axis=tick_axis),
    alt.Y('Origin', title='', axis=tick_axis),
    color=alt.condition(brush, 'Origin', alt.value('lightgrey')),
).properties(width=800)

# Tick strip along y (Horsepower), one column per Origin.
y_ticks = base.mark_tick().encode(
    alt.X('Origin', title='', axis=tick_axis),
    alt.Y('Horsepower', axis=tick_axis),
    color=alt.condition(brush, 'Origin', alt.value('lightgrey')),
).properties(height=500)

# Layout: y ticks on the left; scatter stacked above the x ticks on the right.
alt.hconcat(y_ticks, alt.vconcat(points, x_ticks))

In [68]:
# Source: https://altair-viz.github.io/gallery/scatter_marginal_hist.html

source = data.iris()

base = alt.Chart(source, width=700, height=400)

# Fixed domains, shared between the scatter axes and the histogram bin extents.
xscale = alt.Scale(domain=(4.0, 8.0))
yscale = alt.Scale(domain=(1.9, 4.55))

bar_args = dict(opacity=.3, binSpacing=0)

# When binning, the axis range comes from the bin extent, so the histograms
# pass the scale domain as `extent` instead of setting a scale.
length_bins = alt.Bin(maxbins=20, extent=xscale.domain)
width_bins = alt.Bin(maxbins=20, extent=yscale.domain)

points = base.mark_circle().encode(
    alt.X('sepalLength', scale=xscale),
    alt.Y('sepalWidth', scale=yscale),
    color='species',
)

# Histogram of sepalLength, placed above the scatter.
top_hist = base.mark_bar(**bar_args).encode(
    alt.X('sepalLength:Q', bin=length_bins, stack=None, title=''),
    alt.Y('count()', stack=None, title=''),
    alt.Color('species:N'),
).properties(height=100)

# Histogram of sepalWidth, placed to the right of the scatter.
right_hist = base.mark_bar(**bar_args).encode(
    alt.Y('sepalWidth:Q', bin=width_bins, stack=None, title=''),
    alt.X('count()', stack=None, title=''),
    alt.Color('species:N'),
).properties(width=100)

alt.vconcat(top_hist, alt.hconcat(points, right_hist))

In [101]:
dataset = data.iris()

# Compute the correlation matrix on numeric columns only. iris also carries the
# string column `species`; calling DataFrame.corr() on the full frame raises on
# pandas >= 2.0 (and was a deprecated silent drop on 1.5).
corr = dataset.select_dtypes(include="number").corr()

# Reshape the square matrix into long form: one row per (variable1, variable2) pair.
corr = (
    corr.stack()
    .reset_index()
    .rename(columns={"level_0": "variable1", "level_1": "variable2", 0: "correlation"})
)

# Shared x/y encodings and size for both layers.
base = alt.Chart(corr).encode(
    x='variable1:O',
    y='variable2:O',
).properties(
    width=800,
    height=500
)

# Cell labels, rounded to two decimals so they fit inside the cells.
corr_values = base.mark_text().encode(
    text=alt.Text("correlation:Q", format=".2f")
)

# Heatmap cells; the colour domain is pinned to the full correlation range [-1, 1].
# x/y are inherited from `base`.
plot = base.mark_rect().encode(
    color=alt.Color('correlation:Q', scale=alt.Scale(domain=[-1,1]))
)

plot + corr_values

## Comparison

In [136]:
import seaborn as sns

penguins = sns.load_dataset("penguins")

# One facet column per species, one bar per sex inside each facet.
# NOTE(review): `body_mass_g` is encoded without an aggregate, so each penguin
# contributes its own bar segment — confirm whether mean() was intended.
sex_bars = alt.Chart(penguins).mark_bar().encode(
    column=alt.Column('species:N', title=None),
    x=alt.X('sex:N', title=None),
    y='body_mass_g:Q',
    color='sex:N',
)

sex_bars.properties(width=250, height=500)

In [139]:
# Source: https://altair-viz.github.io/gallery/index.html
source = data.barley()

# Sort the segments of the bars by site, ascending.
segment_order = alt.Order('site', sort='ascending')

# Horizontal stacked bars: summed yield per variety, split by site.
yield_bars = alt.Chart(source).mark_bar().encode(
    x='sum(yield)',
    y='variety',
    color='site',
    order=segment_order,
)

yield_bars.properties(width=800, height=500)

## Difference

In [151]:
# Source: https://altair-viz.github.io/gallery/index.html

from statworx_theme import apply_custom_colors_altair

# 19 age groups in the data to plot
apply_custom_colors_altair(n_groups_ordinal=19)

source = data.population.url

# One box per age group, coloured by the same ordinal age field.
age_boxes = alt.Chart(source).mark_boxplot(extent=0.5, ticks=True, size=20)

age_boxes.encode(
    x='age:O',
    y='people:Q',
    color=alt.Color("age:O"),
).properties(width=800, height=500)

In [165]:
dataset = data.iris()

# Scatter of sepal width against sepal length, coloured by species, on fixed domains.
scatterplot = alt.Chart(dataset).mark_point().encode(
    x=alt.X('sepalLength:Q', scale=alt.Scale(domain=[4, 8])),
    y=alt.Y('sepalWidth:Q', scale=alt.Scale(domain=[0, 5])),
    color='species:N',
).properties(width=800, height=500)

# Regression of sepalWidth on sepalLength, fitted separately per species and
# drawn as thick lines; it inherits the scatter's encodings (including colour).
species_trend = scatterplot.transform_regression(
    'sepalLength',
    'sepalWidth',
    groupby=['species'],
).mark_line(size=3)

alt.layer(scatterplot, species_trend)



In [198]:
# Source: https://altair-viz.github.io/gallery/stripplot.html
import seaborn as sns

penguins = sns.load_dataset("penguins")

# Jitter axis: a single tick at 0, no grid lines or labels.
jitter_axis = alt.Axis(values=[0], ticks=True, grid=False, labels=False)

# Row headers (one per sex) sit on the left, rotated by -90 degrees.
sex_header = alt.Header(
    labelAngle=-90,
    titleOrient='left',
    labelOrient='left',
    labelAlign='center',
    labelPadding=3,
)

stripplot = alt.Chart(penguins).mark_circle(size=35).encode(
    x=alt.X('body_mass_g:Q', scale=alt.Scale(domain=[2500,6500])),
    y=alt.Y('jitter:Q', title=None, axis=jitter_axis, scale=alt.Scale()),
    color=alt.Color('species:N'),
    row=alt.Row('sex:N', header=sex_header),
).transform_calculate(
    # Generate Gaussian jitter with a Box-Muller transform
    jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
).configure_facet(
    spacing=0
).configure_view(
    stroke=None
).properties(
    height=100,
    width=800
)

stripplot

## Additional Plots and Inspiration

https://altair-viz.github.io/gallery/index.html

https://joelostblom.github.io/altair_ally/intro.html

https://github.com/joelostblom/altair_ally

https://github.com/justinbois/altair-catplot