# Altair - Transformation

In [1]:
from vega_datasets import data # pip install vega_datasets
import altair as alt  # pip install altair
import pandas as pd
import numpy as np

## Load Data

In [2]:
# Load the "cars" sample dataset through vega_datasets.
dataset = data.cars()

# Preview the first five rows (head()'s default) to inspect the columns.
dataset.head(n=5)

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


## Transform Aggregate

In [4]:
# Horizontal bars: mean acceleration for each cylinder count.
# The mean is computed inside the chart spec by transform_aggregate, so the
# x encoding refers to the derived field `mean_acc`, not a DataFrame column.
(
    alt.Chart(dataset)
    .transform_aggregate(
        mean_acc='mean(Acceleration)',
        groupby=["Cylinders"],
    )
    .mark_bar()
    .encode(
        y=alt.Y('Cylinders:O'),
        x=alt.X('mean_acc:Q'),
    )
)

## Transform Calculate

In [5]:
# One row per integer parameter value t = 0, 1, ..., 100 (inclusive).
dataset = pd.DataFrame(
    {'t': range(0, 101)},
)

In [6]:
# Parametric curve: x and y are derived from t inside the chart spec.
# Over t = 0..100, x runs through one cosine period and y through two sine
# periods. The `order` channel makes the line connect points in t order.
(
    alt.Chart(dataset)
    .transform_calculate(
        x='cos(datum.t * PI / 50)',
        y='sin(datum.t * PI / 25)',
    )
    .mark_line()
    .encode(
        x=alt.X('x:Q'),
        y=alt.Y('y:Q'),
        order=alt.Order('t:Q'),
    )
)

## Transform Filter

In [7]:
# Load the "population" sample dataset via vega_datasets; the filter examples
# that follow read its `year`, `age` and `people` fields.
dataset = data.population()

### Equal

In [9]:
# Keep only rows whose `year` equals 2000, then plot the summed `people`
# for each age as a line.
(
    alt.Chart(dataset)
    .transform_filter(
        alt.FieldEqualPredicate(field='year', equal=2000)
    )
    .mark_line()
    .encode(
        x=alt.X('age:O'),
        y=alt.Y('sum(people):Q'),
        color=alt.Color('year:O'),
    )
)

### One Of

In [10]:
# Keep only rows whose `year` is one of 1900, 1950 or 2000; each remaining
# year becomes its own colored line of summed `people` by age.
(
    alt.Chart(dataset)
    .transform_filter(
        alt.FieldOneOfPredicate(field='year', oneOf=[1900, 1950, 2000])
    )
    .mark_line()
    .encode(
        x=alt.X('age:O'),
        y=alt.Y('sum(people):Q'),
        color=alt.Color('year:O'),
    )
)

### Range

In [11]:
# Keep only rows whose `year` lies in the range [1960, 2000]; one colored
# line of summed `people` by age is drawn per remaining year.
(
    alt.Chart(dataset)
    .transform_filter(
        alt.FieldRangePredicate(field='year', range=[1960, 2000])
    )
    .mark_line()
    .encode(
        x=alt.X('age:O'),
        y=alt.Y('sum(people):Q'),
        color=alt.Color('year:O'),
    )
)

## Transform Impute

In [12]:
# Two series, "x" and "y", sampled at time steps t = 0..6 with gaps (NaN).
# Melting to long form gives (t, variable, value) rows; dropna() then removes
# the missing observations so the imputation examples have gaps to fill.
dataset = pd.melt(
    pd.DataFrame({
        't': range(7),
        'x': [1, 3, np.nan, 1, np.nan, np.nan, 2.5],
        'y': [np.nan, np.nan, 7, np.nan, 6, 4, 6],
    }),
    id_vars='t',
).dropna()

dataset

Unnamed: 0,t,variable,value
0,0,x,1.0
1,1,x,3.0
3,3,x,1.0
6,6,x,2.5
9,2,y,7.0
11,4,y,6.0
12,5,y,4.0
13,6,y,6.0


In [13]:
# Un-imputed reference chart: one line (with point markers) per variable,
# drawn only through the observations that are actually present.
raw = (
    alt.Chart(dataset)
    .mark_line(point=True)
    .encode(
        x=alt.X('t:Q'),
        y=alt.Y('value:Q'),
        color=alt.Color('variable:N'),
    )
)
raw

In [14]:
# Faded copy of the raw chart (opacity fixed at 0.4), kept as a backdrop so
# the imputed lines layered on top of it stand out.
background = raw.encode(opacity=alt.value(0.4))

### Fixed Imputation

In [15]:
# Fill the missing `value` entries along the `t` key, separately for each
# `variable`, using the mean as the imputation method. No `frame` is given,
# so the mean is not restricted to a sliding window.
fixed_imputation = (
    alt.Chart(dataset)
    .transform_impute(
        impute='value',
        key='t',
        method='mean',
        groupby=['variable'],
    )
    .mark_line(point=True)
    .encode(
        x=alt.X('t:Q'),
        y=alt.Y('value:Q'),
        color=alt.Color('variable:N'),
    )
)

# Layer the imputed lines over the faded raw data for comparison.
background + fixed_imputation

### Window Imputation

In [16]:
# Same imputation as the fixed variant, but the mean is taken over a sliding
# frame of [-3, 3] around each missing key instead of over the whole group.
window_imputation = (
    alt.Chart(dataset)
    .transform_impute(
        impute='value',
        key='t',
        method='mean',
        frame=[-3, 3],
        groupby=['variable'],
    )
    .mark_line(point=True)
    .encode(
        x=alt.X('t:Q'),
        y=alt.Y('value:Q'),
        color=alt.Color('variable:N'),
    )
)

# Layer the imputed lines over the faded raw data for comparison.
background + window_imputation

### Fixed vs Window

In [18]:
# Overlay both imputations, told apart by dash pattern:
# long dashes = fixed (whole-group) mean, fine dots = windowed mean.
(
    fixed_imputation.encode(strokeDash=alt.value([10, 10]))
    + window_imputation.encode(strokeDash=alt.value([1, 1]))
)

## Transform Regression

In [19]:
# Fix the seed so the random walk below is reproducible across runs.
np.random.seed(20210928)

# 100-step random walk: cumulative sum of standard-normal increments.
dataset = pd.DataFrame({
    'x': range(100),
    'y': np.cumsum(np.random.randn(100)),
})


### Linear / Logarithmic / Polynomial

In [20]:
# Scatter plot of the raw (x, y) observations.
chart = alt.Chart(dataset).mark_point().encode(
    x='x',
    y='y'
)

# Degree-5 polynomial regression of y on x, drawn as a line on the same
# encodings. Named for what it is (a polynomial fit); LOESS smoothing is a
# different transform, shown in its own section. Changing `method` (e.g. to
# "linear" or "log") selects the other fits named in this section's heading.
poly_fit = chart.transform_regression('x', 'y', method="poly", order=5).mark_line()

# Layer the fitted curve over the points.
chart + poly_fit

### LOESS

In [21]:
# Scatter plot of the raw (x, y) observations.
chart = (
    alt.Chart(dataset)
    .mark_point()
    .encode(
        x='x',
        y='y',
    )
)

# LOESS smoothing of y over x with bandwidth 0.3, drawn as a line.
loess = chart.mark_line().transform_loess(on='x', loess='y', bandwidth=0.3)

# Layer the smoothed curve over the points.
chart + loess

## Transform Quantile

In [22]:
# Seed the global NumPy RNG so the sample is reproducible.
np.random.seed(20210928)

# 200 draws from the standard normal distribution in a single column `x`.
dataset = pd.DataFrame({'x': np.random.standard_normal(size=200)})

In [23]:
# Quantile plot of `x`: the transform replaces the data with quantile rows,
# which are read here through the `prob` and `value` fields.
(
    alt.Chart(dataset)
    .transform_quantile(quantile='x')
    .mark_point()
    .encode(
        x=alt.X('prob:Q'),
        y=alt.Y('value:Q'),
    )
)

## Transform Sample

In [24]:
# Seed the global NumPy RNG so the point cloud is reproducible.
np.random.seed(20210928)

# 2000 points with independent standard-normal coordinates.
# `x` is drawn before `y`, so the draw order matches the seeded stream.
dataset = pd.DataFrame({
    'x': np.random.standard_normal(size=2000),
    'y': np.random.standard_normal(size=2000),
})

In [25]:
# Scatter plot of every point in the dataset.
chart = (
    alt.Chart(dataset)
    .mark_point()
    .encode(
        x='x',
        y='y',
    )
)

# Side by side: the full scatter (left) and the same chart with the sample
# transform set to 200 (right).
chart | chart.transform_sample(sample=200)

## Transform Window

In [26]:
# Load the stock-price sample data and keep only the rows for symbol GOOG.
dataset = data.stocks()
dataset = dataset.loc[dataset["symbol"].eq("GOOG")]

# Preview the first five rows (head()'s default) of the filtered frame.
dataset.head(n=5)

Unnamed: 0,symbol,date,price
369,GOOG,2004-08-01,102.37
370,GOOG,2004-09-01,129.6
371,GOOG,2004-10-01,190.64
372,GOOG,2004-11-01,181.98
373,GOOG,2004-12-01,192.79


In [27]:
# Individual price observations over time, drawn as points.
base = (
    alt.Chart(dataset)
    .mark_point()
    .encode(
        x=alt.X('date:T'),
        y=alt.Y('price:Q'),
    )
)

# Centered moving average of `price`: for each row the window spans from two
# rows before to two rows after it (frame=[-2, 2]). Drawn as a firebrick line.
moving_average = (
    base
    .transform_window(
        moving_average='mean(price)',
        frame=[-2, 2],
    )
    .mark_line()
    .encode(
        x=alt.X('date:T'),
        y=alt.Y('moving_average:Q'),
        color=alt.value("firebrick"),
    )
)

# Layer the smoothed line over the raw points.
base + moving_average