# Regression Lines using Altair

**Load Python Libraries:**

In [2]:
import numpy as np
import pandas as pd

from pandas import DataFrame, read_csv
import altair as alt
#alt.renderers.enable('altair_viewer')

import altair_transform
alt.data_transformers.disable_max_rows()

import matplotlib.pyplot as plt

## Typical Regression Example:

This example was adapted from one found on the web; I have not been able to locate the original source.

In [3]:
# Reproducible synthetic data: exponential decay plus Gaussian noise, with
# every sample assigned at random to group 'a' or 'b'.
np.random.seed(42)
x = np.linspace(0, 10)
y = np.e ** (-0.1 * x) + np.random.randn(len(x)) / 5
group = np.random.choice(['a', 'b'], len(x))

df = pd.DataFrame({'x': x, 'y': y, 'group': group})
# Scale group 'a' by 0.1, i.e. one decade lower on the log y-axis.
df.loc[df.group == 'a', 'y'] *= 0.1

# Base scatter plot; the regression layers below are derived from it so they
# inherit its data and its colour-by-group encoding.
chart = alt.Chart(df).mark_point().encode(
    x='x',
    y=alt.Y('y', scale=alt.Scale(type='log')),
    color='group',
).properties(
    title="Data",
)

# One fitted exponential curve per group.
Reg_Line = chart.transform_regression(
    'x', 'y',
    method="exp",
    groupby=["group"],
).mark_line()

# Vega expression that formats the fit parameters as a label.
# The "exp" method fits y = a * e^(b * x) and reports coef = [a, b], so the
# label uses "*" between the two terms.
# The expression is assembled from adjacent string literals (instead of
# backslash continuations inside one literal) so that no Python escape
# sequence leaks into the Vega expression.
params_expr = (
    '"r² = " + round(datum.rSquared * 100)/100 + '
    '"      y = " + round(datum.coef[0] * 10)/10 + " * e ^ (" + '
    'round(datum.coef[1] * 10000)/10000 + "x" + ")"'
)

# params=True makes the transform emit one row of fit parameters per group
# instead of curve points.
# NOTE: both groups' labels are placed at the same pixel position, so they
# are drawn on top of each other when both groups are present.
Reg_Params = chart.transform_regression(
    'x', 'y',
    method="exp",
    groupby=["group"],
    params=True,
).mark_text(
    align='left',
    lineBreak='\n',
).encode(
    x=alt.value(150),  # pixels from left
    y=alt.value(250),  # pixels from top
    text='params:N',
).transform_calculate(
    params=params_expr,
)

chart + Reg_Line + Reg_Params



## Regression on Selected Samples only:

Select samples with a brush on the cross plot on the left, and the regression line for those samples will be shown on the cross plot on the right. The regression equation label is only legible if all selected samples come from a single group; otherwise the labels for the two groups are printed on top of each other (overprint).


It is the `transform_filter` on `chart` that makes this possible. Joel Ostblom of Altair told us about `transform_filter`, and it works: the regression is fitted only to the selected samples. If you select samples from both groups, you will get two regression lines.

In [15]:
# Interval (rectangular drag) selection shared by the two charts below.
brush = alt.selection_interval()

# Reproducible synthetic data: exponential decay plus Gaussian noise, with
# every sample assigned at random to group 'a' or 'b'.
np.random.seed(42)
x = np.linspace(0, 10)
y = np.e ** (-0.1 * x) + np.random.randn(len(x)) / 5
group = np.random.choice(['a', 'b'], len(x))

df = pd.DataFrame({'x': x, 'y': y, 'group': group})
# Scale group 'a' by 0.1, i.e. one decade lower on the log y-axis.
df.loc[df.group == 'a', 'y'] *= 0.1

# Left-hand chart: all samples, with the brush attached. Points inside the
# brush keep their group colour; points outside are greyed out.
chart1 = alt.Chart(df).mark_point(filled=True, size=100).encode(
    x='x',
    y=alt.Y('y', scale=alt.Scale(type='log')),
    color=alt.condition(brush, 'group:N', alt.value('lightgray')),
).add_selection(
    brush
)

# Right-hand chart: only the brushed samples. The filter is applied before
# the regression transforms derived from this chart, so the fit uses the
# selected samples only.
chart = alt.Chart(df).mark_point(filled=True, size=100).encode(
    x='x',
    y=alt.Y('y', scale=alt.Scale(type='log')),
    color='group',
).transform_filter(
    brush
)

# Regression methods that can be passed as `method`:
# ['linear', 'log', 'exp', 'pow', 'quad', 'poly']

# One fitted exponential curve per group present in the selection.
Reg_Line = chart.transform_regression(
    'x', 'y',
    method="exp",
    groupby=["group"],
).mark_line()

# Vega expression that formats the fit parameters as a label.
# The "exp" method fits y = a * e^(b * x) and reports coef = [a, b], so the
# label uses "*" between the two terms.
# The expression is assembled from adjacent string literals (instead of
# backslash continuations inside one literal) so that no Python escape
# sequence leaks into the Vega expression.
params_expr = (
    '"r² = " + round(datum.rSquared * 100)/100 + '
    '"      y = " + round(datum.coef[0] * 10)/10 + " * e ^ (" + '
    'round(datum.coef[1] * 10000)/10000 + "x" + ")"'
)

# params=True makes the transform emit one row of fit parameters per group
# instead of curve points.
# NOTE: every group's label is placed at the same pixel position, so the
# labels overprint when the selection contains samples from both groups.
Reg_Params = chart.transform_regression(
    'x', 'y',
    method="exp",
    groupby=["group"],
    params=True,
).mark_text(
    align='left',
    lineBreak='\n',
).encode(
    x=alt.value(150),  # pixels from left
    y=alt.value(250),  # pixels from top
    text='params:N',
).transform_calculate(
    params=params_expr,
)

# Selection chart on the left, filtered chart with fit and label on the right.
chart1 | (chart + Reg_Line + Reg_Params)

