# Residual plot

In [1]:
import numpy as np
import pandas as pd

from lets_plot import *
LetsPlot.setup_html()

In [2]:
def plot_matrix(plots=[], width=400, height=300, columns=2):
    """Lay plots out on a regular grid inside a GGBunch and show it.

    Plots fill the grid row by row: the plot at position i is placed in
    row int(i / columns) and column i % columns, and every cell has the
    same width and height.

    :param plots: sequence of plot objects to arrange
    :param width: width of a single grid cell
    :param height: height of a single grid cell
    :param columns: number of cells per row
    :return: the result of GGBunch.show()
    """
    grid = GGBunch()
    for position, plot in enumerate(plots):
        row_index = int(position / columns)
        column_index = position % columns
        # Top-left corner of the cell this plot occupies.
        x_offset = column_index * width
        y_offset = row_index * height
        grid.add_plot(plot, x_offset, y_offset, width, height)
    return grid.show()

In [3]:
# Approximate contents of the lets_plot.bistro.residual module

from functools import reduce

from lets_plot.plot.core import PlotSpec

def _mean(l):
    return reduce(lambda s, el: s + el, l) / len(l)

def _deviations_sum(l1, l2):
    if len(l1) != len(l2):
        raise Exception("Both data series must have equal size, but {0} != {1}".format(len(l1), len(l2)))
    mean1 = _mean(l1)
    mean2 = _mean(l2)
    return reduce(lambda s, i: s + l1[i] * l2[i], range(len(l1))) - len(l1) * mean1 * mean2

def residual_plot(data=None, x=None, y=None, *,
                  method="lm",
                  geom="point",
                  bins=None, binwidth=None,
                  color=None, fill=None, size=None, alpha=None,
                  color_by=None,
                  hline=True, marginal="dens:r"):
    """Build a plot of the residuals of a straight-line fit of y on x.

    A least-squares line y = slope * x + intercept is fitted to the two columns,
    the residuals are stored in a "..residual.." column of a copy of the data,
    and a plot of that column against x is returned. The input data is left
    unmodified.

    :param data: column container indexable by column name (dict or pandas DataFrame)
    :param x: name of the column used as the predictor and the x axis
    :param y: name of the column whose residuals are plotted
    :param method: accepted for interface purposes only; it is not consulted here,
        the fit is always the least-squares line described above
    :param geom: main layer: "point", "tile" (geom_bin2d) or "blank" (no main layer)
    :param bins: forwarded to geom_bin2d when geom is "tile"
    :param binwidth: forwarded to geom_bin2d when geom is "tile"
    :param color: forwarded to the main layer
    :param fill: forwarded to the main layer when geom is "tile"
    :param size: forwarded to the main layer
    :param alpha: forwarded to the main layer
    :param color_by: name of a column to map to the color aesthetic
    :param hline: when true, add a semi-transparent black horizontal line at 0
    :param marginal: comma-separated "<geom>:<sides>" items, where geom is one of
        dens/density, hist/histogram, box/boxplot and every character of sides is
        passed to ggmarginal(); an item without ":" is treated as sides for "dens".
        Any non-string value disables the marginal layers.
    :return: PlotSpec built from the data copy, the mapping, the y label and the layers
    :raises Exception: if data, x or y is missing, the columns differ in length,
        fewer than two points are given, x has zero variance, or geom / a marginal
        geom is unknown
    """
    if data is None or x is None or y is None:
        raise Exception("Parameters data, x and y must be specified.")
    # prepare data
    # Plain lists give positional access regardless of the container's own index
    # labels (e.g. a filtered pandas DataFrame).
    xs = list(data[x])
    ys = list(data[y])
    if len(xs) != len(ys):
        raise Exception("All data series in data frame must have equal size "
                        "{x_col} : {x_len} {y_col} : {y_len}".format(
            x_col=x,
            y_col=y,
            x_len=len(xs),
            y_len=len(ys)
        ))
    n = len(xs)
    if n < 2:
        raise Exception("Data should have at least two points.")
    cross_deviations_sum = _deviations_sum(xs, ys)
    x_squared_deviations_sum = _deviations_sum(xs, xs)
    if x_squared_deviations_sum == 0:
        raise Exception("Sum of squared deviations of x shouldn't be equal to zero.")
    slope = cross_deviations_sum / x_squared_deviations_sum
    intercept = _mean(ys) - slope * _mean(xs)
    residuals = [y_value - (slope * x_value + intercept) for x_value, y_value in zip(xs, ys)]
    residual_col = "..residual.."
    # Work on a copy so the caller's data does not silently gain an extra column.
    plot_data = data.copy() if hasattr(data, "copy") else dict(data)
    plot_data[residual_col] = residuals
    # prepare mapping
    mapping_dict = {'x': x, 'y': residual_col}
    if color_by is not None:
        mapping_dict['color'] = color_by
    # prepare scales
    scales = [ylab("{0} residual".format(y))]
    # prepare layers
    layers = []
    #   - main layer
    if geom == "point":
        layers.append(geom_point(color=color, size=size, alpha=alpha))
    elif geom == "tile":
        layers.append(geom_bin2d(bins=bins, binwidth=binwidth, color=color, fill=fill, size=size, alpha=alpha))
    elif geom == "blank":
        pass
    else:
        raise Exception("Unknown geom '{0}'".format(geom))
    #   - hline layer
    if hline:
        layers.append(geom_hline(yintercept=0, color='black', alpha=.5))
    #   - marginal layers
    if isinstance(marginal, str):
        for marginal_layer in marginal.split(","):
            # Tolerate spaces around items and around the colon, e.g. "dens:lb, hist:tr".
            marginal_layer = marginal_layer.strip()
            if ":" in marginal_layer:
                marginal_geom, sides = (part.strip() for part in marginal_layer.split(":"))
            else:
                marginal_geom, sides = "dens", marginal_layer
            for side in sides:
                # A fresh layer object is created for every side.
                if marginal_geom in ["dens", "density"]:
                    layer = geom_density()
                elif marginal_geom in ["hist", "histogram"]:
                    layer = geom_histogram()
                elif marginal_geom in ["box", "boxplot"]:
                    layer = geom_boxplot()
                else:
                    raise Exception("Unknown marginal geom '{0}'".format(marginal_geom))
                layers.append(ggmarginal(side, layer=layer))

    return PlotSpec(data=plot_data, mapping=aes(**mapping_dict), scales=scales, layers=layers)

In [4]:
# Load the iris dataset from the lets-plot-docs repository (needs network access),
# then print its shape and preview the first rows.
df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/iris.csv")
print(df.shape)
df.head()

(150, 5)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


## Default plot

In [5]:
# All defaults: point geometry, zero reference line and the "dens:r" marginal spec.
residual_plot(df, "petal_length", "petal_width")

## Change geom type

In [6]:
# Draw the residuals as 2D bins (geom_bin2d) instead of individual points.
residual_plot(df, "petal_length", "petal_width", geom="tile")

## Geometry customization

In [7]:
# size, color and alpha are forwarded to the main point layer.
residual_plot(df, "petal_length", "petal_width", size=5, color="#feb24c", alpha=1/3)

## Advanced geometry customization

In [8]:
# Skip the built-in main layer (geom="blank") and supply a fully customized point layer.
(
    residual_plot(df, "petal_length", "petal_width", geom="blank")
    + geom_point(size=5, shape=21, color="black", fill="#feb24c")
)

## Remove standard layers and add custom one

In [9]:
# Turn off the zero line and the marginal layers, then overlay a LOESS smoother.
(
    residual_plot(df, "petal_length", "petal_width", hline=False, marginal=None)
    + geom_smooth(method='loess', se=True, level=.99, seed=42)
)

## Grouping

In [10]:
# color_by maps the "species" column to the color aesthetic.
residual_plot(df, "petal_length", "petal_width", color_by="species")

## Advanced `hline` customization

In [11]:
# Disable the built-in black zero line and add a custom-colored one in its place.
residual_plot(df, "petal_length", "petal_width", hline=False) + geom_hline(yintercept=0, color="magenta")

## `marginal` customization

In [12]:
# Marginal spec: comma-separated "<geom>:<sides>" items; each side letter is passed
# to ggmarginal() separately (density on sides l and b, histogram on sides t and r).
residual_plot(df, "petal_length", "petal_width", marginal="dens:lb,hist:tr")

## Advanced `marginal` customization

In [13]:
# A non-string marginal (here False) disables the built-in marginal layers;
# a custom violin marginal is then added explicitly.
residual_plot(df, "petal_length", "petal_width", marginal=False) + ggmarginal("tr", layer=geom_violin())

## Non-geometric layers

In [14]:
# Split the default residual plot into facets by species.
(
    residual_plot(df, "petal_length", "petal_width")
    + facet_grid(x="species")
    + ggtitle("Facets")
)

In [15]:
# Color the points by species and pick the palette with an explicit color scale.
(
    residual_plot(df, "petal_length", "petal_width", color_by="species")
    + scale_color_brewer(type="qual", palette="Dark2")
    + ggtitle("Scales")
)

In [16]:
# Tile geometry with explicit bin widths and no marginal layers, drawn in a
# fixed-ratio, flipped coordinate system.
(
    residual_plot(df, "petal_length", "petal_width", geom="tile", binwidth=[.4, .1], marginal=None)
    + coord_fixed(ratio=.25, flip=True)
    + ggtitle("Coordinates")
)

In [17]:
# White points without the zero line or marginal layers; the y label, theme,
# text font and flavor are then overridden on top of the returned plot.
(
    residual_plot(df, "petal_length", "petal_width", color="white", hline=False, marginal=None)
    + ylab("residual")
    + theme_bw()
    + theme(text=element_text(family="monospace"))
    + flavor_high_contrast_dark()
    + ggtitle("Theme and flavor")
)