In [1]:
import sys
!{sys.executable} -m pip install cvxopt



In [2]:
import warnings

import numpy as np
import pandas as pd

from lets_plot import *

In [3]:
# Suppress every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by the
# libraries used below; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [4]:
# Initialise Lets-Plot's HTML output so plots can be rendered in the notebook.
LetsPlot.setup_html()

In [5]:
def quantile_regressor_predictor(xs, ys, quantile):
    """Fit scikit-learn's ``QuantileRegressor`` and return a prediction function.

    Parameters
    ----------
    xs : pandas.Series
        Single explanatory variable; converted to a one-column frame for fitting.
    ys : array-like
        Response values passed to ``fit`` unchanged.
    quantile : float
        Quantile level forwarded to ``QuantileRegressor``.

    Returns
    -------
    callable
        Function taking a NumPy array ``x``, reshaping it to one column and
        returning the fitted model's predictions for it.
    """
    from sklearn.linear_model import QuantileRegressor

    # alpha=0 requests an unpenalised fit; "highs" selects the LP backend.
    model = QuantileRegressor(quantile=quantile, alpha=0, solver="highs")
    features = xs.to_frame()
    fitted = model.fit(features, ys)

    def predict(x):
        # The regressor expects 2-D input, so turn the 1-D array into a column.
        column = x.reshape(-1, 1)
        return fitted.predict(column)

    return predict

In [6]:
# Source: https://stats.stackexchange.com/a/407478
def lp_solver_cvxopt(c, G, h, A, b, k):
    """Solve a linear program with CVXOPT's GLPK backend and extract coefficients.

    The program passed to ``cvxopt.solvers.lp`` is: minimise ``c @ x`` subject
    to ``G @ x <= h`` and ``A @ x == b``. All inputs are NumPy arrays; they are
    cast to ``float`` and wrapped in CVXOPT matrices before solving.

    Parameters
    ----------
    c, G, h, A, b : numpy.ndarray
        Cost vector, inequality matrix / right-hand side, and equality
        matrix / right-hand side of the linear program.
    k : int
        Number of regression coefficients. The first ``k`` solution entries
        are treated as positive parts and the next ``k`` as negative parts.

    Returns
    -------
    cvxopt matrix
        ``x[0:k] - x[k:2*k]`` of the optimal solution.

    Raises
    ------
    RuntimeError
        If the solver does not report an optimal solution (in that case no
        solution vector is available to slice).
    """
    from cvxopt import matrix, solvers

    res = solvers.lp(
        matrix(c.astype(float)),
        matrix(G.astype(float)), matrix(h.astype(float)),
        matrix(A.astype(float)), matrix(b.astype(float)),
        solver='glpk'
    )

    # Without this check a failed solve surfaces as an opaque TypeError when
    # the missing solution is subscripted below.
    status = res['status']
    res_x = res['x']
    if status != 'optimal' or res_x is None:
        raise RuntimeError(
            "cvxopt/glpk did not find an optimal solution (status: {0})".format(status)
        )

    return res_x[0:k] - res_x[k:2*k]

In [7]:
def lp_solver_linprog(c, A_ub, b_ub, A_eq, b_eq, k):
    """Solve a linear program with ``scipy.optimize.linprog`` and extract coefficients.

    Parameters
    ----------
    c : array-like
        Cost vector of the linear program.
    A_ub, b_ub : array-like
        Inequality constraints ``A_ub @ x <= b_ub``.
    A_eq, b_eq : array-like
        Equality constraints ``A_eq @ x == b_eq``.
    k : int
        Number of regression coefficients. The first ``k`` solution entries
        are treated as positive parts and the next ``k`` as negative parts.

    Returns
    -------
    numpy.ndarray
        ``x[0:k] - x[k:2*k]`` of the optimal solution.

    Raises
    ------
    RuntimeError
        If ``linprog`` reports that it did not succeed.
    """
    from scipy.optimize import linprog

    res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)

    # linprog signals failure through the result object rather than raising;
    # using res.x from a failed solve would crash or yield meaningless values.
    if not res.success or res.x is None:
        raise RuntimeError(
            "linprog did not find an optimal solution (status={0}): {1}".format(
                res.status, res.message
            )
        )

    res_x = res.x
    return res_x[0:k] - res_x[k:2*k]

In [8]:
def lp_predictor(solver_id):
    """Create a quantile-regression predictor factory backed by an LP solver.

    Parameters
    ----------
    solver_id : int
        ``1`` selects ``lp_solver_cvxopt``, ``2`` selects ``lp_solver_linprog``.

    Returns
    -------
    callable
        ``get_predictor(xs, ys, quantile)``, which fits a straight line for the
        requested quantile and returns a function ``x -> slope * x + intercept``.
        ``xs`` and ``ys`` are pandas Series; only a single explanatory variable
        is supported because the solution is unpacked into intercept and slope.

    Raises
    ------
    ValueError
        Raised by ``get_predictor`` when ``solver_id`` is not a known solver.
    """
    def get_predictor(xs, ys, quantile):
        # Resolve the solver first so an invalid id fails before the dense
        # n-by-n constraint blocks below are allocated.
        if solver_id == 1:
            solve = lp_solver_cvxopt
        elif solver_id == 2:
            solve = lp_solver_linprog
        else:
            raise ValueError("There is no solver with id={0}".format(solver_id))

        X_train, y_train = xs.to_frame(), ys
        # Design matrix with a leading column of ones for the intercept.
        X = np.concatenate([
            np.full(X_train.shape[0], 1.0).reshape(-1, 1),
            X_train.to_numpy()
        ], axis=1)
        n, k = X.shape

        # Decision vector layout: [beta_plus (k), beta_minus (k), u (n), v (n)].
        # Only the residual parts u and v carry cost, weighted by the quantile.
        c = np.concatenate([
            np.full(2 * k, 0.0),
            quantile * np.full(n, 1.0),
            (1 - quantile) * np.full(n, 1.0)
        ])
        # Equality constraint: X @ beta_plus - X @ beta_minus + u - v == y.
        A_eq = np.concatenate([
            X, -X, np.identity(n), -np.identity(n)
        ], axis=1)
        m = A_eq.shape[1]
        b_eq = y_train.to_numpy().reshape(-1, 1)
        # Inequality constraint -x <= 0, i.e. every decision variable is non-negative.
        A_ub = -np.identity(m)
        b_ub = np.full(m, 0.0).reshape(-1, 1)

        beta = solve(c, A_ub, b_ub, A_eq, b_eq, k)
        intercept, slope = beta

        return lambda x: slope * x + intercept

    return get_predictor

In [9]:
def stat_qantile(xs, ys, quantiles, predictor_id=1, solver_id=1):
    """Compute quantile-regression line endpoints for each requested quantile.

    For every quantile a predictor is fitted on ``(xs, ys)`` and evaluated at
    ``xs.min()`` and ``xs.max()``, giving the two endpoints of a line.

    Parameters
    ----------
    xs, ys : pandas.Series
        Explanatory and response values.
    quantiles : iterable of float
        Quantile levels to fit.
    predictor_id : int, default 1
        ``1`` uses ``quantile_regressor_predictor``, ``2`` uses
        ``lp_predictor(1)`` and ``3`` uses ``lp_predictor(2)``.
    solver_id : int, default 1
        Accepted for backward compatibility; it is not used, because the LP
        solver is determined by ``predictor_id``.

    Returns
    -------
    pandas.DataFrame
        Columns ``"..x.."``, ``"..y.."`` and ``"..quantile.."`` with two rows
        per quantile (row indices repeat per quantile). Empty, with the same
        columns, when ``quantiles`` is empty.

    Raises
    ------
    ValueError
        If ``predictor_id`` is not 1, 2 or 3.
    """
    x_stat_name, y_stat_name, quantile_stat_name = "..x..", "..y..", "..quantile.."

    if predictor_id not in (1, 2, 3):
        raise ValueError("There is no predictor with id={0}".format(predictor_id))

    quantiles = list(quantiles)
    if not quantiles:
        # pd.concat rejects an empty list, so return an empty frame explicitly.
        return pd.DataFrame(columns=[x_stat_name, y_stat_name, quantile_stat_name])

    # The predictor does not depend on the quantile, so choose it once
    # instead of once per quantile.
    if predictor_id == 1:
        predictor = quantile_regressor_predictor
    elif predictor_id == 2:
        predictor = lp_predictor(1)
    else:
        predictor = lp_predictor(2)

    # A straight line is fully described by its values at the x extremes.
    x_stat = np.array([xs.min(), xs.max()])

    def stat_for(quantile):
        y_stat = predictor(xs, ys, quantile)(x_stat)
        return pd.DataFrame({x_stat_name: x_stat, y_stat_name: y_stat}).assign(**{quantile_stat_name: quantile})

    return pd.concat([stat_for(quantile) for quantile in quantiles])

In [10]:
# Aesthetics shared by the quantile-line layers: the column names match the
# frame returned by stat_qantile, with one line (and one color) per quantile.
mapping = aes("..x..", "..y..", group="..quantile..", color="..quantile..")
# Quantile levels fitted in every plot below.
quantiles = [.01, .25, .5, .75, .99]
# Color gradient for the quantile value, running from red (low) to green (high).
scale_layer = scale_color_gradient(low="red", high="green")

In [11]:
# Small hand-written dataset of eight (x, y) points used by plot1.
df = pd.DataFrame({
    "x": [0, 0, 1, 1, 1, 2, 2, 3],
    "y": [0, 1, 0, 1, 2, 1, 2, 3],
})

# Iris dataset read from the lets-plot-docs repository; requires network access.
iris_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/iris.csv")

In [12]:
def plot1(predictor_id):
    """Plot the points of ``df`` with quantile lines from the chosen predictor.

    ``predictor_id`` is forwarded to ``stat_qantile`` and shown in the title.
    """
    quantile_lines = stat_qantile(df.x, df.y, quantiles=quantiles, predictor_id=predictor_id)

    points_layer = geom_point(aes("x", "y"), data=df, size=5, color="black")
    lines_layer = geom_line(mapping, data=quantile_lines, size=1)
    title_layer = ggtitle("Plot 1 (predictor id: {0})".format(predictor_id))

    return ggplot() + points_layer + lines_layer + scale_layer + title_layer

# Show the three predictor variants together in a two-column grid.
gggrid([plot1(1), plot1(2), plot1(3)], ncol=2)

In [13]:
def plot2(predictor_id):
    """Plot iris petal width against petal length with quantile lines.

    ``predictor_id`` is forwarded to ``stat_qantile`` and shown in the title.
    """
    quantile_lines = stat_qantile(
        iris_df["petal_length"],
        iris_df["petal_width"],
        quantiles=quantiles,
        predictor_id=predictor_id,
    )

    points_layer = geom_point(aes("petal_length", "petal_width"), data=iris_df, size=3, color="black")
    lines_layer = geom_line(mapping, data=quantile_lines, size=1)
    title_layer = ggtitle("Plot 2 (predictor id: {0})".format(predictor_id))

    return ggplot() + points_layer + lines_layer + scale_layer + title_layer

# Show the three predictor variants together in a two-column grid.
gggrid([plot2(1), plot2(2), plot2(3)], ncol=2)