# Построение графика квантильной регрессии

## 1. Подготовка

#### 1.1. Импорты и инициализации

In [1]:
import sys
!{sys.executable} -m pip install sympy
!{sys.executable} -m pip install cvxopt



In [2]:
import warnings

import numpy as np
import pandas as pd

from lets_plot import *

In [3]:
# Suppress every warning for the rest of the notebook ('ignore' applies to all categories).
warnings.filterwarnings('ignore')

In [4]:
# One-time lets-plot initialisation for notebook (HTML) output; must run before any plot cell.
LetsPlot.setup_html()

#### 1.2. Различные варианты вычисления квантильной регрессии

In [5]:
def quantile_regressor_predictor(xs, ys, quantile):
    """Fit a scikit-learn ``QuantileRegressor`` and return a prediction function.

    Parameters
    ----------
    xs : pandas.Series
        Single feature; converted to a one-column frame before fitting.
    ys : array-like
        Target values.
    quantile : float
        Quantile forwarded to ``QuantileRegressor``.

    Returns
    -------
    callable
        Takes a NumPy array of x values and returns the model's predictions.
    """
    from sklearn.linear_model import QuantileRegressor

    # alpha=0 switches the regularisation term off.
    model = QuantileRegressor(quantile=quantile, alpha=0, solver="highs")
    fitted = model.fit(xs.to_frame(), ys)

    def predict(x):
        # The estimator expects a 2-D (n_samples, 1) input.
        column = x.reshape(-1, 1)
        return fitted.predict(column)

    return predict

In [6]:
def lp_solver_cvxopt(c, G, h, A, b):
    """Solve a linear program through ``cvxopt.solvers.lp`` using the GLPK backend.

    All five NumPy inputs are cast to float and wrapped in cvxopt matrices;
    they are passed positionally as ``(c, G, h, A, b)``.

    Returns
    -------
    The ``'x'`` entry of the cvxopt result dictionary (a cvxopt matrix,
    not a NumPy array).
    """
    from cvxopt import matrix, solvers

    def to_cvx(array):
        # cvxopt needs its own dense float matrix type.
        return matrix(array.astype(float))

    cost = to_cvx(c)
    ineq_lhs, ineq_rhs = to_cvx(G), to_cvx(h)
    eq_lhs, eq_rhs = to_cvx(A), to_cvx(b)

    solution = solvers.lp(cost, ineq_lhs, ineq_rhs, eq_lhs, eq_rhs, solver='glpk')
    return solution['x']

In [7]:
def lp_solver_linprog(c, A_eq, b_eq):
    """Solve ``min c^T x  s.t.  A_eq x = b_eq,  x >= 0`` with ``scipy.optimize.linprog``.

    Parameters
    ----------
    c : array-like
        Cost vector.
    A_eq, b_eq : array-like
        Equality constraint matrix and right-hand side.

    Returns
    -------
    numpy.ndarray
        The optimal ``x`` reported by ``linprog``.

    Raises
    ------
    RuntimeError
        If ``linprog`` does not report success (e.g. the problem is infeasible
        or unbounded). Without this check a failed solve would only surface
        later as an unrelated error when the caller indexes the result.
    """
    from scipy.optimize import linprog

    # No bounds / A_ub are passed on purpose: linprog's default bounds are
    # (0, None) for every variable, which already encodes x >= 0.
    res = linprog(c, A_eq=A_eq, b_eq=b_eq)

    if not res.success:
        raise RuntimeError(
            "linprog failed (status {0}): {1}".format(res.status, res.message)
        )

    return res.x

In [8]:
# https://github.com/hasan-kamal/Linear-Program-Solvers/blob/master/solvers/interior_point_solver.py

class InteriorPointSolver:
    """Primal-dual interior-point solver for linear programs in standard form.

    Solves ``min c^T x`` subject to ``A x = b`` and ``x >= 0``.

    ``matrix_rank`` and ``sympy`` are imported inside ``solve`` instead of in
    the class body: a plain function stored as a class attribute is bound as
    a method when reached through ``self``, so ``self.matrix_rank(A)`` can end
    up receiving the solver instance as the matrix, which makes the rank
    check meaningless.
    """

    def solve(self, c, A, b, epsilon=0.0001, max_iter=10_000):
        """Run the interior-point iteration and return the primal solution.

        Parameters
        ----------
        c : numpy.ndarray, shape (n,)
            Cost vector.
        A : numpy.ndarray, shape (m, n)
            Equality constraint matrix.
        b : numpy.ndarray, shape (m,)
            Right-hand side of the equality constraints.
        epsilon : float
            The loop stops once ``|x . s| <= epsilon``.
        max_iter : int
            Upper bound on the number of iterations.

        Returns
        -------
        numpy.ndarray, shape (n,)
            The final primal iterate ``x``.

        Raises
        ------
        AssertionError
            If the shapes of ``A``, ``b`` and ``c`` do not agree.
        RuntimeError
            If the stopping criterion is not met within ``max_iter`` iterations.
        """
        from numpy.linalg import matrix_rank

        # ensure dimensions are okay
        assert A.shape[0] == b.shape[0], 'first dims of A and b must match, check input!'
        assert A.shape[1] == c.shape[0], 'second dim of A must match first dim of c, check input!'

        # ensure A has full row rank; drop redundant rows (and the matching
        # entries of b, so both sides of A x = b stay aligned) if not
        if matrix_rank(A) < A.shape[0]:
            import sympy  # only needed on this rarely taken path

            _, pivots = sympy.Matrix(A).T.rref()
            independent_rows = list(pivots)
            A = A[independent_rows]
            b = b[independent_rows]

        m = A.shape[0]
        n = A.shape[1]

        # initial solution (x_0, lambda_0, s_0) > 0 [lambda is variable l in code]
        x = np.ones(shape=(n, ))
        l = np.ones(shape=(m, ))
        s = np.ones(shape=(n, ))

        # iteration counter
        k = 0

        # main loop body
        while abs(np.dot(x, s)) > epsilon:
            if k >= max_iter:
                raise RuntimeError(
                    "interior-point method did not converge in {0} iterations".format(max_iter)
                )
            k += 1

            # choose sigma_k and calculate mu_k
            sigma_k = 0.4
            mu_k = np.dot(x, s) / n

            # create linear system A_ * delta = b_
            # (unknowns ordered as [delta_x, delta_l, delta_s])
            A_ = np.zeros(shape=(m + n + n, n + m + n))
            A_[0:m, 0:n] = A
            A_[m:m + n, n:n + m] = A.T
            A_[m:m + n, n + m:n + m + n] = np.eye(n)
            A_[m + n:m + n + n, 0:n] = np.diag(s)
            A_[m + n:m + n + n, n + m:n + m + n] = np.diag(x)

            b_ = np.zeros(shape=(n + m + n, ))
            b_[0:m] = b - np.dot(A, x)
            b_[m:m + n] = c - np.dot(A.T, l) - s
            # x * s is the element-wise product, i.e. diag(x) @ diag(s) @ 1
            b_[m + n:m + n + n] = sigma_k * mu_k - x * s

            # solve for delta
            delta = np.linalg.solve(A_, b_)
            delta_x = delta[0:n]
            delta_l = delta[n:n + m]
            delta_s = delta[n + m:n + m + n]

            # find step-length alpha_k: largest step keeping x and s positive
            alpha_max = 1.0
            for i in range(n):
                if delta_x[i] < 0:
                    alpha_max = min(alpha_max, -x[i] / delta_x[i])
                if delta_s[i] < 0:
                    alpha_max = min(alpha_max, -s[i] / delta_s[i])
            eta_k = 0.99
            alpha_k = min(1.0, eta_k * alpha_max)

            # create new iterate
            x = x + alpha_k * delta_x
            l = l + alpha_k * delta_l
            s = s + alpha_k * delta_s

        return x

def lp_solver_int_point(c, A, b):
    """Solve the LP with ``InteriorPointSolver``; ``b`` is flattened to 1-D first."""
    solver = InteriorPointSolver()
    rhs = b.reshape(-1)
    return solver.solve(c, A, rhs)

In [9]:
def steepest_edge_simplex(_c, _A, _b):
    '''
    Steepest-edge simplex method.

    See: D. Goldfarb, J. K. Reid, "A practicable steepest-edge simplex algorithm"

    Problem:
    T(c) * x -> min
    Constraints:
    A * x = b
    x >= 0

    Parameters:
    _c - cost vector, array-like of length n (1-D, or an (n, 1) column)
    _A - constraint matrix, array-like of shape (m, n) with m < n
    _b - right-hand side, array-like of length m (1-D, or an (m, 1) column)

    Returns:
    1-D numpy array of length n with the solution in the original variable order.

    Raises:
    AssertionError - on inconsistent shapes, or when A has fewer than m
                     independent columns
    Exception - when the problem is unbounded, or after 1_000_000 iterations

    NOTE(review): the starting basis is the set of pivot columns of rref(A);
    nothing here checks that it is feasible (A1_inv * b >= 0) - confirm that
    callers only pass problems for which it is.
    '''
    from numpy import linalg as la

    def get_pivot_indices(A):
        from sympy import Matrix

        return Matrix(A).rref()[1]

    def get_row(A, i):
        return np.array([A[i, :]])

    def get_column(A, i):
        return A[:, i].reshape(-1, 1)

    def e(n, i):
        # i-th unit column vector of length n
        return np.eye(n)[:, i].reshape(-1, 1)

    def permutation_matrix(n, i, j):
        # identity with columns i and j swapped
        P = np.eye(n)
        P[:, [i, j]] = P[:, [j, i]]
        return P

    c = np.array(_c)
    if len(c.shape) == 1:
        c = c.reshape(-1, 1)
    A = np.array(_A)
    m, n = A.shape
    b = np.array(_b)
    if len(b.shape) == 1:
        b = b.reshape(-1, 1)

    def is_optimal(z):
        # only the non-basic positions (m..n-1) carry reduced costs
        return np.all(z[m:, :] >= 0)

    def get_pivot_column_index(z, gamma):
        # steepest-edge rule: maximise z_i^2 / gamma_i over negative z_i
        max_value = -1
        q = -1
        for i in range(m, n):
            z_i = z[i, 0]
            if z_i >= 0:
                continue
            value = z_i**2 / gamma[i, 0]
            if value > max_value:
                max_value = value
                q = i
        return q

    def is_unbounded(w):
        return np.all(w <= 0)

    def get_pivot_index(x, w):
        # Ratio test. Start from +inf so that the first admissible ratio is
        # always accepted; a finite "large enough" guess can be smaller than
        # every ratio (e.g. tiny positive w_i), which would leave p == -1 and
        # silently index the last row.
        min_value = np.inf
        p = -1
        for i in range(m):
            w_i = w[i, 0]
            if w_i <= 0:
                continue
            value = x[i, 0] / w_i
            if value < min_value:
                min_value = value
                p = i
        return p

    def revise_x(x, w, w_p, p):
        x_p = x[p, 0] / w_p
        revised_x = x - np.block([[w * x_p], [np.zeros((n - m, 1))]])
        revised_x[p, 0] = x_p
        return revised_x

    def revise_z(z, alpha, w_p, p, q):
        revised_z = z.copy()
        for i in range(n):
            if i < m:
                continue
            if i == q:
                revised_z[i, 0] = -z[i, 0] / w_p
            else:
                revised_z[i, 0] = z[i, 0] - alpha[i, 0] * z[q, 0]
        return revised_z

    def revise_gamma(gamma, alpha, wTA, w_p, p, q):
        revised_gamma = gamma.copy()
        for i in range(n):
            if i < m:
                continue
            if i == q:
                revised_gamma[i, 0] = gamma[i, 0] / w_p**2
            else:
                revised_gamma[i, 0] = max(
                    gamma[i, 0] - 2 * alpha[i, 0] * wTA[i, 0] + alpha[i, 0]**2 * gamma[q, 0],
                    1 + alpha[i, 0]**2
                )
        return revised_gamma

    assert m < n, "Bad shape of matrix A: {0}".format(A.shape)
    assert b.shape[0] == m and b.shape[1] == 1, "Bad shape of matrix b: {0}".format(b.shape)
    assert c.shape[0] == n and c.shape[1] == 1, "Bad shape of matrix c: {0}".format(c.shape)

    pivot_indices = get_pivot_indices(A)
    assert len(pivot_indices) == m, \
        "Constraint matrix should have {0} independent columns, but have only {1}".format(m, len(pivot_indices))
    dependent_indices = tuple(set(range(n)) - set(pivot_indices))
    # From here on columns are reordered as [basic (A1) | non-basic (A2)];
    # pivot_indices / dependent_indices remember the original positions.
    A1 = A[:, pivot_indices]
    A1_inv = la.inv(A1)
    x = np.block([[np.dot(A1_inv, b)], [np.zeros((n - m, 1))]])
    A2 = A[:, dependent_indices]
    A = np.block([[A1, A2]])
    c1 = c[pivot_indices, :]
    c2 = c[dependent_indices, :]
    c = np.block([[c1], [c2]])
    N = np.block([[A1, A2], [np.zeros((n - m, m)), np.eye(n - m)]])
    N_inv = np.block([[A1_inv, -np.dot(A1_inv, A2)], [np.zeros((n - m, m)), np.eye(n - m)]])
    z = np.dot(c.T, N_inv).T
    gamma = np.diag(np.dot(N_inv.T, N_inv)).reshape(-1, 1)

    iteration_number = 0

    while True:
        # (a)
        if is_optimal(z):
            # undo the column reordering: sort the values by original index
            indices = np.array(pivot_indices + dependent_indices).reshape(-1, 1)
            x_indexed = np.block([x, indices])
            x_sorted = x_indexed[x_indexed[:, 1].argsort()]
            return x_sorted[:, 0]
        # (b)
        q = get_pivot_column_index(z, gamma)
        w = np.dot(A1_inv, get_column(A, q))
        # (c)
        if is_unbounded(w):
            raise Exception("problem is unbounded")
        # (d)
        p = get_pivot_index(x, w)
        w_p = w[p, 0]
        # (e)
        revised_x = revise_x(x, w, w_p, p)
        # (f)
        w = np.dot(A1_inv.T, w)
        # revise indices
        pivot_new_index_at_p = dependent_indices[q - m]
        dependent_new_index_at_q = pivot_indices[p]
        pivot_indices = tuple([(pivot_new_index_at_p if i == p else idx) for i, idx in enumerate(pivot_indices)])
        dependent_indices = tuple([(dependent_new_index_at_q if i == q - m else idx) for i, idx in enumerate(dependent_indices)])
        # revise N
        revised_N = np.dot((N + np.dot(e(n, q), (e(n, p) - e(n, q)).T)), permutation_matrix(n, p, q))
        revised_N_inv = la.inv(revised_N)
        # revise A1, A2
        revised_A1 = revised_N[:m, :m]
        revised_A1_inv = la.inv(revised_A1)
        revised_A2 = revised_N[:m, m:]
        revised_A = np.block([[revised_A1, revised_A2]])
        # alpha
        alpha = -get_row(N_inv, p).reshape(-1, 1) # = get_row(np.dot(A1_inv, A2), p).reshape(-1, 1)
        revised_alpha = alpha / alpha[q, 0] # = get_row(np.dot(revised_A1_inv, revised_A2), p).reshape(-1, 1)
        # revise z and gamma
        revised_z = revise_z(z, revised_alpha, w_p, p, q)
        revised_gamma = revise_gamma(gamma, alpha, np.dot(w.T, A).reshape(-1, 1), w_p, p, q)

        # revision
        x = revised_x.copy()
        N = revised_N.copy()
        N_inv = revised_N_inv.copy()
        A1 = revised_A1.copy()
        A1_inv = revised_A1_inv.copy()
        A2 = revised_A2.copy()
        A = revised_A.copy()
        z = revised_z.copy()
        gamma = revised_gamma.copy()

        iteration_number += 1
        if iteration_number == 1_000_000:
            raise Exception("too long computation")

def lp_solver_simplex(c, A, b):
    """Solve the LP by delegating to ``steepest_edge_simplex``."""
    solution = steepest_edge_simplex(c, A, b)
    return solution

In [10]:
# Source: https://stats.stackexchange.com/a/407478

def lp_predictor(solver_name):
    """Build a quantile-regression fitter that solves the LP formulation.

    Parameters
    ----------
    solver_name : str
        One of "cvxopt", "linprog", "int_point" or "simplex".

    Returns
    -------
    callable
        ``get_predictor(xs, ys, quantile)``; it fits a line to the pandas
        Series ``xs`` / ``ys`` and returns ``x -> slope * x + intercept``.
        It raises ``Exception`` for an unknown ``solver_name``.
    """
    known_solvers = ("cvxopt", "linprog", "int_point", "simplex")

    def get_predictor(xs, ys, quantile):
        # Fail fast: reject an unknown solver before building any matrices.
        if solver_name not in known_solvers:
            raise Exception("Threre is no solver {0}".format(solver_name))

        X_train, y_train = xs.to_frame(), ys
        # design matrix with a leading column of ones for the intercept
        X = np.concatenate([
            np.full(X_train.shape[0], 1.0).reshape(-1, 1),
            X_train.to_numpy()
        ], axis=1)
        n, k = X.shape

        # Variables: [beta+ (k), beta- (k), u (n), v (n)], all >= 0, with
        # X (beta+ - beta-) + u - v = y. Cost is quantile * sum(u) +
        # (1 - quantile) * sum(v).
        c = np.concatenate([
            np.full(2 * k, 0.0),
            quantile * np.full(n, 1.0),
            (1 - quantile) * np.full(n, 1.0)
        ])
        A_eq = np.concatenate([
            X, -X, np.identity(n), -np.identity(n)
        ], axis=1)
        b_eq = y_train.to_numpy().reshape(-1, 1)

        if solver_name == "cvxopt":
            # Only cvxopt takes the non-negativity constraints explicitly
            # (as -I x <= 0), so only build them here.
            m = A_eq.shape[1]
            A_ub = -np.identity(m)
            b_ub = np.full(m, 0.0).reshape(-1, 1)
            beta = lp_solver_cvxopt(c, A_ub, b_ub, A_eq, b_eq)
        elif solver_name == "linprog":
            beta = lp_solver_linprog(c, A_eq, b_eq)
        elif solver_name == "int_point":
            beta = lp_solver_int_point(c, A_eq, b_eq)
        else:  # "simplex"
            beta = lp_solver_simplex(c, A_eq, b_eq)

        # beta+ - beta- recovers the signed coefficients; with a single
        # feature k == 2, giving exactly (intercept, slope)
        intercept, slope = beta[0:k] - beta[k:2*k]

        return lambda x: slope * x + intercept

    return get_predictor

#### 1.3. Статистика

In [11]:
def stat_qantile(xs, ys, quantiles, predictor_name=None, solver_name=None):
    """Compute the end points of one regression line per quantile.

    Parameters
    ----------
    xs, ys : pandas.Series
        Observations to fit.
    quantiles : iterable of float
        Quantiles to fit, one line each.
    predictor_name : str
        "QR" (scikit-learn regressor) or "LP" (linear-programming formulation).
    solver_name : str, optional
        LP solver name, forwarded to ``lp_predictor`` when ``predictor_name == "LP"``.

    Returns
    -------
    pandas.DataFrame
        Columns "..x..", "..y..", "..quantile.."; two rows (min and max of
        ``xs``) per quantile.

    Raises
    ------
    Exception
        If ``predictor_name`` is neither "QR" nor "LP".
    """
    def stat_for(quantile):
        if predictor_name == "QR":
            make_predictor = quantile_regressor_predictor
        elif predictor_name == "LP":
            make_predictor = lp_predictor(solver_name)
        else:
            raise Exception("Threre is no predictor {0}".format(predictor_name))

        # A straight line is fully described by its two end points.
        endpoints = np.array([xs.min(), xs.max()])
        fitted = make_predictor(xs, ys, quantile)

        frame = pd.DataFrame({"..x..": endpoints, "..y..": fitted(endpoints)})
        frame["..quantile.."] = quantile
        return frame

    frames = []
    for quantile in quantiles:
        frames.append(stat_for(quantile))
    return pd.concat(frames)

## 2. Графики

In [12]:
# Aesthetics shared by every regression-line layer: the column names match
# the frame returned by stat_qantile; one line (group) and colour per quantile.
mapping = aes("..x..", "..y..", group="..quantile..", color="..quantile..")
# Quantile levels to fit and draw.
quantiles = [.01, .25, .5, .75, .99]
# Colour the lines on a red (low quantile) to green (high quantile) gradient.
scale_layer = scale_color_gradient(low="red", high="green")

#### 2.1. Простые данные

In [13]:
# Small hand-made data set (8 points) used to compare the solvers visually.
df = pd.DataFrame({
    "x": [0, 0, 1, 1, 1, 2, 2, 3],
    "y": [0, 1, 0, 1, 2, 1, 2, 3],
})
# Last expression of the cell: show the first rows.
df.head()

Unnamed: 0,x,y
0,0,0
1,0,1
2,1,0
3,1,1
4,1,2


In [14]:
def plot1(predictor_name, solver_name=None):
    """Plot the toy data ``df`` with one fitted line per quantile.

    ``predictor_name`` and ``solver_name`` are forwarded to ``stat_qantile``
    and shown in the plot title.
    """
    stat_data = stat_qantile(
        df.x, df.y,
        quantiles=quantiles, predictor_name=predictor_name, solver_name=solver_name,
    )
    title = "({0}, {1})".format(predictor_name, solver_name)

    plot = ggplot()
    plot = plot + geom_point(aes("x", "y"), data=df, size=5, color="black")
    plot = plot + geom_line(mapping, data=stat_data, size=1)
    plot = plot + scale_layer
    return plot + ggtitle(title)

In [15]:
# One panel per predictor/solver combination on the toy data, two panels per row.
gggrid([plot1("QR"), plot1("LP", "cvxopt"), plot1("LP", "linprog"), plot1("LP", "int_point"), plot1("LP", "simplex")], ncol=2)

#### 2.2. Реальные данные и сравнение скорости алгоритмов

In [16]:
# Load the iris data set from the lets-plot-docs repository (needs network access).
iris_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/iris.csv")
# Last expression of the cell: show the first rows.
iris_df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [17]:
def plot2(predictor_name, solver_name=None):
    """Plot iris petal width against petal length with one fitted line per quantile.

    The fit is also timed with ``%timeit`` and the average time is appended to
    the plot title. ``predictor_name`` and ``solver_name`` are forwarded to
    ``stat_qantile``.
    """
    def get_stat():
        # Wrapped in a function so the exact same call can be plotted and timed.
        return stat_qantile(iris_df["petal_length"], iris_df["petal_width"], \
                            quantiles=quantiles, predictor_name=predictor_name, solver_name=solver_name)
    # This first call supplies the data for the plot; the timing below runs the fit again.
    stat_data = get_stat()
    # IPython line magic: 1 repeat (-r) of 2 loops (-n); -o returns the timing result object.
    t = %timeit -r 1 -n 2 -o get_stat() # TODO: t = %timeit -r 5 -n 50 -o get_stat()
    return ggplot() + \
        geom_point(aes("petal_length", "petal_width"), data=iris_df, size=3, color="black") + \
        geom_line(mapping, data=stat_data, size=1) + \
        scale_layer + \
        ggtitle("({0}, {1}) ~ {2:.4f} sec".format(predictor_name, solver_name, t.average))

In [18]:
# One panel per predictor/solver combination on the iris data; each plot2 call also prints its timing.
gggrid([plot2("QR"), plot2("LP", "cvxopt"), plot2("LP", "linprog"), plot2("LP", "int_point"), plot2("LP", "simplex")], ncol=2)

17.2 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 2 loops each)
41.4 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 2 loops each)
12.8 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 2 loops each)
20.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 2 loops each)
46.3 s ± 0 ns per loop (mean ± std. dev. of 1 run, 2 loops each)
