In [1]:
import sys
!{sys.executable} -m pip install cvxopt



In [2]:
import numpy as np
import pandas as pd

from lets_plot import *

In [3]:
LetsPlot.setup_html()

In [4]:
df = pd.DataFrame({
    "x": [0, 0, 1, 1, 1, 2, 2, 3],
    "y": [0, 1, 0, 1, 2, 1, 2, 3],
})

iris_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/iris.csv")

In [5]:
def predictor2(xs, ys, quantile):
    from cvxopt import matrix, solvers

    X = pd.DataFrame({1: xs.astype(float)})
    X[0] = 1.0  # intercept

    K = X.shape[1]
    N = X.shape[0]

    # equality constraints - left hand side

    A1 = X.to_numpy()  # intercepts & data points - positive weights
    A2 = X.to_numpy() * -1  # intercept & data points - negative weights
    A3 = np.identity(N)  # error - positive
    A4 = np.identity(N) * -1  # error - negative

    A = np.concatenate((A1, A2, A3, A4), axis=1)  # all the equality constraints

    # equality constraints - right hand side
    b = ys.astype(float).to_numpy()

    # goal function - intercept & data points have 0 weights
    # positive error has quantile weight, negative error has 1-quantile weight
    c = np.concatenate((np.repeat(0, 2 * K), quantile * np.repeat(1, N), (1 - quantile) * np.repeat(1, N)))

    # converting from numpy types to cvxopt matrix

    Am = matrix(A)
    print("A:")
    print(Am)
    bm = matrix(b)
    print("b:")
    print(bm)
    cm = matrix(c)
    print("c:")
    print(cm)

    # all variables must be greater than zero
    # adding inequality constraints - left hand side
    n = Am.size[1]
    G = matrix(0.0, (n, n))
    G[:: n + 1] = -1.0
    print("G:")
    print(G)

    # adding inequality constraints - right hand side (all zeros)
    h = matrix(0.0, (n, 1))
    print("h:")
    print(h)

    # solving the model
    sol = solvers.lp(cm, G, h, Am, bm, solver='glpk')

    sol_x = sol['x']

    # both negative and positive components get values above zero, this gets fixed here
    slope, intercept = sol_x[0:K] - sol_x[K:2*K]

    return lambda x: slope * x + intercept

predictor2(df["x"], df["y"], .5)(np.array([0.0, 1.0]))

A:
[ 0.00e+00  1.00e+00 -0.00e+00 -1.00e+00  1.00e+00  0.00e+00  0.00e+00 ... ]
[ 0.00e+00  1.00e+00 -0.00e+00 -1.00e+00  0.00e+00  1.00e+00  0.00e+00 ... ]
[ 1.00e+00  1.00e+00 -1.00e+00 -1.00e+00  0.00e+00  0.00e+00  1.00e+00 ... ]
[ 1.00e+00  1.00e+00 -1.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]
[ 1.00e+00  1.00e+00 -1.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]
[ 2.00e+00  1.00e+00 -2.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]
[ 2.00e+00  1.00e+00 -2.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]
[ 3.00e+00  1.00e+00 -3.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]

b:
[ 0.00e+00]
[ 1.00e+00]
[ 0.00e+00]
[ 1.00e+00]
[ 2.00e+00]
[ 1.00e+00]
[ 2.00e+00]
[ 3.00e+00]

c:
[ 0.00e+00]
[ 0.00e+00]
[ 0.00e+00]
[ 0.00e+00]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]

G:
[-1.00e+00  0.00e+00  0.00e+00  0

array([0., 1.])

In [6]:
def predictor3(xs, ys, quantile):
    from scipy.optimize import linprog

    X_train, y_train = xs.to_frame(), ys

    X = np.concatenate([
        np.full(X_train.shape[0], 1.0).reshape(-1, 1),
        X_train.to_numpy()
    ], axis=1)

    n, k = X.shape

    c = np.concatenate([
        np.full(2 * k, 0.0),
        quantile * np.full(n, 1.0),
        (1 - quantile) * np.full(n, 1.0)
    ])

    A_eq = np.concatenate([
        X, -X, np.identity(n), -np.identity(n)
    ], axis=1)
    print("A:")
    print(A_eq)

    m = A_eq.shape[1]

    b_eq = y_train.to_numpy().reshape(-1, 1)
    print("b:")
    print(b_eq)

    A_ub = -np.identity(m)
    print("G:")
    print(A_ub)

    b_ub = np.full(m, 0.0).reshape(-1, 1)
    print("h:")
    print(b_ub)

    res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq)

    beta = res.x[0:k] - res.x[k:2*k]

    slope, intercept = beta

    return lambda x: slope * x + intercept

predictor2(df["x"], df["y"], .5)(np.array([0.0, 1.0]))

A:
[ 0.00e+00  1.00e+00 -0.00e+00 -1.00e+00  1.00e+00  0.00e+00  0.00e+00 ... ]
[ 0.00e+00  1.00e+00 -0.00e+00 -1.00e+00  0.00e+00  1.00e+00  0.00e+00 ... ]
[ 1.00e+00  1.00e+00 -1.00e+00 -1.00e+00  0.00e+00  0.00e+00  1.00e+00 ... ]
[ 1.00e+00  1.00e+00 -1.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]
[ 1.00e+00  1.00e+00 -1.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]
[ 2.00e+00  1.00e+00 -2.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]
[ 2.00e+00  1.00e+00 -2.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]
[ 3.00e+00  1.00e+00 -3.00e+00 -1.00e+00  0.00e+00  0.00e+00  0.00e+00 ... ]

b:
[ 0.00e+00]
[ 1.00e+00]
[ 0.00e+00]
[ 1.00e+00]
[ 2.00e+00]
[ 1.00e+00]
[ 2.00e+00]
[ 3.00e+00]

c:
[ 0.00e+00]
[ 0.00e+00]
[ 0.00e+00]
[ 0.00e+00]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]
[ 5.00e-01]

G:
[-1.00e+00  0.00e+00  0.00e+00  0

array([0., 1.])