In [1]:
import numpy as np
import time
import pandas as pd
from sklearn.linear_model import QuantileRegressor
from sklearn.metrics import mean_absolute_percentage_error


from __future__ import print_function
import sys
import threading
from time import sleep
try:
    import thread
except ImportError:
    import _thread as thread

np.random.seed(1)
rng = np.random.default_rng(1)
random_state = 42


In [2]:
def get_1d_data(
    funct, mu: float, sigma: float, n_samples: int, noise: float
):
    X_train = np.random.uniform(0, 10.0, size=n_samples).astype(np.float32)
    X_test = np.random.uniform(0, 10.0, size=n_samples).astype(np.float32)

    # X_train = rng.normal(mu, sigma, n_samples)
    # X_test = np.arange(mu - 4 * sigma, mu + 4 * sigma, sigma / 20.0)

    y_train, y_test = funct(X_train), funct(X_test)

    n_train = X_train.shape[0]
    n_test = X_test.shape[0]

    y_train += rng.normal(0, noise, y_train.shape[0])
    y_test += rng.normal(0, noise, y_test.shape[0])

    X_train = np.reshape(X_train, (n_train, 1))
    X_test = np.reshape(X_test, (n_test, 1))
    return (
        X_train.reshape(-1, 1),
        y_train,
        X_test.reshape(-1, 1),
        y_test
    )

In [3]:
def get_data(n):
    def f(x):
        ax = 0*x
        for i in range(len(x)):
            ax[i] = rng.poisson(np.sin(x[i])**2+0.1) + 0.03*x[i]*rng.random()
            ax[i] += 25*(rng.uniform(0, 1, 1) < 0.01)*rng.random()
        return ax.astype(np.float32)

    # number of training examples
    n_train = 3*n
    # number of test examples (to evaluate average coverage and length)
    n_test = 1*n

    # training features
    X_train = rng.uniform(0, 5.0, size=n_train).astype(np.float32)
    X_test = rng.uniform(0, 5.0, size=n_test).astype(np.float32)

    # generate labels
    y_train = f(X_train)
    y_test = f(X_test)

    # reshape the features
    X_train = np.reshape(X_train, (n_train, 1))
    X_test = np.reshape(X_test, (n_test, 1))
    return X_train, X_test, y_train, y_test
    

In [4]:
def quit_function(fn_name):
    # print to stderr, unbuffered in Python 2.
    print('{0} took too long'.format(fn_name), file=sys.stderr)
    sys.stderr.flush() # Python 3 stderr is likely buffered.
    thread.interrupt_main() # raises KeyboardInterrupt


def exit_after(s):
    '''
    use as decorator to exit process if 
    function takes longer than s seconds
    '''
    def outer(fn):
        def inner(*args, **kwargs):
            timer = threading.Timer(s, quit_function, args=[fn.__name__])
            timer.start()
            try:
                result = fn(*args, **kwargs)
            finally:
                timer.cancel()
            return result
        return inner
    return outer

In [5]:
@exit_after(15)
def train_from_lists(solver, intercept, X_train, y_train):
    mdl = QuantileRegressor(quantile=0.5, solver=solver, fit_intercept=intercept)
    t0 = time.time()
    mdl.fit(X_train, y_train)
    t1 = time.time()
    return mdl, t0, t1


In [6]:
@exit_after(5)
def predict_from_lists(mdl, X_test, y_test):
    y_pred = mdl.predict(X_test)
    t2 = time.time()
    mape = mean_absolute_percentage_error(y_test, y_pred)
    return t2, mape

In [10]:
solvers = ['highs-ds', 'highs-ipm', 'highs', 'interior-point', 'revised simplex']
intercepts = [True, False]
list_n = []
list_solvers = []
list_intercepts = []
list_t0 = []
list_t1 = []
list_t2 = [] 
list_mape = []

for i in np.arange(0.25, 2, 0.25):
    n = int(pow(10, i))
    print(n)
    X_train, X_test, y_train, y_test = get_data(n)
    for solver in solvers:
        list_n.append(n)
        list_solvers.append(solver)
        for intercept in intercepts:
            list_intercepts.append(intercept)
            try: 
                mdl, t0, t1 = train_from_lists(solver, intercept, X_train, y_train)
                list_t0.append(t0)
                list_t1.append(t1)
                try:
                    t2, mape = predict_from_lists(mdl, X_test, y_test)
                    list_t2.append(t2)
                    list_mape.append(mape)
                except KeyboardInterrupt:
                    print("number of datapoints: ", n)
                    print("solver used: ", solver)
                    print()
                    list_t2.append(np.nan)
                    list_mape.append(np.nan)
            except KeyboardInterrupt:
                print("number of datapoints: ", n)
                print("solver used: ", solver)
                print()
                list_t0.append(np.nan)
                list_t1.append(np.nan)
                list_t2.append(np.nan)
                list_mape.append(np.nan)

data = pd.DataFrame(list(zip(list_n, list_solvers, list_intercepts, list_t0, list_t1, list_t2, list_mape)), columns =['list_n', 'list_solvers', 'list_intercepts', 'list_t0', 'list_t1', 'list_t2', 'list_mape'])
data["train_time"] = np.round(data["list_t1"] - data["list_t0"], 4)
data["predict_time"] = np.round(data["list_t2"] - data["list_t1"], 4)
data = data.drop(['list_t0', 'list_t1', 'list_t2'], axis=1)

1
3
5
10
17
31
56


In [14]:
data

Unnamed: 0,list_n,list_solvers,list_intercepts,list_mape,train_time,predict_time
0,1,highs-ds,True,1.906512,0.0036,0.0005
1,1,highs-ipm,False,0.310241,0.0006,0.0002
2,1,highs,True,1.906512,0.0006,0.0001
3,1,interior-point,False,0.310241,0.0007,0.0002
4,1,revised simplex,True,1.906512,0.0005,0.0003
5,3,highs-ds,False,0.310241,0.0009,0.0004
6,3,highs-ipm,True,1.906512,0.0048,0.0003
7,3,highs,False,0.310241,0.0029,0.0002
8,3,interior-point,True,1.906512,0.0016,0.0002
9,3,revised simplex,False,0.310241,0.0007,0.0002


In [13]:
data.groupby('list_intercepts').mean()

Unnamed: 0_level_0,list_n,list_mape,train_time,predict_time
list_intercepts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,17.058824,0.744837,0.000988,0.000147
True,18.055556,6.536444,0.001344,0.00015


In [11]:
list_n_with_na = list(data[data['train_time'].isna()]['list_n'].unique())
for i in range(data.shape[0]):
    if data.loc[i]['list_n'] in list_n_with_na:
        data.loc[i, 'is_na'] = 1

data[data['train_time'].isna()]

Unnamed: 0,list_n,list_solvers,list_intercepts,list_mape,train_time,predict_time


In [9]:
print(list_n_with_na)
data.groupby(['list_solvers', 'is_na']).mean()

[]


KeyError: 'is_na'

In [None]:
data.groupby(['list_solvers', 'is_na']).median()