In [1]:
from base_model import *

In [2]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn import datasets
# for sampling
import random # use random.choice? and random.sample for interactions

from sklearn.metrics import make_scorer
from sklearn.metrics import *


In [3]:
score_likelihood = make_scorer(gaussian_likelihood)

In [4]:
# create random f
def fpoly(x):
    trans = random.choice(range(2, 5))
    
    def f1(x):
        return x
    
    def f2(x):
        return np.max(x-np.percentile(x, random.choice(range(100))), 0) + x
    
    for _ in range(trans):
        f = random.choice([np.sin, f1, f2,
                          f1, f2, 
                          f1, f2])
        x = x * f(x)
    return x

In [5]:
def eval_pipeline(additional_feats, X, y, verbose=True, scoring=make_scorer(mean_squared_error)):
    #print(additional_feats)    
    pipeline = additional_feats[:]
    pipeline.append(('SGD_regressor', SGDRegressor(loss='squared_loss', penalty='elasticnet')))
    model = Pipeline(pipeline[:])

    # split data into 10 folds
    kfold = KFold(n_splits=10, shuffle=True)
    results = cross_val_score(model, X, y, cv=kfold, 
                              scoring = scoring)
    if verbose:
        print("Result: {}".format(results.mean()))
    return results.mean()

In [6]:
n_samples = 1000
n_outliers = 50
X, y = datasets.make_regression(n_samples=n_samples, n_features=100,
                                      n_informative=100, noise=10,
                                      random_state=0, n_targets=1)

# sample X
cols = random.sample(range(X.shape[1]), 25)
y_poly = y
for col in cols:
    y_poly = fpoly(X[:, col]) + y
y_poly = ((y_poly)/np.mean(y_poly) + 
          np.random.normal(scale=np.std(y), size=y.shape[0]))

In [7]:
# take a random sample
eval_pipeline([], X, y)

Result: 116.46205468458277


116.46205468458277

In [8]:
# take a random sample
eval_pipeline([], X, y_poly)

Result: 343517.88959877973


343517.88959877973

In [9]:
#eval_pipeline([], X, y_poly, scoring=score_likelihood)

In [10]:
#### do some iterations

In [11]:
bmars = BMARS(X)

In [12]:
[('union', bmars.construct_pipeline(False))]

[('union',
  FeatureUnion(n_jobs=1, transformer_list=[('base model', BaseModel())],
         transformer_weights=None))]

In [13]:
# evaluate...
eval_pipeline([('union', bmars.construct_pipeline(False))], X, y)

Result: 120.10477961711578


120.10477961711578

In [14]:
# one iteration of mh
bk, dk, ck = output_prob_state(0)
u = np.random.uniform()
action = output_action(u, bk, dk, ck)

In [15]:
l = 20.62195407652395

In [16]:
basis = bmars.perform_action(action)

In [17]:
proposed_model = BMARS(**bmars.export())

In [18]:
output = bmars_sample_basis(X, list(basis))
output

{'basis': [2, 61], 'knot': array([-0.49327727,  0.47537661]), 'sign': [1, -1]}

In [19]:
if action == 'birth':
    output = bmars_sample_basis(X, list(basis), {'signs':[-1, 1]})



In [20]:
# do other things for birth
proposed_model.add_basis(**output)

In [21]:
# evaluate...
eval_pipeline([('union', bmars.construct_pipeline(False))], X, y)

Result: 118.27584107935343


118.27584107935343

In [22]:
proposed_model.construct_pipeline(False)

FeatureUnion(n_jobs=1,
       transformer_list=[('base model', BaseModel()), ('B_261', Hinge(indices=array([ 2, 61]), knots=array([ 0.4061 ,  0.71053]),
   signs=array([1, 1])))],
       transformer_weights=None)

In [23]:
a = Hinge(indices=[12, 25], knots=np.array([-1.8652 , -1.07113]), signs=[1, -1])

In [24]:
a.transform(X).reshape(-1, 1).shape

(1000, 1)

In [25]:
# evaluate...
eval_pipeline([('union', proposed_model.construct_pipeline(False))], X, y)

Result: 122.93045284873142




122.93045284873142