In [8]:
from distribution import TestDistribution

from RTER import RegressionTree

import numpy as np

from sklearn.metrics import mean_squared_error as MSE
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor

from time import time
import os

from comparison.ensemble import RegressionTreeBoosting, RegressionTreeEnsemble
from comparison.EKNN import EKNN


In [9]:


#distribution_index_vec=[1,2,3,4]





np.random.seed(1)



sample_generator=TestDistribution(7).returnDistribution()
n_test, n_train = 10000,3000
X_train, Y_train = sample_generator.generate(n_train)
X_test, Y_test = sample_generator.generate(n_test)



# RTER with cv
time_start=time()
parameters={"truncate_ratio_low":[0], "truncate_ratio_up":[0.4,0.6,0.8 ],
           "min_samples_split":[30], "max_depth":[1,2,3,4],
           "order":[1,2,3,4],"splitter":["maxedge"],
           "r_range_low":[0],"r_range_up":[1],
           "step":[1,2,4,6,7],"lamda":[0.001,0.01,0.1,0.5,1,2,5]}
cv_model_RTER=GridSearchCV(estimator=RegressionTree(),param_grid=parameters, cv=10, n_jobs=-1)
cv_model_RTER.fit(X_train, Y_train)

RTER_model = cv_model_RTER.best_estimator_
mse_score= - RTER_model.score(X_test, Y_test)
y_hat_RTER = RTER_model.predict(X_test)

time_end=time()

print("RTER: mse {} time {}".format(mse_score,time_end-time_start))


# boosting
time_start=time()
parameters={"rho":[0.01,0.05,0.1,0.2], "boost_num":[20,50,100],
            "min_samples_split":[2,5,10,20,30], 
            "max_depth":[3,4,5,6,7,8],"splitter":["maxedge"]}
cv_model_boosting=GridSearchCV(estimator=RegressionTreeBoosting(),param_grid=parameters, cv=10, n_jobs=-1)
cv_model_boosting.fit(X_train, Y_train)

boosting_model = cv_model_boosting.best_estimator_
mse_score= - boosting_model.score(X_test, Y_test)

time_end=time()

print("boosting: mse {} time {}".format(mse_score,time_end-time_start))


# ensemble
time_start=time()
parameters={ "ensemble_num":[20,50,100,200,500,1000,2000],
            "min_samples_split":[2,5,10,20,30], 
            "max_depth":[3,4,5,6,7,8],"splitter":["maxedge"]}
cv_model_ensemble=GridSearchCV(estimator=RegressionTreeEnsemble(),param_grid=parameters, cv=10, n_jobs=-1)
cv_model_ensemble.fit(X_train, Y_train)

ensemble_model = cv_model_ensemble.best_estimator_
mse_score= - ensemble_model.score(X_test, Y_test)

time_end=time()

print("ensemble: mse {} time {}".format(mse_score,time_end-time_start))



# GBRT
time_start=time()

parameters= {"n_estimators":[500,1000,2000], "learning_rate":[0.01,0.05]}
cv_model_GBRT=GridSearchCV(estimator=GradientBoostingRegressor(),param_grid=parameters, cv=10, n_jobs=-1)
cv_model_GBRT.fit(X_train, Y_train)
model_GBRT = cv_model_GBRT.best_estimator_
model_GBRT.fit(X_train, Y_train.ravel())

y_hat=model_GBRT.predict(X_test)
mse_score = MSE(y_hat, Y_test)

time_end=time()

print("GBRT: mse {} time {}".format(mse_score,time_end-time_start))


# RF
time_start=time()

parameters = {"n_estimators":[10,100,200]}
cv_model_RFR = GridSearchCV(estimator=RandomForestRegressor(),param_grid=parameters, cv=10, n_jobs=-1) 
cv_model_RFR.fit(X_train, Y_train)
model_RFR = cv_model_RFR.best_estimator_
model_RFR.fit(X_train, Y_train)

y_hat=model_RFR.predict(X_test)
mse_score = MSE(y_hat, Y_test)

time_end=time()

print("RF: mse {} time {}".format(mse_score,time_end-time_start))

# EKNN
time_start=time()

parameters = {"V":[3,5,7,9,11,13,15,17,19], "C":[5,7,9,10,11,13,15],"alpha":[0.01,0.05]}
cv_model_EKNN = GridSearchCV(estimator=EKNN(),param_grid=parameters, cv=10, n_jobs=-1) 
cv_model_EKNN.fit(X_train, Y_train)
model_EKNN = cv_model_EKNN.best_estimator_
model_EKNN.fit(X_train, Y_train)

y_hat=model_EKNN.predict(X_test)
mse_score = MSE(y_hat, Y_test)

time_end=time()

print("EKNN: mse {} time {}".format(mse_score,time_end-time_start))

RTER: mse 0.11515707714937366 time 76.37409996986389
boosting: mse 0.11569361668979176 time 81.9972288608551
ensemble: mse 0.11564950520845843 time 124.18226742744446
GBRT: mse 0.10737089187997645 time 17.769258499145508
RF: mse 0.10783506883666287 time 6.874638795852661


Traceback (most recent call last):
  File "/usr/local/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 687, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 102, in score
    return -MSE(self.predict(X),y)
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 26, in predict
    return np.array([self.predict_single(x) for x in X])
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 26, in <listcomp>
    return np.array([self.predict_single(x) for x in X])
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 43, in predict_single
    reg = Ridge(alpha=self.alpha).fit(r_matrix, y_hat_vec)
AttributeError: 'EKNN' object has no attribute 'alpha'

Traceback (most recent call last):
  File "/usr/local/anaconda3/lib/python3.9/site-p

  File "/usr/local/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 102, in score
    return -MSE(self.predict(X),y)
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 26, in predict
    return np.array([self.predict_single(x) for x in X])
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 26, in <listcomp>
    return np.array([self.predict_single(x) for x in X])
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 43, in predict_single
    reg = Ridge(alpha=self.alpha).fit(r_matrix, y_hat_vec)
AttributeError: 'EKNN' object has no attribute 'alpha'

Traceback (most recent call last):
  File "/usr/local/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 687, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_scorer.py"

  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 26, in <listcomp>
    return np.array([self.predict_single(x) for x in X])
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 43, in predict_single
    reg = Ridge(alpha=self.alpha).fit(r_matrix, y_hat_vec)
AttributeError: 'EKNN' object has no attribute 'alpha'

Traceback (most recent call last):
  File "/usr/local/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 687, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 397, in _passthrough_scorer
    return estimator.score(*args, **kwargs)
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 102, in score
    return -MSE(self.predict(X),y)
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 26, in predict
    return np.array([self.predict_single(x) for x in X])
  File "/data/karl/RTER/comparison/EKNN/EKNN.py", line 26, in <listcomp>
    return 

EKNN: mse 0.28681729664948025 time 6.72263503074646


In [10]:
cv_model_RTER.best_params_

{'lamda': 5,
 'max_depth': 3,
 'min_samples_split': 30,
 'order': 4,
 'r_range_low': 0,
 'r_range_up': 1,
 'splitter': 'maxedge',
 'step': 7,
 'truncate_ratio_low': 0,
 'truncate_ratio_up': 0.8}

In [11]:
cv_model_EKNN.best_params_

{'C': 5, 'V': 3}

In [12]:
cv_model_ensemble.best_params_

{'ensemble_num': 100,
 'max_depth': 3,
 'min_samples_split': 2,
 'splitter': 'maxedge'}

In [13]:
cv_model_boosting.best_params_

{'boost_num': 50,
 'max_depth': 3,
 'min_samples_split': 2,
 'rho': 0.05,
 'splitter': 'maxedge'}