In [1]:
%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore') 

In [8]:
import time 
import pandas as pd
import numpy as np 
import seaborn as sns 

# static ensemble models 
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from catboost import CatBoostRegressor

# classical models 
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression 
from sklearn.linear_model import LinearRegression 
from sklearn.svm import SVR
from sklearn.linear_model import Ridge 
from sklearn.linear_model import Lasso 
from sklearn.tree import DecisionTreeRegressor

# metrics 
from sklearn.metrics import (mean_squared_error, 
                             mean_absolute_error, 
                             r2_score, 
                             mean_squared_log_error)

from sklearn.model_selection import train_test_split  
from sklearn.preprocessing import LabelEncoder
from sklearn import datasets  

# local 
from base import *  

sns.set(style='whitegrid') 
pd.set_option('display.max_columns', None)   

#### Load dataset 

In [3]:
from ucimlrepo import fetch_ucirepo  

# Dataset 1: Diabetes 
diabetes = datasets.load_diabetes() 

diabetes_df = pd.DataFrame(diabetes.data, columns=diabetes.feature_names) 
diabetes_df['target'] = diabetes.target 

X = diabetes_df[diabetes.feature_names] 
y = diabetes_df.target   

  
# Dataset 2  
abalone = fetch_ucirepo(id=1) 
  
X = abalone.data.features 
y = abalone.data.targets  

le = LabelEncoder() 
X['Sex'] = le.fit_transform(X['Sex'])  

In [4]:
X

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight
0,2,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500
1,2,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700
2,0,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100
3,2,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550
4,1,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550
...,...,...,...,...,...,...,...,...
4172,0,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490
4173,2,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605
4174,2,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080
4175,0,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42) 
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, test_size=0.20, random_state=42)   

#### Define pool 

In [15]:
pool_models = [
    CatBoostRegressor(random_state=42),
    XGBRegressor(random_state=42), 
    RandomForestRegressor(random_state=42), 
    LGBMRegressor(random_state=42), 
    Ridge(random_state=42),
    DecisionTreeRegressor(random_state=42),
    LinearRegression(), 
    KNeighborsRegressor(), 
    SVR()
]

for model in pool_models: 
    model.fit(X_train, y_train) 

Learning rate set to 0.046823
0:	learn: 3.1470884	total: 1.32ms	remaining: 1.31s
1:	learn: 3.0941290	total: 2.28ms	remaining: 1.14s
2:	learn: 3.0470365	total: 3.14ms	remaining: 1.04s
3:	learn: 3.0007476	total: 3.91ms	remaining: 974ms
4:	learn: 2.9582501	total: 4.74ms	remaining: 943ms
5:	learn: 2.9158359	total: 5.61ms	remaining: 929ms
6:	learn: 2.8794487	total: 6.44ms	remaining: 913ms
7:	learn: 2.8369002	total: 7.17ms	remaining: 889ms
8:	learn: 2.8019655	total: 7.84ms	remaining: 864ms
9:	learn: 2.7723391	total: 8.62ms	remaining: 854ms
10:	learn: 2.7412554	total: 9.39ms	remaining: 844ms
11:	learn: 2.7121976	total: 10.2ms	remaining: 841ms
12:	learn: 2.6820277	total: 10.9ms	remaining: 824ms
13:	learn: 2.6565123	total: 11.5ms	remaining: 813ms
14:	learn: 2.6308897	total: 12.1ms	remaining: 798ms
15:	learn: 2.6084521	total: 12.8ms	remaining: 786ms
16:	learn: 2.5890692	total: 13.6ms	remaining: 785ms
17:	learn: 2.5663161	total: 14.3ms	remaining: 778ms
18:	learn: 2.5449497	total: 15.1ms	remaining

### Define Dynamic Regressor model 

In [16]:
der = DER(pool_regressors=pool_models, 
          k=7, 
          knn_metric='minkowski', 
          metrics='mape', 
          threshold=0.2)

der.fit(X_dsel, y_dsel)

In [17]:
pred = der.predict(X_test)

In [18]:
mean_squared_error(y_test, pred) 

4.475509425178913