# Kernelized SVM

In [None]:
from sklearn.svm import SVC

# Fit a kernel SVM (SVC defaults, C=1) on the training split.
svc = SVC(C=1)
svc.fit(bc_features_train, bc_target_train)

# Report mean accuracy on the training split, then on the held-out split.
svc_train_accuracy = svc.score(bc_features_train, bc_target_train)
print("Accuracy on training set: {:.3f}".format(svc_train_accuracy))
svc_test_accuracy = svc.score(bc_features_test, bc_target_test)
print("Accuracy on test set: {:.3f}".format(svc_test_accuracy))

# Random Forest + Ridge Regression for Extrapolation

In [None]:
from sklearn.base import BaseEstimator, RegressorMixin

class CombinedRegressor(BaseEstimator, RegressorMixin):
    """Regressor that uses a base model only while it agrees with a backup model.

    Both sub-estimators are fitted on the same data. At prediction time the
    base prediction is kept when it lies between ``lower * y_backup`` and
    ``upper * y_backup``; otherwise the backup prediction is returned.

    Parameters
    ----------
    base_regressor : estimator class, zero-argument factory, or instance
        Primary model. Classes and factories are called with no arguments;
        an object that already exposes ``fit`` is used as-is.
    backup_regressor : estimator class, zero-argument factory, or instance
        Fallback model, resolved the same way as ``base_regressor``.
    lower, upper : float
        Multipliers applied to the backup prediction to build the
        acceptance band for the base prediction.
    random_state : object
        Forwarded to every sub-estimator that exposes a ``random_state``
        parameter.
    **kwargs
        Extra parameters routed through :meth:`set_params`, e.g.
        ``base_regressor__n_estimators=500``.

    Notes
    -----
    NOTE(review): ``get_params(deep=False)`` does not report the
    sub-estimator objects themselves, so anything that rebuilds the
    estimator from those params alone falls back to the default
    ``base_regressor`` / ``backup_regressor`` -- confirm before combining
    non-default estimators with cloning-based tools.
    """

    # Attribute names of the wrapped estimators; also the prefixes used for
    # their parameters ("<name>__<param>").
    _SUB_ESTIMATORS = ('base_regressor', 'backup_regressor')

    def __init__(self,
                 base_regressor=RandomForestRegressor,
                 backup_regressor=Ridge,
                 lower=0.1,
                 upper=1.9,
                 random_state=None,
                 **kwargs):
        self.base_regressor = self._instantiate(base_regressor)
        self.backup_regressor = self._instantiate(backup_regressor)

        # Sub-estimators must exist before the seed can be pushed down.
        self.set_random_state(random_state)

        self.lower = lower
        self.upper = upper

        # Remaining keyword arguments are (prefixed) parameter overrides.
        self.set_params(**kwargs)

    @staticmethod
    def _instantiate(candidate):
        """Return an estimator instance built from, or equal to, ``candidate``.

        Classes and other callables without a ``fit`` attribute are called
        with no arguments; anything else is assumed to be a ready instance.
        """
        if isinstance(candidate, type) or not hasattr(candidate, 'fit'):
            return candidate()
        return candidate

    def fit(self, X, y):
        """Fit both sub-estimators on ``(X, y)`` and return ``self``."""
        self.base_regressor.fit(X, y)
        self.backup_regressor.fit(X, y)
        return self

    def predict(self, X, y=None):
        """Predict with the base model, falling back to the backup model.

        The base prediction is kept only where it lies inside the band
        spanned by ``lower * y_backup`` and ``upper * y_backup``. ``y`` is
        accepted but not used.
        """
        y_base = self.base_regressor.predict(X)
        y_backup = self.backup_regressor.predict(X)

        scaled_lower = self.lower * y_backup
        scaled_upper = self.upper * y_backup
        # Multiplying by a negative backup prediction flips the order of the
        # two bounds; swap them there so the band is not empty.
        flipped = y_backup < 0
        band_low = np.where(flipped, scaled_upper, scaled_lower)
        band_high = np.where(flipped, scaled_lower, scaled_upper)

        within_band = (band_low <= y_base) & (y_base <= band_high)
        return np.where(within_band, y_base, y_backup)

    def __repr__(self):
        # not as good as sklearn pretty printing,
        # but shows updated params of subestimator
        return f'CombinedRegressor({self.get_params()})'

    def get_params(self, deep=False, **kwargs):
        """Return own parameters plus prefixed sub-estimator parameters.

        Sub-estimator parameters appear as ``base_regressor__<name>`` and
        ``backup_regressor__<name>``; ``**kwargs`` is forwarded to each
        sub-estimator's ``get_params``. When ``deep`` is true the
        sub-estimator objects are included under their attribute names.
        """
        params = {
            'lower': self.lower,
            'upper': self.upper,
            'random_state': self.random_state,
        }

        for name in self._SUB_ESTIMATORS:
            sub_params = dict(getattr(self, name).get_params(**kwargs))
            # random_state is a global param of this estimator, so it is not
            # reported per sub-estimator.
            sub_params.pop('random_state', None)
            for key, value in sub_params.items():
                params[f'{name}__{key}'] = value

        if deep:
            for name in self._SUB_ESTIMATORS:
                params[name] = getattr(self, name)
        return params

    def set_random_state(self, value):
        """Store ``value`` and push it to sub-estimators that accept it."""
        self.random_state = value
        # linear reg does not have random state, but just in case..
        for name in self._SUB_ESTIMATORS:
            estimator = getattr(self, name)
            if 'random_state' in estimator.get_params():
                estimator.set_params(random_state=value)

    def set_params(self, **params):
        """Set own or prefixed sub-estimator parameters and return ``self``.

        Raises
        ------
        AttributeError
            If an unprefixed key does not name an existing attribute.
        """
        for key, value in params.items():
            if key == 'random_state':
                self.set_random_state(value)
                continue

            for name in self._SUB_ESTIMATORS:
                prefix = name + '__'
                if key.startswith(prefix):
                    getattr(self, name).set_params(**{key[len(prefix):]: value})
                    break
            else:
                # Only existing attributes may be overwritten, so typos fail
                # loudly instead of silently creating new attributes.
                if not hasattr(self, key):
                    raise AttributeError(
                        f'{type(self).__name__!r} object has no parameter {key!r}'
                    )
                setattr(self, key, value)
        # set_params needs to return self to make gridsearch work
        return self

    def _more_tags(self):
        """Return the base regressor's tags with ``no_validation`` enabled."""
        # no_validation added because validation is happening
        # within built-in sklearn estimators
        return {**self.base_regressor._more_tags(), 'no_validation': True}

In [None]:
# Random forest (500 trees, depth 5) with the class's default backup model,
# seeded for reproducibility.
rf = CombinedRegressor(
    base_regressor__n_estimators=500,
    base_regressor__max_depth=5,
    random_state=99,
)
rf.fit(ins_features_train, ins_target_train)

# RegressorMixin.score returns the coefficient of determination (R^2), not a
# classification accuracy, so label the output accordingly.
print("R^2 on training set: {:.3f}".format(rf.score(ins_features_train, ins_target_train)))
print("R^2 on test set: {:.3f}".format(rf.score(ins_features_test, ins_target_test)))