In [2]:
import itertools

import numpy as np
from sklearn import cross_decomposition
from sklearn import ensemble
from sklearn import linear_model
from sklearn import svm
from sklearn import tree
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV, cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

<h2> All-purpose </h2>

In [20]:
def display_scores(scores):
    """Print a score collection, then its mean and its standard deviation.

    ``scores`` must expose ``mean()`` and ``std()`` methods; it is presumably
    the array returned by a cross-validation run -- confirm against callers.
    Nothing is returned.
    """
    print("Scores:", scores)
    # Each statistic is computed right before it is printed, in this order.
    for label, method_name in (("Mean:", "mean"), ("Standard deviation:", "std")):
        print(label, getattr(scores, method_name)())

In [91]:
def attributes(attribute=None):
    """Expand a dict of candidate values into every keyword combination.

    Example input::

        attribute = {'loss': ['ls', 'lad', 'huber', 'quantile'],
                     'criterion': ['friedman_mse', 'squared_error', 'mse'],
                     'max_features': [None, 'auto', 'sqrt', 'log2']}

    Parameters
    ----------
    attribute : dict or None
        Maps a parameter name to an iterable of candidate values.
        ``None`` (the default) is treated like an empty dict.

    Returns
    -------
    list of dict
        One dict per element of the Cartesian product of the value
        iterables, with keys inserted in sorted order.  An empty input
        yields ``[{}]`` (the single empty combination).
    """
    # Avoid a mutable default argument; None stands in for "no parameters".
    if attribute is None:
        attribute = {}

    names = sorted(attribute)
    value_lists = [attribute[name] for name in names]

    return [dict(zip(names, combination))
            for combination in itertools.product(*value_lists)]

<h2> Regression </h2>

In [None]:
def regression_randomized_tuning(X, y, distributions, model, n_iter=1, r_state=None, cv=4):
    """Run a randomized hyper-parameter search and report training and CV RMSE.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Targets; flattened once with ``np.ravel`` and used in that form for
        fitting, training error and cross-validation alike.
    distributions : dict
        Parameter distributions handed to ``RandomizedSearchCV``.
    model : estimator
        Estimator to tune.
    n_iter : int, default 1
        Number of sampled parameter settings.
    r_state : int or None
        Passed to ``RandomizedSearchCV`` as ``random_state``.
    cv : int, default 4
        Forwarded to ``cross_val_score`` only; the search object itself is
        built without a ``cv`` argument.

    Returns
    -------
    tuple
        ``(rmse_training, rmse_cv, best_params)`` where ``rmse_cv`` is the
        mean of the per-fold RMSE values.
    """
    # Flatten once so fit() and cross_val_score() see the same target shape.
    labels = np.ravel(y)

    reg = RandomizedSearchCV(model, distributions, n_iter=n_iter, random_state=r_state)
    reg.fit(X, labels)

    predictions = reg.predict(X)
    # Documented argument order is (y_true, y_pred).
    rmse_training = np.sqrt(mean_squared_error(labels, predictions))

    # The search object itself is what gets cross-validated here.
    scores = cross_val_score(reg, X, labels,
                             scoring="neg_mean_squared_error", cv=cv)

    # Scores are negated MSE values: flip the sign, root per fold, then average.
    rmse_cv = np.sqrt(-scores).mean()

    return rmse_training, rmse_cv, reg.best_params_

In [None]:
def regression_linear_models(train_set_ready, train_set_labels, cv=4):
    """Fit a fixed catalogue of ``sklearn.linear_model`` estimators and score each.

    Every (estimator, keyword set) pair is fitted on the full training data,
    scored on that same data, and then cross-validated on it.

    Parameters
    ----------
    train_set_ready : array-like
        Feature matrix used for ``fit``, ``predict`` and ``cross_val_score``.
    train_set_labels : array-like
        Targets; flattened with ``np.ravel`` before use.
    cv : int, default 4
        Forwarded to ``cross_val_score``.

    Returns
    -------
    dict
        Keyed by estimator name concatenated with ``str(kwargs)``; each value
        is ``{'rmse_training': ..., 'rmse_cv': ...}`` where ``rmse_cv`` is the
        mean of the per-fold RMSE values.
    """
    sgd_penalties = ('l1', 'l2', 'elasticnet')
    sgd_schedules = ('constant', 'optimal', 'invscaling', 'adaptive')

    candidates = {
        'LinearRegression': [{}],
        'Ridge': [{'solver': solver}
                  for solver in ('svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga')],
        'Lasso': [{'selection': 'cyclic'}, {'selection': 'random'}],
        'ElasticNet': [{'selection': 'cyclic'}, {'selection': 'random'}],
        'Lars': [{}],
        'LassoLars': [{}],
        'OrthogonalMatchingPursuit': [{}],
        'BayesianRidge': [{}],
        'ARDRegression': [{}],
        # NOTE(review): LogisticRegression is a classifier; presumably it only
        # fits here when the labels are discrete -- confirm it belongs in a
        # regression sweep. The 'none' penalty string may also be rejected by
        # newer scikit-learn releases; verify against the installed version.
        'LogisticRegression': [{'penalty': 'l1', 'solver': 'liblinear'},
                               {'penalty': 'l1', 'solver': 'saga'},
                               {'penalty': 'l2', 'solver': 'newton-cg'},
                               {'penalty': 'l2', 'solver': 'lbfgs'},
                               {'penalty': 'l2', 'solver': 'liblinear'},
                               {'penalty': 'l2', 'solver': 'sag'},
                               {'penalty': 'l2', 'solver': 'saga'},
                               {'penalty': 'elasticnet', 'solver': 'saga', 'l1_ratio': 0.5},
                               {'penalty': 'none', 'solver': 'newton-cg'},
                               {'penalty': 'none', 'solver': 'lbfgs'},
                               {'penalty': 'none', 'solver': 'sag'},
                               {'penalty': 'none', 'solver': 'saga'}],
        # Full penalty x learning-rate grid, so elasticnet is tried with every
        # schedule rather than with 'adaptive' four times over.
        'SGDRegressor': [{'penalty': penalty, 'learning_rate': schedule}
                         for penalty in sgd_penalties
                         for schedule in sgd_schedules],
        'PassiveAggressiveRegressor': [{'loss': 'epsilon_insensitive'},
                                       {'loss': 'squared_epsilon_insensitive'}],
        'HuberRegressor': [{}],
        'TweedieRegressor': [{'link': 'identity'}, {'link': 'log'}],
        'TheilSenRegressor': [{}],
        'RANSACRegressor': [{}],
    }

    final_models = {}
    labels = np.ravel(train_set_labels)

    for name in sorted(candidates):
        param_sets = candidates[name]
        print(name, ': ')
        print(param_sets)
        estimator_cls = getattr(linear_model, name)

        for params in param_sets:
            reg = estimator_cls(**params)
            reg.fit(train_set_ready, labels)

            predictions = reg.predict(train_set_ready)
            rmse_training = np.sqrt(mean_squared_error(labels, predictions))

            # Cross-validate on the data passed in, not on a notebook-level
            # global, so the function works on a fresh kernel.
            scores = cross_val_score(reg, train_set_ready, labels,
                                     scoring="neg_mean_squared_error", cv=cv)
            rmse_cv = np.sqrt(-scores).mean()

            final_models[str(name) + str(params)] = {'rmse_training': rmse_training,
                                                     'rmse_cv': rmse_cv}

    return final_models

In [84]:
from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel, DotProduct, ExpSineSquared, Matern, PairwiseKernel, RationalQuadratic, RBF, WhiteKernel

def regression_kernelridge(train_set_ready, train_set_labels, cv=4):
    """Fit ``KernelRidge`` once per candidate kernel and score each fit.

    The candidates are one default-constructed instance of each kernel class
    imported from ``sklearn.gaussian_process.kernels`` plus the string
    ``'linear'``.

    Parameters
    ----------
    train_set_ready : array-like
        Feature matrix used for ``fit``, ``predict`` and ``cross_val_score``.
    train_set_labels : array-like
        Targets, passed through unchanged.
    cv : int, default 4
        Forwarded to ``cross_val_score``.

    Returns
    -------
    dict
        Keyed by ``'KernelRidge ' + str(kernel)``; each value is
        ``{'rmse_training': ..., 'rmse_cv': ...}``.
    """
    kernels = [ConstantKernel(), DotProduct(), ExpSineSquared(), Matern(),
               PairwiseKernel(), RationalQuadratic(), RBF(), WhiteKernel(),
               'linear']

    final_models = {}

    for kernel in kernels:
        reg = KernelRidge(kernel=kernel)
        reg.fit(train_set_ready, train_set_labels)

        # Predict and cross-validate on the argument, not a notebook global,
        # so the training error matches the data the model was fitted on.
        predictions = reg.predict(train_set_ready)
        rmse_training = np.sqrt(mean_squared_error(train_set_labels, predictions))

        reg_scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                     scoring="neg_mean_squared_error", cv=cv)
        # Negated MSE per fold -> RMSE per fold -> mean.
        rmse_cv = np.sqrt(-reg_scores).mean()

        final_models['KernelRidge ' + str(kernel)] = {'rmse_training': rmse_training,
                                                      'rmse_cv': rmse_cv}

    return final_models

In [None]:
def regression_svm(train_set_ready, train_set_labels, cv=4):
    """Fit the ``sklearn.svm`` regressors over a small parameter list and score each.

    Parameters
    ----------
    train_set_ready : array-like
        Feature matrix used for ``fit``, ``predict`` and ``cross_val_score``.
    train_set_labels : array-like
        Targets; flattened with ``np.ravel`` before use.
    cv : int, default 4
        Forwarded to ``cross_val_score``.

    Returns
    -------
    dict
        Keyed by estimator name concatenated with ``str(kwargs)``; each value
        is ``{'rmse_training': ..., 'rmse_cv': ...}``.
    """
    # Each kernel appears once; a repeated entry would only refit the same
    # configuration and overwrite the same result key.
    kernel_grid = [{'kernel': kernel} for kernel in ('linear', 'poly', 'rbf', 'sigmoid')]

    candidates = {
        'SVR': kernel_grid,
        'NuSVR': kernel_grid,
        'LinearSVR': [{'loss': 'epsilon_insensitive'},
                      {'loss': 'squared_epsilon_insensitive'}],
    }

    final_models = {}
    labels = np.ravel(train_set_labels)

    for name in sorted(candidates):
        param_sets = candidates[name]
        print(name, ': ')
        print(param_sets)
        estimator_cls = getattr(svm, name)

        for params in param_sets:
            reg = estimator_cls(**params)
            reg.fit(train_set_ready, labels)

            predictions = reg.predict(train_set_ready)
            rmse_training = np.sqrt(mean_squared_error(labels, predictions))

            # Cross-validate on the argument rather than a notebook global.
            scores = cross_val_score(reg, train_set_ready, labels,
                                     scoring="neg_mean_squared_error", cv=cv)
            rmse_cv = np.sqrt(-scores).mean()

            final_models[str(name) + str(params)] = {'rmse_training': rmse_training,
                                                     'rmse_cv': rmse_cv}

    return final_models
    
    

In [None]:
def regression_gaussianprocess(train_set_ready, train_set_labels, cv=4):
    """Fit ``GaussianProcessRegressor`` once per candidate kernel and score each fit.

    A kernel whose fit or scoring raises is reported with ``print`` and
    skipped, so the returned dict only holds the kernels that succeeded.

    Parameters
    ----------
    train_set_ready : array-like
        Feature matrix used for ``fit``, ``predict`` and ``cross_val_score``.
    train_set_labels : array-like
        Targets, passed through unchanged.
    cv : int, default 4
        Forwarded to ``cross_val_score``.

    Returns
    -------
    dict
        Keyed by ``'GaussianProcessRegressor ' + str(kernel)``; each value is
        ``{'rmse_training': ..., 'rmse_cv': ...}``.
    """
    kernels = [ConstantKernel(), DotProduct(), ExpSineSquared(), Matern(),
               PairwiseKernel(), RationalQuadratic(), RBF(), WhiteKernel()]

    final_models = {}

    for kernel in kernels:
        try:
            reg = GaussianProcessRegressor(kernel=kernel)
            reg.fit(train_set_ready, train_set_labels)

            # Use the argument here: an undefined notebook global would raise
            # NameError, which the handler below would swallow for every
            # kernel and leave the result empty.
            predictions = reg.predict(train_set_ready)
            rmse_training = np.sqrt(mean_squared_error(train_set_labels, predictions))

            reg_scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                         scoring="neg_mean_squared_error", cv=cv)
            rmse_cv = np.sqrt(-reg_scores).mean()

            final_models['GaussianProcessRegressor ' + str(kernel)] = {
                'rmse_training': rmse_training, 'rmse_cv': rmse_cv}
        except Exception as e:
            # Best effort: report the failure and move on to the next kernel.
            print(e)

    return final_models

In [None]:
def regression_crossdecomposition(train_set_ready, train_set_labels, cv=4):
    """Fit the ``sklearn.cross_decomposition`` estimators and score each one.

    Parameters
    ----------
    train_set_ready : array-like
        Feature matrix used for ``fit``, ``predict`` and ``cross_val_score``.
    train_set_labels : array-like
        Targets; flattened with ``np.ravel`` before use.
    cv : int, default 4
        Forwarded to ``cross_val_score``.

    Returns
    -------
    dict
        Keyed by estimator name concatenated with ``str(kwargs)``; each value
        is ``{'rmse_training': ..., 'rmse_cv': ...}``.
    """
    candidates = {
        'PLSRegression': [{}],
        'PLSCanonical': [{'algorithm': 'nipals'}, {'algorithm': 'svd'}],
        'CCA': [{}],
    }

    final_models = {}
    labels = np.ravel(train_set_labels)

    for name in sorted(candidates):
        param_sets = candidates[name]
        print(name, ': ')
        print(param_sets)
        estimator_cls = getattr(cross_decomposition, name)

        for params in param_sets:
            reg = estimator_cls(**params)
            reg.fit(train_set_ready, labels)

            predictions = reg.predict(train_set_ready)
            rmse_training = np.sqrt(mean_squared_error(labels, predictions))

            # Cross-validate on the argument rather than a notebook global.
            scores = cross_val_score(reg, train_set_ready, labels,
                                     scoring="neg_mean_squared_error", cv=cv)
            rmse_cv = np.sqrt(-scores).mean()

            final_models[str(name) + str(params)] = {'rmse_training': rmse_training,
                                                     'rmse_cv': rmse_cv}

    return final_models

In [None]:
def regression_decisiontree(train_set_ready, train_set_labels, cv=4):
    """Fit ``DecisionTreeRegressor`` over a criterion/splitter/max_features grid.

    Parameters
    ----------
    train_set_ready : array-like
        Feature matrix used for ``fit``, ``predict`` and ``cross_val_score``.
    train_set_labels : array-like
        Targets, passed through unchanged.
    cv : int, default 4
        Forwarded to ``cross_val_score``.

    Returns
    -------
    dict
        Keyed by ``'DecisionTreeRegressor ' + str(kwargs)``; each value is
        ``{'rmse_training': ..., 'rmse_cv': ...}``.
    """
    # NOTE(review): the 'mse'/'mae' criterion names and max_features='auto'
    # appear to have been renamed or removed in newer scikit-learn releases --
    # confirm against the installed version.
    criterion = ['mse', 'friedman_mse', 'mae', 'poisson']
    splitter = ['best', 'random']
    max_features = [None, 'auto', 'sqrt', 'log2']

    # product() varies the last iterable fastest, i.e. criterion is the
    # outermost loop and max_features the innermost.
    param_grid = [{'criterion': crit, 'splitter': split, 'max_features': feats}
                  for crit, split, feats in itertools.product(criterion, splitter, max_features)]

    final_models = {}

    for params in param_grid:
        reg = tree.DecisionTreeRegressor(**params)
        reg.fit(train_set_ready, train_set_labels)

        # Predict and cross-validate on the argument, not a notebook global.
        predictions = reg.predict(train_set_ready)
        rmse_training = np.sqrt(mean_squared_error(train_set_labels, predictions))

        reg_scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                     scoring="neg_mean_squared_error", cv=cv)
        rmse_cv = np.sqrt(-reg_scores).mean()

        final_models['DecisionTreeRegressor ' + str(params)] = {'rmse_training': rmse_training,
                                                                'rmse_cv': rmse_cv}

    return final_models

In [None]:
def regression_ensemble(train_set_ready, train_set_labels, cv=4):
    """Fit a catalogue of ``sklearn.ensemble`` estimators and score each one.

    Parameter grids for ``GradientBoostingRegressor`` and
    ``RandomForestRegressor`` are expanded with the notebook's ``attributes``
    helper; the other estimators use the explicit keyword lists below.

    Parameters
    ----------
    train_set_ready : array-like
        Feature matrix used for ``fit``, ``predict`` and ``cross_val_score``.
    train_set_labels : array-like
        Targets; flattened with ``np.ravel`` before use.
    cv : int, default 4
        Forwarded to ``cross_val_score``.

    Returns
    -------
    dict
        Keyed by estimator name concatenated with ``str(kwargs)``; each value
        is ``{'rmse_training': ..., 'rmse_cv': ...}``.
    """
    # Base learners shared by the voting and stacking ensembles.
    estimators = [('ridge', RidgeCV()),
                  ('lasso', LassoCV(random_state=42)),
                  ('knr', KNeighborsRegressor(n_neighbors=20,
                                              metric='euclidean'))]

    # NOTE(review): 'ls'/'lad', 'mse'/'mae' and max_features='auto' appear to
    # have been renamed or removed in newer scikit-learn releases -- confirm
    # against the installed version.
    gbr_attributes = {'loss': ['ls', 'lad', 'huber', 'quantile'],
                      'criterion': ['friedman_mse', 'mse'],
                      'max_features': [None, 'auto', 'sqrt', 'log2']}

    rf_attributes = {'criterion': ['mse', 'mae', 'poisson'],
                     'max_features': ['sqrt', 'log2', None, 1]}

    candidates = {
        'AdaBoostRegressor': [{'loss': loss} for loss in ('linear', 'square', 'exponential')],
        'BaggingRegressor': [{}],
        'ExtraTreesRegressor': [{'criterion': crit, 'max_features': feats}
                                for feats in ('sqrt', 'log2', None, 1)
                                for crit in ('mse', 'mae')],
        'GradientBoostingRegressor': attributes(gbr_attributes),
        # NOTE(review): IsolationForest is an outlier detector rather than a
        # regressor -- confirm it is meant to be scored with RMSE here.
        'IsolationForest': [{}],
        'RandomForestRegressor': attributes(rf_attributes),
        'VotingRegressor': [{'estimators': estimators}],
        'StackingRegressor': [{'estimators': estimators}],
    }

    final_models = {}
    labels = np.ravel(train_set_labels)

    for name in sorted(candidates):
        param_sets = candidates[name]
        print(name, ': ')
        print(param_sets)
        estimator_cls = getattr(ensemble, name)

        for params in param_sets:
            reg = estimator_cls(**params)
            reg.fit(train_set_ready, labels)

            predictions = reg.predict(train_set_ready)
            rmse_training = np.sqrt(mean_squared_error(labels, predictions))

            # Cross-validate on the argument rather than a notebook global.
            scores = cross_val_score(reg, train_set_ready, labels,
                                     scoring="neg_mean_squared_error", cv=cv)
            rmse_cv = np.sqrt(-scores).mean()

            final_models[str(name) + str(params)] = {'rmse_training': rmse_training,
                                                     'rmse_cv': rmse_cv}

    return final_models
    
    

In [None]:
def regression_neuralnetwork(train_set_ready, train_set_labels, cv=4):
    """Fit ``MLPRegressor`` over an activation/solver/learning_rate grid and score each fit.

    The grid is expanded with the notebook's ``attributes`` helper.

    Parameters
    ----------
    train_set_ready : array-like
        Feature matrix used for ``fit``, ``predict`` and ``cross_val_score``.
    train_set_labels : array-like
        Targets, passed through unchanged.
    cv : int, default 4
        Forwarded to ``cross_val_score``.

    Returns
    -------
    dict
        Keyed by ``'MLPRegressor ' + str(kwargs)``; each value is
        ``{'rmse_training': ..., 'rmse_cv': ...}``.
    """
    nn_attributes = {'activation': ['identity', 'logistic', 'relu', 'tanh'],
                     'solver': ['lbfgs', 'sgd', 'adam'],
                     'learning_rate': ['constant', 'invscaling', 'adaptive']}

    final_models = {}

    for params in attributes(nn_attributes):
        reg = MLPRegressor(**params)
        reg.fit(train_set_ready, train_set_labels)

        # Predict and cross-validate on the argument, not a notebook global.
        predictions = reg.predict(train_set_ready)
        rmse_training = np.sqrt(mean_squared_error(train_set_labels, predictions))

        reg_scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                     scoring="neg_mean_squared_error", cv=cv)
        # Negated MSE per fold -> RMSE per fold -> mean.
        rmse_cv = np.sqrt(-reg_scores).mean()

        final_models['MLPRegressor ' + str(params)] = {'rmse_training': rmse_training,
                                                       'rmse_cv': rmse_cv}

    return final_models