In [80]:
import numpy as np
from sklearn import cross_decomposition
from sklearn import ensemble
from sklearn import linear_model
from sklearn import svm
from sklearn import tree
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

<h2> Regression </h2>
<h3> Models with default settings </h3>

In [20]:
def display_scores(scores):
    """Print a score array, then its mean, then its standard deviation.

    `scores` must expose ``mean()`` and ``std()`` methods (e.g. a NumPy
    array); each statistic is computed right before it is printed.
    """
    print("Scores:", scores)

    average = scores.mean()
    print("Mean:", average)

    spread = scores.std()
    print("Standard deviation:", spread)

In [69]:
def regression_linear_models(train_set_ready, train_set_labels, cv=4):
    """Benchmark the listed ``sklearn.linear_model`` regressors with default settings.

    Every model is fitted on the full training set to obtain a training RMSE
    and is then cross-validated on that same data to obtain a mean
    cross-validation RMSE.

    Parameters
    ----------
    train_set_ready : array-like
        Prepared feature matrix handed to ``fit``, ``predict`` and
        ``cross_val_score``.
    train_set_labels : array-like
        Target values; flattened to 1-D with ``np.ravel`` before use.
    cv : default 4
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    dict
        ``{model_name: {'rmse_training': ..., 'rmse_cv': ...}}`` with one
        entry per benchmarked model.
    """
    # LogisticRegression is intentionally absent: it is a classifier and
    # refuses continuous targets, so it cannot be scored as a regressor.
    model_names = [
        'LinearRegression',
        'Ridge',
        'Lasso',
        'ElasticNet',
        'Lars',
        'LassoLars',
        'OrthogonalMatchingPursuit',
        'BayesianRidge',
        'ARDRegression',
        'SGDRegressor',
        'PassiveAggressiveRegressor',
        'HuberRegressor',
        'TweedieRegressor',
        'TheilSenRegressor',
        'RANSACRegressor',
    ]
    # -1 marks "not evaluated yet"; every entry is overwritten in the loop.
    models = {name: {'rmse_training': -1, 'rmse_cv': -1} for name in model_names}

    train_set_labels = np.ravel(train_set_labels)
    for name, result in sorted(models.items()):
        print(name, ': ')
        reg = getattr(linear_model, name)()
        reg.fit(train_set_ready, train_set_labels)

        predictions = reg.predict(train_set_ready)
        train_mse = mean_squared_error(train_set_labels, predictions)
        result['rmse_training'] = np.sqrt(train_mse)

        # Cross-validate on the data passed in, not on a notebook global.
        scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                 scoring="neg_mean_squared_error", cv=cv)
        # Scores are negated MSE values, so flip the sign before the root.
        result['rmse_cv'] = np.sqrt(-scores).mean()

    return models

In [None]:
def regression_kernelridge(train_set_ready, train_set_labels, cv=4):
    """Evaluate a default ``KernelRidge`` model on the given training data.

    The model is fitted on the full training set to obtain a training RMSE
    and cross-validated on the same data to obtain a mean CV RMSE.

    Parameters
    ----------
    train_set_ready : array-like
        Prepared feature matrix handed to ``fit``, ``predict`` and
        ``cross_val_score``.
    train_set_labels : array-like
        Target values, passed through unchanged.
    cv : default 4
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    dict
        ``{'KernelRidge': {'rmse_training': ..., 'rmse_cv': ...}}``.
    """
    reg = KernelRidge()
    reg.fit(train_set_ready, train_set_labels)

    # Score the data that was passed in, not a notebook-level global.
    predictions = reg.predict(train_set_ready)
    reg_rmse = np.sqrt(mean_squared_error(train_set_labels, predictions))

    reg_scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                 scoring="neg_mean_squared_error", cv=cv)
    # Scores are negated MSE values, so flip the sign before the root.
    reg_rmse_scores_mean = np.sqrt(-reg_scores).mean()

    return {'KernelRidge': {'rmse_training': reg_rmse, 'rmse_cv': reg_rmse_scores_mean}}

In [None]:
def regression_svm(train_set_ready, train_set_labels, cv=4):
    """Benchmark the ``sklearn.svm`` regressors (SVR, NuSVR, LinearSVR) with defaults.

    Every model is fitted on the full training set to obtain a training RMSE
    and is then cross-validated on that same data to obtain a mean
    cross-validation RMSE.

    Parameters
    ----------
    train_set_ready : array-like
        Prepared feature matrix handed to ``fit``, ``predict`` and
        ``cross_val_score``.
    train_set_labels : array-like
        Target values; flattened to 1-D with ``np.ravel`` before use.
    cv : default 4
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    dict
        ``{model_name: {'rmse_training': ..., 'rmse_cv': ...}}``.
    """
    # -1 marks "not evaluated yet"; every entry is overwritten in the loop.
    models = {name: {'rmse_training': -1, 'rmse_cv': -1}
              for name in ('SVR', 'NuSVR', 'LinearSVR')}

    train_set_labels = np.ravel(train_set_labels)
    for name, result in sorted(models.items()):
        print(name, ': ')
        reg = getattr(svm, name)()
        reg.fit(train_set_ready, train_set_labels)

        predictions = reg.predict(train_set_ready)
        train_mse = mean_squared_error(train_set_labels, predictions)
        result['rmse_training'] = np.sqrt(train_mse)

        # Cross-validate on the data passed in, not on a notebook global.
        scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                 scoring="neg_mean_squared_error", cv=cv)
        # Scores are negated MSE values, so flip the sign before the root.
        result['rmse_cv'] = np.sqrt(-scores).mean()

    return models
    
    

In [None]:
def regression_gaussianprocess(train_set_ready, train_set_labels, cv=4):
    """Evaluate a default ``GaussianProcessRegressor`` on the given training data.

    The model is fitted on the full training set to obtain a training RMSE
    and cross-validated on the same data to obtain a mean CV RMSE.

    Parameters
    ----------
    train_set_ready : array-like
        Prepared feature matrix handed to ``fit``, ``predict`` and
        ``cross_val_score``.
    train_set_labels : array-like
        Target values, passed through unchanged.
    cv : default 4
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    dict
        ``{'GaussianProcessRegressor': {'rmse_training': ..., 'rmse_cv': ...}}``.
    """
    reg = GaussianProcessRegressor()
    reg.fit(train_set_ready, train_set_labels)

    # Score the data that was passed in, not a notebook-level global.
    predictions = reg.predict(train_set_ready)
    reg_rmse = np.sqrt(mean_squared_error(train_set_labels, predictions))

    reg_scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                 scoring="neg_mean_squared_error", cv=cv)
    # Scores are negated MSE values, so flip the sign before the root.
    reg_rmse_scores_mean = np.sqrt(-reg_scores).mean()

    return {'GaussianProcessRegressor': {'rmse_training': reg_rmse, 'rmse_cv': reg_rmse_scores_mean}}
    

In [None]:
def regression_crossdecomposition(train_set_ready, train_set_labels, cv=4):
    """Benchmark the ``sklearn.cross_decomposition`` models with default settings.

    PLSRegression, PLSCanonical and CCA are each fitted on the full training
    set to obtain a training RMSE and then cross-validated on that same data
    to obtain a mean cross-validation RMSE.

    Parameters
    ----------
    train_set_ready : array-like
        Prepared feature matrix handed to ``fit``, ``predict`` and
        ``cross_val_score``.
    train_set_labels : array-like
        Target values; flattened to 1-D with ``np.ravel`` before use.
    cv : default 4
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    dict
        ``{model_name: {'rmse_training': ..., 'rmse_cv': ...}}``.
    """
    # NOTE(review): PLSCanonical and CCA are built with their default
    # constructor arguments and fitted on a single raveled target; confirm the
    # defaults are valid for single-target data in the installed scikit-learn.
    # -1 marks "not evaluated yet"; every entry is overwritten in the loop.
    models = {name: {'rmse_training': -1, 'rmse_cv': -1}
              for name in ('PLSRegression', 'PLSCanonical', 'CCA')}

    train_set_labels = np.ravel(train_set_labels)
    for name, result in sorted(models.items()):
        print(name, ': ')
        reg = getattr(cross_decomposition, name)()
        reg.fit(train_set_ready, train_set_labels)

        predictions = reg.predict(train_set_ready)
        train_mse = mean_squared_error(train_set_labels, predictions)
        result['rmse_training'] = np.sqrt(train_mse)

        # Cross-validate on the data passed in, not on a notebook global.
        scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                 scoring="neg_mean_squared_error", cv=cv)
        # Scores are negated MSE values, so flip the sign before the root.
        result['rmse_cv'] = np.sqrt(-scores).mean()

    return models
    
    

In [None]:
def regression_decisiontree(train_set_ready, train_set_labels, cv=4):
    """Evaluate a default ``DecisionTreeRegressor`` on the given training data.

    The model is fitted on the full training set to obtain a training RMSE
    and cross-validated on the same data to obtain a mean CV RMSE.

    Parameters
    ----------
    train_set_ready : array-like
        Prepared feature matrix handed to ``fit``, ``predict`` and
        ``cross_val_score``.
    train_set_labels : array-like
        Target values, passed through unchanged.
    cv : default 4
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    dict
        ``{'DecisionTreeRegressor': {'rmse_training': ..., 'rmse_cv': ...}}``.
    """
    reg = tree.DecisionTreeRegressor()
    reg.fit(train_set_ready, train_set_labels)

    # Score the data that was passed in, not a notebook-level global.
    predictions = reg.predict(train_set_ready)
    reg_rmse = np.sqrt(mean_squared_error(train_set_labels, predictions))

    reg_scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                 scoring="neg_mean_squared_error", cv=cv)
    # Scores are negated MSE values, so flip the sign before the root.
    reg_rmse_scores_mean = np.sqrt(-reg_scores).mean()

    return {'DecisionTreeRegressor': {'rmse_training': reg_rmse, 'rmse_cv': reg_rmse_scores_mean}}

In [None]:
def regression_ensemble(train_set_ready, train_set_labels, cv=4):
    """Benchmark the listed ``sklearn.ensemble`` regressors with default settings.

    Every model is fitted on the full training set to obtain a training RMSE
    and is then cross-validated on that same data to obtain a mean
    cross-validation RMSE. ``VotingRegressor`` and ``StackingRegressor`` are
    built from a fixed RidgeCV / LassoCV / KNeighborsRegressor estimator list;
    all other models use their default constructor.

    Parameters
    ----------
    train_set_ready : array-like
        Prepared feature matrix handed to ``fit``, ``predict`` and
        ``cross_val_score``.
    train_set_labels : array-like
        Target values; flattened to 1-D with ``np.ravel`` before use.
    cv : default 4
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    dict
        ``{model_name: {'rmse_training': ..., 'rmse_cv': ...}}`` with one
        entry per benchmarked model.
    """
    # IsolationForest is intentionally absent: it is an outlier detector, not
    # a regressor, so an RMSE against the targets would be meaningless.
    model_names = [
        'AdaBoostRegressor',
        'BaggingRegressor',
        'ExtraTreesRegressor',
        'GradientBoostingRegressor',
        'RandomForestRegressor',
        'VotingRegressor',
        'StackingRegressor',
    ]
    # These meta-estimators require an explicit list of base estimators.
    needs_estimators = ('StackingRegressor', 'VotingRegressor')
    # -1 marks "not evaluated yet"; every entry is overwritten in the loop.
    models = {name: {'rmse_training': -1, 'rmse_cv': -1} for name in model_names}

    estimators = [('ridge', linear_model.RidgeCV()),
                  ('lasso', linear_model.LassoCV(random_state=42)),
                  ('knr', KNeighborsRegressor(n_neighbors=20,
                                              metric='euclidean'))]

    train_set_labels = np.ravel(train_set_labels)
    for name, result in sorted(models.items()):
        print(name, ': ')
        factory = getattr(ensemble, name)
        reg = factory(estimators) if name in needs_estimators else factory()

        reg.fit(train_set_ready, train_set_labels)

        predictions = reg.predict(train_set_ready)
        train_mse = mean_squared_error(train_set_labels, predictions)
        result['rmse_training'] = np.sqrt(train_mse)

        # Cross-validate on the data passed in, not on a notebook global.
        scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                 scoring="neg_mean_squared_error", cv=cv)
        # Scores are negated MSE values, so flip the sign before the root.
        result['rmse_cv'] = np.sqrt(-scores).mean()

    return models

In [None]:
def regression_neuralnetwork(train_set_ready, train_set_labels, cv=4):
    """Evaluate a default ``MLPRegressor`` on the given training data.

    The model is fitted on the full training set to obtain a training RMSE
    and cross-validated on the same data to obtain a mean CV RMSE.

    Parameters
    ----------
    train_set_ready : array-like
        Prepared feature matrix handed to ``fit``, ``predict`` and
        ``cross_val_score``.
    train_set_labels : array-like
        Target values, passed through unchanged.
    cv : default 4
        Forwarded unchanged to ``cross_val_score`` as its ``cv`` argument.

    Returns
    -------
    dict
        ``{'neural_network': {'rmse_training': ..., 'rmse_cv': ...}}``. The
        key is ``'neural_network'`` rather than the estimator class name; it
        is kept as is so existing callers keep working.
    """
    reg = MLPRegressor()
    reg.fit(train_set_ready, train_set_labels)

    # Score the data that was passed in, not a notebook-level global.
    predictions = reg.predict(train_set_ready)
    reg_rmse = np.sqrt(mean_squared_error(train_set_labels, predictions))

    reg_scores = cross_val_score(reg, train_set_ready, train_set_labels,
                                 scoring="neg_mean_squared_error", cv=cv)
    # Scores are negated MSE values, so flip the sign before the root.
    reg_rmse_scores_mean = np.sqrt(-reg_scores).mean()

    return {'neural_network': {'rmse_training': reg_rmse, 'rmse_cv': reg_rmse_scores_mean}}