# Modeling and Evaluation

This notebook contains everything related to predictive modeling and evaluation.

### Methodology

The methodology used is an adaptation of CRISP-DM. Currently, we are at the Modeling and Evaluation steps:
1. Domain Understanding
2. Data Understanding
3. Data Preparation
4. **Modeling**
5. **Evaluation**



### Importing necessary libraries

In [30]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.model_selection import GridSearchCV, KFold, cross_validate
from sklearn import linear_model
from sklearn import preprocessing
from sklearn import cross_decomposition
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import scale
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go
from skrebate import SURF
import pickle
import scipy

### Constants

In [31]:
#Legacy base URL, kept commented out for reference only (datasets() defines the URL it actually reads from).
#data_url =  "https://raw.githubusercontent.com/Naio/aasa-stability-prediction/master/data/processed/"
#Base random seed: the cross-validation splitters derive their random_state from it, which makes the runs reproducible.
seed = 10 #Seed for controlling any random procedure during the experiments

### Data loading and preprocessing

In [32]:
def datasets():
    """
    Generator that yields ready-to-use (preprocessed) protein datasets, one per protein.

    Each CSV file is downloaded, preprocessed with preprocess_dataset() and split into
    descriptors and target with split_features_and_target().

    Yields:
    dict: {'name': protein name,
           'features': numpy array with dimensions (examples x features),
           'target': numpy array with dimensions (examples,)}
    """
    base_url = 'https://raw.githubusercontent.com/Naio/aasa-stability-prediction/master/data/processed/original/'

    for protein_name in ('p1STN', 'p4LYZ', 'p1BPI', 'HLYZ'):
        #Raw dataset, straight from the repository
        raw_dataset = pd.read_csv('{}{}.csv'.format(base_url, protein_name))

        #Dimensionality reduction and normalization
        preprocessed_dataset = preprocess_dataset(raw_dataset)

        #Descriptors on one side, stability values on the other
        features, target = split_features_and_target(preprocessed_dataset)

        yield dict(name=protein_name, features=features, target=target)

In [33]:
def preprocess_dataset(df, n_features=40):
    """
    Performs the dataset preprocessing tasks: dimensionality reduction and normalization.

    The steps are applied in this order:
      1. Highly correlated descriptors are discarded.
      2. The n_features most relevant descriptors are kept (SURF feature selection).
         This step runs before normalization because the SURF implementation is able
         to work even if data is not normalized.
      3. The remaining descriptors are normalized with Z-Score.

    Parameters:
    df: Dataframe with the protein data.
    n_features: Number of descriptors kept by the feature selection step. Defaults to 40,
                the value previously hard-coded here.

    Returns:
    Dataframe with the preprocessed data.
    """
    #Discard of those descriptors that are highly correlated
    decorrelated = discard_highly_correlated_descriptors(df)

    #Feature selection
    selected = select_subset_of_features_SURF(decorrelated, n_features)

    #Z-Score Normalization
    return normalize_data(selected)

In [34]:
def discard_highly_correlated_descriptors(df, threshold=0.99):
    """
    Discards descriptors that are highly correlated with at least one other column.

    A column is dropped when its absolute Pearson's r with any column located before it
    in the dataframe is greater than the threshold, so the first column of every highly
    correlated group is the one that is kept.

    Parameters:
    df: Dataframe with the protein data. Non-numeric columns are ignored by the
        correlation analysis and are always kept.
    threshold: Absolute Pearson's r above which a column is discarded. Defaults to 0.99.

    Returns:
    The dataframe passed as a parameter, but without the discarded descriptors.
    """
    #Only numeric (and boolean) columns take part in the correlation. Selecting them explicitly
    #keeps this working on pandas versions where corr() no longer skips text columns silently.
    numeric_columns = df.select_dtypes(include=['number', 'bool'])

    #Absolute Pearson's r correlation matrix. Both -1 and 1 are highly correlated.
    correlations = numeric_columns.corr().abs()

    #Strictly upper triangular part of the matrix, so every pair of columns is inspected only once.
    #The builtin bool is used because the np.bool alias was removed from NumPy.
    upper_mask = np.triu(np.ones(correlations.shape, dtype=bool), k=1)
    upper_corr = correlations.where(upper_mask)

    #Discards the descriptors (NaN entries never exceed the threshold)
    to_drop = [column for column in upper_corr.columns if (upper_corr[column] > threshold).any()]
    return df.drop(columns=to_drop)

In [35]:
def select_subset_of_features_SURF(df, n_features):
    """
    Keeps only the n_features most relevant descriptors according to the SURF algorithm.

    Parameters:
    df: Dataframe containing the protein data.
    n_features: Number of descriptors to keep.

    Returns:
    The dataframe restricted to the 'id' and 'stability' columns plus the selected descriptors.
    """
    #Importance score of every descriptor
    ranking = calculate_features_importance(df, n_features)

    #Names of the best-scored descriptors
    top_descriptors = select_n_most_important_features(ranking, n_features)

    return filter_selected_columns(df, top_descriptors)

def calculate_features_importance(df, n_features):
    """
    Scores every descriptor with the SURF algorithm.

    Parameters:
    df: Dataframe containing the protein data; columns from the third one onward are the descriptors.
    n_features: Value forwarded to SURF as n_features_to_select.

    Returns:
    A dataframe with columns 'feature_name' and 'importance', one row per descriptor.
    """
    features, target = split_features_and_target(df)

    surf = SURF(n_features_to_select=n_features)
    surf.fit(features, target)

    #Descriptor names, in the same order as the columns handed to SURF
    descriptor_names = df.columns[2:]

    return pd.DataFrame({'feature_name': descriptor_names,
                         'importance': surf.feature_importances_})

def select_n_most_important_features(features_importance, n_features):
    """
    Picks the names of the n_features descriptors with the highest importance.

    Parameters:
    features_importance: Dataframe with columns 'feature_name' and 'importance', as produced
                         by calculate_features_importance.
    n_features: Number of descriptors to pick.

    Returns:
    A list with the names of the most important descriptors, most important first.
    """
    ranked = features_importance.sort_values(by='importance', ascending=False)
    best_rows = ranked.head(n_features)
    return best_rows['feature_name'].tolist()

def filter_selected_columns(df, columns):
    """
    Restricts the protein dataframe to the given descriptors.

    Parameters:
    df: Dataframe containing the protein data.
    columns: Names of the descriptors to keep.

    Returns:
    A dataframe with the 'id' and 'stability' columns followed by the requested descriptors.
    """
    kept_columns = ['id', 'stability'] + list(columns)
    return df[kept_columns]

In [36]:
def normalize_data(df):
    """
    Returns a dataset with normalized descriptors using Z-Score.

    Only descriptors are normalized: the first two columns (mutation name and stability)
    are carried over untouched.

    Parameters:
    df: Dataframe whose first two columns are the mutation name and the stability, followed
        by the descriptors.

    Returns:
    A dataframe with the same columns and the same index as df, with normalized descriptors.
    """
    #Setting apart the stability and mutation name attributes from the original dataset
    mutation_stability = df.iloc[:, 0:2]

    #Setting apart the descriptors data
    descriptors = df.iloc[:, 2:]

    #Normalizing the descriptors using Z-Score.
    #scale() gives back a plain array, so the original index is restored explicitly. Otherwise the
    #join below would align on a fresh 0..n-1 index and yield NaN for any dataframe with another index.
    normalized_descriptors = pd.DataFrame(scale(descriptors),
                                          columns=descriptors.columns,
                                          index=descriptors.index)

    #Joining stability and mutation name to the normalized descriptors
    return mutation_stability.join(normalized_descriptors)

In [37]:
def split_features_and_target(df):
    """
    Separates the descriptors from the target values.

    Parameters:
    df: Dataframe whose second column holds the target and whose columns from the third one
        onward hold the descriptors.

    Returns:
    A (features, target) tuple of numpy arrays.
    """
    target = df.iloc[:, 1].to_numpy()
    features = df.iloc[:, 2:].to_numpy()
    return features, target

### Defining hyperparameter grids
Each algorithm has its own hyperparameter grid, which is used later by the grid search in the inner cross-validation loop.

In [38]:
#Ordinary least squares has no hyperparameters to tune, hence the empty grid
least_squares_grid = dict()

#Regularization strengths shared by Ridge and Lasso: 13 powers of ten between 1.e-06 and 1.e+06
alpha_range = np.logspace(-6, 6, num=13)
ridge_grid = dict(alpha=alpha_range)
lasso_grid = dict(alpha=alpha_range)

#Number of PLS components: every integer between 2 and 25
pls_grid = dict(n_components=np.arange(2, 26))

#SVR ranges. C and gamma go through 13 powers of ten between 1.e-06 and 1.e+06 (lower C, more regularization).
#Epsilon takes 16 evenly spaced values between 1.0 and 2.5.
c_range = np.logspace(-6, 6, num=13)
gamma_range = np.logspace(-6, 6, num=13)
epsilon_range = np.linspace(1.0, 2.5, num=16)

#Grid for the rbf kernel
rbf_grid = {'C': c_range, 'gamma': gamma_range, 'kernel': ['rbf'], 'epsilon': epsilon_range}
#Grid for the polynomial kernel, which additionally tunes the degree
poly_grid = {'C': c_range, 'gamma': gamma_range, 'kernel': ['poly'], 'degree': [2,3], 'epsilon': epsilon_range}
svr_grid = [rbf_grid, poly_grid]


### Creating estimators for each learning method

In [39]:
def get_learning_methods():
    """
    Creates the (unfitted) estimators and maps them to their corresponding hyperparameter grid.

    New estimator instances are built on every call.

    Returns:
    A list of dictionaries with keys 'name', 'estimator' and 'hyperparameter_grid',
    one per learning method (OLS, RIDGE, LASSO, PLS and SVR, in that order).
    """
    catalogue = (
        ('OLS', linear_model.LinearRegression(), least_squares_grid),
        ('RIDGE', linear_model.Ridge(random_state=seed), ridge_grid),
        ('LASSO', linear_model.Lasso(max_iter=100000), lasso_grid),
        #Scaling is switched off inside the PLS estimator itself
        ('PLS', cross_decomposition.PLSRegression(scale=False), pls_grid),
        ('SVR', svm.SVR(), svr_grid),
    )

    return [{'name': method_name, 'estimator': estimator, 'hyperparameter_grid': grid}
            for method_name, estimator, grid in catalogue]

### Nested Cross-validation

In [77]:
def nested_cv(features, target):
    """
    Performs nested cross-validation over the given dataset, for each learning method defined.

    For every partition of the outer 10-fold loop, a grid search (inner 10-fold loop) selects the
    hyperparameters using only the outer training set. The resulting estimator is then scored on
    both the outer training set and the outer test set.

    Parameters:
    features: A numpy array of shape (n_samples, n_features) containing dataset features.
    target: A numpy array of shape (n_samples,) containing dataset target variable.

    Returns:
    dict: Maps every learning method name to a dictionary with keys:
        'best_parameters': list with the hyperparameters selected in each outer partition.
        'train_scores': {'R-Squared': [...], 'RMSE': [...]} with one value per outer partition.
        'test_scores': {'R-Squared': [...], 'RMSE': [...]} with one value per outer partition.
    """
    #Score metric used for hyperparameter optimization in inner CV loop
    inner_scoring = 'neg_mean_squared_error'

    learning_methods = get_learning_methods()

    results = {}
    for learning_method in learning_methods:

        print("Modeling using", learning_method['name'],"method...")

        #Setting a seed ensures that each learning method will be trained on the same splits.
        inner_cv = KFold(n_splits=10, shuffle=True, random_state=seed + 1)
        outer_cv = KFold(n_splits=10, shuffle=True, random_state=seed)

        #Contains data about the results for a particular learning method.
        learning_method_results = {
            'best_parameters': [],
            'train_scores': {'R-Squared': [], 'RMSE': []},
            'test_scores': {'R-Squared': [], 'RMSE': []},
        }

        #split() method returns a generator that gives all cross-validation partitions.
        for train_index, test_index in outer_cv.split(features):

            #Split the data between train and test sets
            train_features, test_features = features[train_index], features[test_index]
            train_target, test_target = target[train_index], target[test_index]

            #When the fit() method is called, it will internally perform a grid search cross-validation.
            #Once it finds the best hyperparameters, it will fit on complete training set using those parameters.
            grid_search_estimator = GridSearchCV(estimator = learning_method['estimator'],
                           param_grid = learning_method['hyperparameter_grid'],
                           cv = inner_cv,
                           scoring = inner_scoring,
                           #When n_jobs is -1, all CPUs are used to run cross-validation in parallel
                           n_jobs=-1)

            grid_search_estimator.fit(train_features, train_target)
            learning_method_results['best_parameters'].append(grid_search_estimator.best_params_)

            #Prediction using the best estimator selected via Grid Search CV
            train_prediction = grid_search_estimator.predict(train_features)
            test_prediction = grid_search_estimator.predict(test_features)

            #Calculating R-Squared score
            train_r2 = r2_score(y_true = train_target, y_pred = train_prediction)
            test_r2 = r2_score(y_true = test_target, y_pred = test_prediction)

            learning_method_results['train_scores']['R-Squared'].append(train_r2)
            learning_method_results['test_scores']['R-Squared'].append(test_r2)

            #Calculating RMSE score.
            #The square root is taken explicitly instead of passing squared=False, because that
            #argument was deprecated and later removed from scikit-learn's mean_squared_error.
            train_rmse = np.sqrt(mean_squared_error(y_true = train_target, y_pred = train_prediction))
            test_rmse = np.sqrt(mean_squared_error(y_true = test_target, y_pred = test_prediction))

            learning_method_results['train_scores']['RMSE'].append(train_rmse)
            learning_method_results['test_scores']['RMSE'].append(test_rmse)

        #Stores results for a particular learning method
        results[learning_method['name']] = learning_method_results
    return results

In [73]:
def extract_nestedcv_results(results):
    """
    Transforms the nested cv results of a specific dataset into more readable dataframes.

    Parameters:
    results: Dictionary that maps every learning method name ('OLS', 'RIDGE', 'LASSO', 'PLS', 'SVR')
             to a dictionary with keys 'train_scores', 'test_scores' and 'best_parameters'.

    Returns:
    dict: Dataframes under the keys 'train_r2', 'train_rmse', 'test_r2', 'test_rmse' and
    'best_parameters'. Every dataframe has one column per learning method and one row per
    iteration of the outer loop.
    """
    method_names = ['OLS', 'RIDGE', 'LASSO', 'PLS', 'SVR']

    def scores_by_method(partition, metric):
        #Groups the scores of every learning method for one partition ('train_scores' or 'test_scores') and one metric
        return {method_name: results[method_name][partition][metric] for method_name in method_names}

    #Scores over the training sets
    train_r2 = scores_by_method('train_scores', 'R-Squared')
    train_rmse = scores_by_method('train_scores', 'RMSE')

    #Scores over the test sets
    test_r2 = scores_by_method('test_scores', 'R-Squared')
    test_rmse = scores_by_method('test_scores', 'RMSE')

    #Best parameters selected in each iteration of the outer loop, for every learning method
    best_parameters = {method_name: results[method_name]['best_parameters'] for method_name in method_names}

    grouped = [('train_r2', train_r2), ('train_rmse', train_rmse),
               ('test_r2', test_r2), ('test_rmse', test_rmse),
               ('best_parameters', best_parameters)]
    return {key: pd.DataFrame(values) for key, values in grouped}
    

###  Non-nested k-Fold Cross-validation

In [78]:
def non_nested_kfold_cv(features, target, k_value=10):
    """
    Performs non-nested k-fold cross-validation over the given dataset, for each learning method defined.

    Two consecutive k-fold loops are run per learning method, both over the complete dataset:
    a grid search that picks the hyperparameters, followed by a cross-validation of the best
    estimator found, which produces the reported scores.

    Parameters:
    features: A numpy array of shape (n_samples, n_features) containing dataset features.
    target: A numpy array of shape (n_samples,) containing dataset target variable.
    k_value: An integer number indicating the number of folds used by both k-fold loops.

    Returns:
    dict: A dictionary with keys 'train_r2', 'train_rmse', 'test_r2', 'test_rmse' and 'best_hyperparameters'.
    Each value is another dictionary keyed by learning method name ('OLS', ..., 'SVR'): a list with one
    score per fold for the score keys, and the selected hyperparameters for 'best_hyperparameters'.
    """
    #Metric optimized by the grid search
    inner_scoring = 'neg_root_mean_squared_error'

    #Metrics reported by the evaluation loop
    outer_scoring = ['r2', 'neg_root_mean_squared_error']

    scores = {'train_r2': {}, 'train_rmse': {},
              'test_r2': {}, 'test_rmse': {},
              'best_hyperparameters': {}}

    for learning_method in get_learning_methods():
        method_name = learning_method['name']
        print("Modeling using", method_name,"method...")

        #Folds used for hyperparameter optimization
        tuning_folds = KFold(n_splits=k_value, shuffle=True, random_state=seed + 1)

        #Folds used for evaluation. Another seed is used so that the partitions differ from the tuning ones.
        evaluation_folds = KFold(n_splits=k_value, shuffle=True, random_state=seed)

        #n_jobs=-1 runs the search on all CPUs in parallel
        tuner = GridSearchCV(estimator=learning_method['estimator'],
                             param_grid=learning_method['hyperparameter_grid'],
                             cv=tuning_folds,
                             scoring=inner_scoring,
                             n_jobs=-1)

        #fit() carries out the whole grid search cross-validation
        tuner.fit(features, target)

        #The best estimator found is then cross-validated over the evaluation folds
        evaluation = cross_validate(estimator=tuner.best_estimator_,
                                    X=features, y=target,
                                    cv=evaluation_folds, scoring=outer_scoring,
                                    return_train_score=True)

        scores['test_r2'][method_name] = evaluation['test_r2'].tolist()
        scores['train_r2'][method_name] = evaluation['train_r2'].tolist()

        #The scorer reports the RMSE as a negative number, so the sign is flipped back
        scores['test_rmse'][method_name] = (-evaluation['test_neg_root_mean_squared_error']).tolist()
        scores['train_rmse'][method_name] = (-evaluation['train_neg_root_mean_squared_error']).tolist()

        scores['best_hyperparameters'][method_name] = tuner.best_params_

    return scores


### 5x2 Cross-validation

In [79]:
def non_nested_5x2_cv(features, target):
    """
    Performs non-nested 5x2 cross-validation over the given dataset, for each learning method defined.

    Two consecutive 5x2 loops are run per learning method, both over the complete dataset:
    a grid search that picks the hyperparameters, followed by a cross-validation of the best
    estimator found, which produces the reported scores.

    Parameters:
    features: A numpy array of shape (n_samples, n_features) containing dataset features.
    target: A numpy array of shape (n_samples,) containing dataset target variable.

    Returns:
    dict: A dictionary with keys 'train_r2', 'train_rmse', 'test_r2', 'test_rmse' and 'best_hyperparameters'.
    Each value is another dictionary keyed by learning method name ('OLS', ..., 'SVR'): a list with one
    score per train/test split for the score keys, and the selected hyperparameters for 'best_hyperparameters'.
    """
    #Score metric used for hyperparameter optimization
    inner_scoring = 'neg_root_mean_squared_error'

    #Score metrics used for generalization performance estimation of the learning method
    outer_scoring = ['r2', 'neg_root_mean_squared_error']

    test_r2 = {}
    train_r2 = {}
    test_rmse = {}
    train_rmse = {}
    best_hyperparameters = {}

    learning_methods = get_learning_methods()

    for learning_method in learning_methods:

        print("Modeling using", learning_method['name'],"method...")

        #Defines the 5x2 cv loop used to hyperparameter optimization.
        #FiveForTwoKFold consumes five consecutive seeds (base, base + 1, ..., base + 4), one per repetition.
        #The base seed is therefore shifted by 5. A shift of 1 would make four of the five repetitions
        #produce exactly the same partitions as the evaluation loop below.
        gridsearch_cv = FiveForTwoKFold(features, seed + 5)

        #Defines the 5x2 cv loop used to model (method/algorithm) evaluation.
        #Its seeds (seed, ..., seed + 4) do not overlap with the ones used for hyperparameter optimization.
        evaluation_cv = FiveForTwoKFold(features, seed)

        grid_search = GridSearchCV(estimator = learning_method['estimator'],
                           param_grid = learning_method['hyperparameter_grid'],
                           cv = gridsearch_cv,
                           scoring = inner_scoring,
                           #When n_jobs is -1, all CPUs are used to run cross-validation in parallel
                           n_jobs=-1)

        #When the fit() method is called, it will internally perform a grid search cross-validation.
        grid_search.fit(features, target)

        #The best model/hyperparameters are evaluated on a cross-validation process
        cv_results = cross_validate(estimator = grid_search.best_estimator_,
                                          X = features, y = target,
                                          cv = evaluation_cv, scoring = outer_scoring,
                                          return_train_score=True)

        test_r2[learning_method['name']] = cv_results['test_r2'].tolist()
        train_r2[learning_method['name']] = cv_results['train_r2'].tolist()

        #Inside CV, the RMSE score is managed as a negative RMSE. Multiplying it by -1 will turn it into the usual positive RMSE
        test_rmse[learning_method['name']] = (cv_results['test_neg_root_mean_squared_error']*-1).tolist()
        train_rmse[learning_method['name']] = (cv_results['train_neg_root_mean_squared_error']*-1).tolist()

        best_hyperparameters[learning_method['name']] = grid_search.best_params_

    return {'train_r2': train_r2, 'train_rmse': train_rmse,
            'test_r2': test_r2, 'test_rmse':test_rmse,
            'best_hyperparameters': best_hyperparameters}


def FiveForTwoKFold(data, seed):
    """
    Generator that behaves similarly to the split() method of a scikit KFold object, but
    following the 5x2 Cross-validation logic: five shuffled 2-Fold CV loops in a row.

    Parameters:
    data: The dataset to be partitioned.
    seed: Base seed. Repetition i (from 0 to 4) shuffles using seed + i.

    Yields:
    (train_index, test_index) pairs, two per repetition and ten overall.
    """
    for repetition in range(5):
        two_fold = KFold(n_splits=2, shuffle=True, random_state=seed + repetition)
        yield from two_fold.split(data)

### Running Experiment 3: Using 40 most relevant descriptors based on ranking generated by feature selection algorithm SURF.

Each evaluation method consists of two cross-validation loops: one for hyperparameter optimization and one for evaluating the models (learning algorithms). Three evaluation methods were used:
- Two Nested 10-Fold CV loops.
- Two Non-nested 5-Fold CV loops.
- Two non-nested 5x2 CV loops.

In [70]:
def run_experiments():
    """
    Runs the three evaluation methods (nested CV, 5-Fold CV and 5x2 CV) over every dataset
    yielded by datasets().

    Returns:
    dict: A dictionary with keys 'nested_cv', '5cv' and '5x2cv'. Each value maps the dataset
    names to the results given by the corresponding evaluation method.
    """
    experiment_results = {'nested_cv': {}, '5cv': {}, '5x2cv': {}}

    for dataset in datasets():
        dataset_name = dataset['name']
        print("Running experiments in dataset ", dataset_name)

        print("Nested CV validation method...")
        experiment_results['nested_cv'][dataset_name] = nested_cv(dataset['features'], dataset['target'])

        print("5-Fold CV validation method...")
        experiment_results['5cv'][dataset_name] = non_nested_kfold_cv(dataset['features'], dataset['target'], k_value=5)

        print("5x2 CV validation method...")
        experiment_results['5x2cv'][dataset_name] = non_nested_5x2_cv(dataset['features'], dataset['target'])

    return experiment_results


In [80]:
#Runs every evaluation method on every dataset. The returned dictionary is keyed by 'nested_cv', '5cv' and '5x2cv'.
results = run_experiments()

Running experiments in dataset  p1STN
Nested CV validation method...
Modeling using OLS method...
Modeling using RIDGE method...
Modeling using LASSO method...


KeyboardInterrupt: 

### Exporting results

#### Serializing results

In [16]:
def dump_results(results, filename):
    """
    Dumps a dictionary with the results into a binary (pickle) file so it can be loaded later in Python.

    After writing, the file is read back and compared against the original results.

    Parameters:
    results: The object holding the results to be serialized.
    filename: Path of the file to be written.

    Raises:
    Exception: If the results read back from the file are not the same as the original ones.
    """
    with open(filename, 'wb') as output_file:
        pickle.dump(results, output_file)

    #Round-trip check: what was just written must load back unchanged
    reloaded = load_dumped_results(filename)
    if results != reloaded:
        raise Exception('There was a error. The loaded dumped results are not the same as the original.')
        
def load_dumped_results(filename):
    """
    Loads results previously serialized with pickle.

    Unpickling can execute arbitrary code, so this must only be used on trusted files.

    Parameters:
    filename: Path of the pickle file to be read.

    Returns:
    The object stored in the file.
    """
    with open(filename, 'rb') as input_file:
        return pickle.load(input_file)

In [69]:
#Stores the results of each evaluation method in its own pickle file.
#NOTE(review): the files are written to '../../results/modeling/', while the loading cell further down reads them
#from '../../results/modeling/experiment_3/'. Confirm which folder is intended, so that a fresh run loads what it just wrote.
dump_results(results['nested_cv'], '../../results/modeling/nestedCV.pickle')
dump_results(results['5cv'], '../../results/modeling/5CV.pickle')
dump_results(results['5x2cv'], '../../results/modeling/5x2CV.pickle')

#### Loading serialized results

In [19]:
#Loads the serialized results of each evaluation method from the experiment_3 folder.
nested = load_dumped_results('../../results/modeling/experiment_3/nestedCV.pickle')
five_fold = load_dumped_results('../../results/modeling/experiment_3/5CV.pickle')
five_two = load_dumped_results('../../results/modeling/experiment_3/5x2CV.pickle')

### Visualizing Results

In [24]:
#Nested CV results of the p1STN dataset as dataframes, one column per learning method.
extract_nestedcv_results(nested['p1STN'])

{'train_r2':    OLS     RIDGE     LASSO       PLS       SVR
 0  1.0  0.537851  0.585505  0.533128  0.594749
 1  1.0  0.554107  0.590958  0.575337  0.575494
 2  1.0  0.573481  0.642510  0.648507  0.670650
 3  1.0  0.501171  0.547794  0.501368  0.632421
 4  1.0  0.580820  0.633792  0.585057  0.610606
 5  1.0  0.541077  0.587824  0.539259  0.579555
 6  1.0  0.553775  0.610992  0.548925  0.641866
 7  1.0  0.496760  0.527098  0.500846  0.541230
 8  1.0  0.549366  0.592715  0.545311  0.588007
 9  1.0  0.614908  0.660139  0.641213  0.616703,
 'train_rmse':             OLS     RIDGE     LASSO       PLS       SVR
 0  1.948881e-14  1.184657  1.121918  1.190695  1.109337
 1  1.079688e-14  1.142587  1.094355  1.115054  1.114848
 2  1.089972e-14  1.079020  0.987853  0.979533  0.948177
 3  1.235693e-14  1.167350  1.111459  1.167119  1.002076
 4  1.640612e-14  1.051340  0.982669  1.046013  1.013299
 5  1.999266e-14  1.150600  1.090424  1.152876  1.101307
 6  2.108512e-14  1.135476  1.060181  1.141630

In [27]:
#Results loaded from 5CV.pickle for the p1STN dataset.
display(five_fold['p1STN'])

{'train_r2': {'OLS': [1.0, 1.0, 1.0, 1.0, 1.0],
  'RIDGE': [0.5328759888818988,
   0.5249585247820533,
   0.5795151682566662,
   0.5031194410987796,
   0.6176207496765513],
  'LASSO': [0.5838950208855633,
   0.6165145674542734,
   0.6511056507076516,
   0.5612295353559367,
   0.6740865468356408],
  'PLS': [0.5513196258415923,
   0.5587794388237339,
   0.5820544062562146,
   0.5007276225216402,
   0.6395838952875907],
  'SVR': [0.591215044964617,
   0.6438707965438653,
   0.6271238043902045,
   0.5572130399745749,
   0.6172277324745922]},
 'train_rmse': {'OLS': [9.6650192989116e-15,
   1.0409425229338591e-14,
   1.1552915016131283e-14,
   1.5986786371625798e-14,
   1.123951039254908e-14],
  'RIDGE': [1.202510994535023,
   1.1263683678439689,
   1.060853726520621,
   1.1576433400626396,
   1.0550452842978257],
  'LASSO': [1.1349439558726384,
   1.0120200496136467,
   0.9663341042788642,
   1.0878461226684197,
   0.9740361734410984],
  'PLS': [1.1785323190902381,
   1.0855318050743745,
  

In [28]:
#Results loaded from 5x2CV.pickle for the p1STN dataset.
display(five_two['p1STN'])

{'train_r2': {'OLS': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
  'RIDGE': [0.451521439011905,
   0.5797637985280175,
   0.6642583510995648,
   0.4764229205294682,
   0.5604724840175108,
   0.5636696688195953,
   0.6289920416905208,
   0.5720002234665076,
   0.5784640907815628,
   0.5289776989016479],
  'LASSO': [0.5843796262871643,
   0.6775094271260367,
   0.8183656632975305,
   0.6224341857998197,
   0.6721292919076778,
   0.6989824953719026,
   0.82626966567817,
   0.6642008537685531,
   0.6991039561145906,
   0.60537209184799],
  'PLS': [0.5102077529833102,
   0.6051302919863979,
   0.7043041869450097,
   0.5599477465230007,
   0.6207063265483725,
   0.5763167557804609,
   0.6897306307710391,
   0.6360099706325653,
   0.6743540835001025,
   0.5515395398558844],
  'SVR': [0.6414819083359974,
   0.557062916246898,
   0.6994550230505172,
   0.5658228608413511,
   0.6091729044637222,
   0.5960425606839395,
   0.5950333627134763,
   0.6965650847557234,
   0.5824767409808285,
 