# Experiments for Machine Learning Models for Water Treatment 

### generally speaking, experimental results and stability for [random, unnormalized] are 
### better than [clustering, normalized]

In [1]:
from DataSet_ablation import DataSet
import config_ablation
import os.path as osp
import pandas as pd
import xlrd
import os
import numpy as np
import math
import logging
from sklearn.linear_model import RidgeCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# add ElasticNet and Lasso model fit with Least Angle Regression
from sklearn.linear_model import LassoLars
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import LassoLarsCV

from sklearn.linear_model import LassoCV
from sklearn.neural_network import MLPRegressor

# Adding XGBoost、LightGBM
import xgboost as xgb
import lightgbm as lgb
 
# Adding Multiple Linear Regression
from sklearn.linear_model import LinearRegression

from sklearn import cross_validation
from sklearn.model_selection import KFold, train_test_split, GridSearchCV

# Keras
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, CuDNNLSTM
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
class Water:
    def __init__(self, excel_files):
        self.source = DataSet(excel_files)
        
      # only init and structure model 
    
      # return S: time-series segments to preserve time-consistent [training]
      # return D: time penalty term to detect abrupt change [training]
      # return lamda3:  \lambda_3 to adjust [grid]
      # return alpha: alpha including (\lambda_1) \lambda_2 to adjust [grid]
      # return I: augmented identity matrix [training]
      
    
    def build_model(self, method, source):
        
        if method=='no_timeSeries_Tenlar':
            opt, best_params_score, comb_params_score, comb_scores = self.cv_no_timeSeries_Tenlar(source.train_x, source.train_y)
            # grid lambda1 + lambda2
            alp = opt['alpha']
            model = LassoLars(alpha=alp, max_iter=50)
            # S/D/I training
#             regressor.fit(trainX,trainY)
            return opt, best_params_score, model,comb_params_score ,comb_scores
        
        if method=='no_l3Norm_Tenlar':
            opt, best_params_score, comb_params_score, comb_scores = self.cv_no_l3Norm_Tenlar(source.train_x, source.train_y, source.ts_x)
            alp = opt['alpha']
            model = LassoLars(alpha=alp, max_iter=50)
            return opt, best_params_score, model,comb_params_score ,comb_scores
                
        if method=='no_l2Norm_Tenlar':
            opt, best_params_score, comb_params_score, comb_scores = self.cv_no_l2Norm_Tenlar(source.train_x, source.train_y, source.ts_x)
            alp = opt['alpha']
            model = LassoLars(alpha=alp, max_iter=50)
            return opt, best_params_score, model,comb_params_score ,comb_scores
        
        if method=='no_IM_Tenlar':
            opt, best_params_score, comb_params_score, comb_scores = self.cv_LarsemiSupervisedElasticNetRegressor(source.train_x, source.train_y, source.ts_x)
            alp = opt['alpha']
            model = LassoLars(alpha=alp, max_iter=50)
            return opt, best_params_score, model,comb_params_score ,comb_scores
                                           
        if method=='full_model':
            opt, comb_params_score, comb_scores = self.cv_full_model(source.train_x, source.train_y)
            alp = opt['alpha']
            model = LassoLars(alpha=alp, max_iter=50)
            return opt, model,comb_params_score ,comb_scores
                     
        if method=='lasso':
            model = LassoCV(alphas=[1e-3,1e-2,1e-1,1,1e1,1e2,1e3], cv=2, max_iter=50)            
            return model
        

    # grid search for no_timeSeries_Tenlar
    def cv_no_timeSeries_Tenlar(self, x, y):
        opt = {}
        
        lamda1 = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 1.2e-1, 1.4e-1, 1.6e-1, 1.8e-1, 2e-1, 2.2e-1, 2.4e-1, 2.6e-1, 2.8e-1, 3e-1, 3.1e-1, 3.2e-1, 3.23e-1, 3.25e-1, 3.27e-1, 3.3e-1, 3.35e-1, 3.5e-1]
        lamda2 = [0, 9e-3,7e-3, 6e-3, 3e-3, 1e-2, 2e-2, 5e-2, 8e-2, 1e-1, 1.1e-1, 1.2e-1, 1.3e-1, 1.4e-1, 1.5e-1, 1.6e-1, 1.7e-1, 1.8e-1, 2e-1, 2.1e-1, 2.3e-1, 2.5e-1]
        
        m = x.shape[1]
       
        alps = []
        alp = 0
                
        k = len(lamda1)
        f = len(lamda2)
        comb_params_score = []
        comb_scores = []
        comb_params = []
        
        scores = []        
        score = 0
        
        for u in range(k):
            for v in range(f):
  
                alp = lamda1[u] / math.sqrt(1 + lamda2[v]) 

                I = math.sqrt(lamda2[v]) * np.eye(m)
                z0 = np.zeros([m])

                trainX = np.concatenate((x, I), 0)
                trainY = np.concatenate((y, z0), 0)

                trainX = 1 / math.sqrt(1 + lamda2[v]) *  trainX 

                clf = LassoLars(alpha = alp, max_iter = 50)
                current_model = clf.fit(trainX, trainY)

                score = current_model.score(x, y, None)
                scores.append(score)
                comb_params_score.append(np.array([lamda1[u], lamda2[v],score]))
                comb_scores.append(np.array([u, v, score]))
                comb_params.append((u, v))

                alps.append(alp)

        scores = np.array(scores)
        idx = np.argsort(scores)        
        best_score = scores[idx[-1]]
        best_params_score = comb_params_score[idx[-1]]

        opt['alpha'] = alps[idx[-1]]
        opt['combine_params'] = comb_params[idx[-1]] 
        return opt, best_params_score, comb_params_score,comb_scores        

        print ('---------------------------------------------------')
                
      # grid search for no_l3Norm_Tenlar
    def cv_no_l3Norm_Tenlar(self,x, y, xts):
        opt = {}
        
        lamda1 = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 1.2e-1, 1.4e-1, 1.6e-1, 1.8e-1, 2e-1, 2.2e-1, 2.4e-1, 2.6e-1, 2.8e-1, 3e-1, 3.1e-1, 3.2e-1, 3.23e-1, 3.25e-1, 3.27e-1, 3.3e-1, 3.35e-1, 3.5e-1]
        lamda2 = [0, 9e-3,7e-3, 6e-3, 3e-3, 1e-2, 2e-2, 5e-2, 8e-2, 1e-1, 1.1e-1, 1.2e-1, 1.3e-1, 1.4e-1, 1.5e-1, 1.6e-1, 1.7e-1, 1.8e-1, 2e-1, 2.1e-1, 2.3e-1, 2.5e-1]
        
        m = x.shape[1]
        m_ts, n_ts = xts.shape       
        l_ts = 2000   
        ts_no = round(m_ts / l_ts)
        D = []
        D_seg = np.zeros([l_ts,n_ts])                       
        idb_ts = 0 
               
        for sp in range(ts_no):
            
            temp_ts = xts[idb_ts:(idb_ts + l_ts),:]
            tmp_m = temp_ts.shape[0]              
            for cs in range(tmp_m-1):
                D_seg[cs,:] = temp_ts[(cs + 1),:] - temp_ts[cs,:]        

            D.append(D_seg)            
            idb_ts = idb_ts + l_ts
            
        alps = []
        alp = 0
                
        k = len(lamda1)
        f = len(lamda2)
        comb_params_score = []
        comb_scores = []
        comb_params = []        
        scores = []        
        score = 0
        
        for u in range(k):
            for v in range(f):
                    
                seg_no = np.random.randint(0,ts_no)                    
                current_D = D[seg_no]   
                alp = lamda1[u] / math.sqrt(1 + lamda2[v])

                I = math.sqrt(lamda2[v]) * np.eye(m)
                ctD_m = current_D.shape[0]
                z0 = np.zeros([m + ctD_m])

                trainX = np.concatenate((x, I, current_D), 0)
                trainY = np.concatenate((y, z0), 0)
                trainX = 1 / math.sqrt(1 + lamda2[v]) * trainX 


                clf = LassoLars(alpha = alp, max_iter = 50)
                current_model = clf.fit(trainX, trainY)
                score = current_model.score(x, y, None)
                scores.append(score)
                comb_params_score.append(np.array([lamda1[u], lamda2[v], score]))
                comb_scores.append(np.array([u, v, score]))
                comb_params.append((u, v))

                alps.append(alp)

        scores = np.array(scores)
        idx = np.argsort(scores)

        best_score = scores[idx[-1]]        
        best_params_score = comb_params_score[idx[-1]]

        opt['alpha'] = alps[idx[-1]]
        opt['combine_params'] = comb_params[idx[-1]] 
        return opt, best_params_score, comb_params_score,comb_scores

        print ('---------------------------------------------------')
        

      # grid search for no_l2Norm_Tenlar
    def cv_no_l2Norm_Tenlar(self, x, y, xts):
        opt = {}
        
        lamda1 = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 1.2e-1, 1.4e-1, 1.6e-1, 1.8e-1, 2e-1, 2.2e-1, 2.4e-1, 2.6e-1, 2.8e-1, 3e-1, 3.1e-1, 3.2e-1, 3.23e-1, 3.25e-1, 3.27e-1, 3.3e-1, 3.35e-1, 3.5e-1]
        lamda3 = [1e-5,  1e-4, 1e-3,  1e-2, 1e-1, 0, 2e-1,8e-1, 1]

        m = x.shape[1]
        m_ts, n_ts = xts.shape       
        l_ts = 2000   
        ts_no = round(m_ts / l_ts)
        D = []
        D_seg = np.zeros([l_ts,n_ts])                       
        idb_ts = 0 
               
        for sp in range(ts_no):
            
            temp_ts = xts[idb_ts:(idb_ts + l_ts),:]
            tmp_m = temp_ts.shape[0]              
            for cs in range(tmp_m-1):
                D_seg[cs,:] = temp_ts[(cs + 1),:] - temp_ts[cs,:]        

            D.append(D_seg)            
            idb_ts = idb_ts + l_ts
            
        alps = []
        alp = 0
                
        k = len(lamda1)
        b = len(lamda3)
        comb_params_score = []
        comb_scores = []
        comb_params = []        
        scores = []        
        score = 0
        
        for u in range(k):
            for w in range(b):

                seg_no = np.random.randint(0,ts_no)                    
                current_D = D[seg_no]   
                alp = lamda1[u]

                I = np.eye(m)
                ctD_m = current_D.shape[0]
                z0 = np.zeros([m + ctD_m])

                trainX = np.concatenate((x, I, current_D), 0)
                trainY = np.concatenate((y, z0), 0)

                clf = LassoLars(alpha = alp, max_iter = 50)
                current_model = clf.fit(trainX, trainY)
                score = current_model.score(x, y, None)
                scores.append(score)
                comb_params_score.append(np.array([lamda1[u], lamda3[w], score]))
                comb_scores.append(np.array([u, w, score]))
                comb_params.append((u, w))

                alps.append(alp)

        scores = np.array(scores)
        idx = np.argsort(scores)

        best_score = scores[idx[-1]]        
        best_params_score = comb_params_score[idx[-1]]

        opt['alpha'] = alps[idx[-1]]
        opt['combine_params'] = comb_params[idx[-1]] 
        return opt, best_params_score, comb_params_score, comb_scores


    def cv_no_IM_Tenlar(self, x, y, xts):
        opt = {}
        
        lamda1 = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 1.2e-1, 1.4e-1, 1.6e-1, 1.8e-1, 2e-1, 2.2e-1, 2.4e-1, 2.6e-1, 2.8e-1, 3e-1, 3.1e-1, 3.2e-1, 3.23e-1, 3.25e-1, 3.27e-1, 3.3e-1, 3.35e-1, 3.5e-1]
        lamda2 = [0, 9e-3,7e-3, 6e-3, 3e-3, 1e-2, 2e-2, 5e-2, 8e-2, 1e-1, 1.1e-1, 1.2e-1, 1.3e-1, 1.4e-1, 1.5e-1, 1.6e-1, 1.7e-1, 1.8e-1, 2e-1, 2.1e-1, 2.3e-1, 2.5e-1]
        lamda3 = [1e-5,  1e-4, 1e-3,  1e-2, 1e-1, 0, 2e-1,8e-1, 1]
                    
        m = x.shape[1]
        m_ts, n_ts = xts.shape       
        l_ts = 2000  
        ts_no = round(m_ts / l_ts)
        D = []
        D_seg = np.zeros([l_ts,n_ts]) 
        idb_ts = 0 
               
        for sp in range(ts_no):
            
            temp_ts = xts[idb_ts:(idb_ts + l_ts),:]
            tmp_m = temp_ts.shape[0]              
            for cs in range(tmp_m-1):
                D_seg[cs,:] = temp_ts[(cs + 1),:] - temp_ts[cs,:]        

            D.append(D_seg)            
            idb_ts = idb_ts + l_ts
        
        alps = []
        alp = 0
                
        k = len(lamda1)
        f = len(lamda2)
        b = len(lamda3)
        comb_params_score = []
        comb_scores = []
        comb_params = []
        scores = []        
        score = 0
        
        # tuning parameters in grid
        for u in range(k):
            for v in range(f):
                for w in range(b):
                    
                    seg_no = np.random.randint(0,ts_no)
                    current_D = D[seg_no]    
                    alp = lamda1[u] / math.sqrt(1 + lamda2[v]) 

                    current_D = math.sqrt(lamda3[w]) * current_D  
                    ctD_m = current_D.shape[0]
                    z0 = np.zeros([ctD_m])
                    
                    trainX = np.concatenate((x, current_D), 0)
                    trainY = np.concatenate((y, z0), 0)
                    trainX = 1 / math.sqrt(1 + lamda2[v]) *  trainX 

                    clf = LassoLars(alpha = alp, max_iter = 50)
                    current_model = clf.fit(trainX, trainY)
                    score = current_model.score(x, y, None)
                    scores.append(score)

                    comb_params_score.append(np.array([lamda1[u], lamda2[v] ,lamda3[w], score]))
                    comb_scores.append(np.array([u, v, w, score]))
                    comb_params.append((u, v, w))

                    alps.append(alp)
                
        scores = np.array(scores)
        idx = np.argsort(scores)
        best_score = scores[idx[-1]]
        best_params_score = comb_params_score[idx[-1]]
        opt['alpha'] = alps[idx[-1]]
        opt['combine_params'] = comb_params[idx[-1]] 
        return opt, best_params_score, comb_params_score,comb_scores
    
    print ('---------------------------------------------------')    

    # tuning params in grid for entire Tenlar
    def cv_full_model(self, x, y, xts):
        opt = {}
        
        ## set hyperparameters for Tenlar 
        # best (lambda1, lambda2, lambda3) =  
        # best (lambda1, lambda2) = (0.001, 0.0005)
        lamda1 = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 1.2e-1, 1.4e-1, 1.6e-1, 1.8e-1, 2e-1, 2.2e-1, 2.4e-1, 2.6e-1, 2.8e-1, 3e-1, 3.1e-1, 3.2e-1, 3.23e-1, 3.25e-1, 3.27e-1, 3.3e-1, 3.35e-1, 3.5e-1]
        lamda2 = [0, 9e-3,7e-3, 6e-3, 3e-3, 1e-2, 2e-2, 5e-2, 8e-2, 1e-1, 1.1e-1, 1.2e-1, 1.3e-1, 1.4e-1, 1.5e-1, 1.6e-1, 1.7e-1, 1.8e-1, 2e-1, 2.1e-1, 2.3e-1, 2.5e-1]
        lamda3 = [1e-5,  1e-4, 1e-3,  1e-2, 1e-1, 0, 2e-1,8e-1, 1]
        
        ## pre augmented data                
        m = x.shape[1]
        m_ts, n_ts = xts.shape
        
        #set sequence length of segment in augmented time-series data       
        l_ts = 2000   #*** 
        ts_no = round(m_ts / l_ts)
         
        # init residual matrix D and segmented residual matrix D
        D = []
        D_seg = np.zeros([l_ts,n_ts]) 
                      
        # init begin index 
        idb_ts = 0 
               
#       split time-series data 
        for sp in range(ts_no):
#             print ('idb_ts:', idb_ts)
            temp_ts = xts[idb_ts:(idb_ts + l_ts),:]
#             print ('seg_ts:',temp_ts.shape)
#             print ('seg_ts:',temp_ts)
            
            tmp_m = temp_ts.shape[0]              
            for cs in range(tmp_m-1):
                D_seg[cs,:] = temp_ts[(cs + 1),:] - temp_ts[cs,:]        
#             print ('D_seg:',D_seg)
#             print ('D_seg:',D_seg.shape)

            D.append(D_seg)            
            idb_ts = idb_ts + l_ts
            
#         print ('D:',D)
#         print ('D:',len(D))
        
        # init alpha list and alpha 
        alps = []
        alp = 0
                
        # init iterations and save parameters combination and corresponding score to list
        k = len(lamda1)
        f = len(lamda2)
        b = len(lamda3)
        comb_params_score = []
        comb_scores = []
        comb_params = []
        
        
        
        # init scores list for calculating R^2
        scores = []        
        score = 0
        
        # tuning parameters in grid
        for u in range(k):
            for v in range(f):
                for w in range(b):
                    
                # get argumented time-series segmented data
                    # random number for selecting segment randomly
                    seg_no = np.random.randint(0,ts_no)
#                     print ('seg_no:', seg_no)
                    
                    # Ω augmented time-series data 
                    current_D = D[seg_no]   
#                     print ('current_D :', current_D )
#                     print ('current_D_shape :', current_D.shape )
                    
                                    
                ## lar_lasso → lar_supervisedElasticNet 
                     # give γ equivalent to alpha, convert only one param to two params  
                    alp = lamda1[u] / math.sqrt(1 + lamda2[v]) 
                                          
                   # concat train samples(trainX,trainY) to augmented matrix for augmenting traindata and adding hyperparameter           
                      # S1:augmenting trainX and trainY                        
                      # calculating time-series data for lambda3 and init identity matrix for lambda 2 and padding 0 for trainY
                    current_D = math.sqrt(lamda3[w]) * current_D  
                    I = math.sqrt(lamda2[v]) * np.eye(m)
                    ctD_m = current_D.shape[0]
                    # concate 
                    z0 = np.zeros([m + ctD_m])
                    
                     # cat trainX and trainY for augmenting
                    trainX = np.concatenate((x, I, current_D), 0)
                    trainY = np.concatenate((y, z0), 0)
                     # cat trainX and alpha for converting free parameters β implementing “elastic”

                    trainX = 1 / math.sqrt(1 + lamda2[v]) *  trainX 

#                     print ('current_lamda1:', lamda1[u])


                    # struct Lar_ElasticNet model using current hyperparameter
                    clf = LassoLars(alpha = alp, max_iter = 50)
                    # fit current model 
                    current_model = clf.fit(trainX, trainY)
    #                 print ('current_lar_elasticnet:', current_model)
                    # Return the coefficient of determination R^2 of the prediction

                    ######################*********************####################
                    score = current_model.score(x, y, None)
                    # save score to list
                    scores.append(score)
#                     print ('current_score:', score)
#                     print ('---------------------------------------')

                    # save combination para list
                    comb_params_score.append(np.array([lamda1[u], lamda2[v] ,lamda3[w], score]))

                    # save all locations and scores(lambda1, lambda2, lambda3)
                    comb_scores.append(np.array([u, v, w, score]))
                    comb_params.append((u, v, w))

                    # save current alpha
                    alps.append(alp)
                
        # list → numpy
        scores = np.array(scores)
        # Returns the index that would sort an array(default: ascending order)
        idx = np.argsort(scores)
        
        # print best score 
        best_score = scores[idx[-1]]
        print ('best_score:', best_score)
        
        # print best lambda param_group
        best_params_score = comb_params_score[idx[-1]]
#         print ('best_params_score:', best_params_score)
        print ('-----------------------------------------')
        
        # select highest score and save corresponding location(lamda1, lamda2)
        opt['alpha'] = alps[idx[-1]]
        opt['combine_params'] = comb_params[idx[-1]] 
        return opt, best_params_score, comb_params_score,comb_scores

    
    
    
    def evaluate(self, pred_y, true_y, metrics):
        pred_y = pred_y.reshape([-1])
        true_y = true_y.reshape([-1])
        metrics_num = len(metrics)
        results = np.zeros([metrics_num])
        for i,metric in enumerate(metrics):
            if metric == 'rmse':
                results[i] = np.sqrt(np.mean(np.power(pred_y - true_y,2)))
            elif metric == 'mape':
                results[i] = np.mean(np.abs(true_y - pred_y) / true_y)*100
            elif metric == 'correlation':
                results[i] = np.corrcoef(pred_y, true_y)[0,1]
            elif metric == 'wi':
                results[i] = np.sum(np.power(true_y - pred_y, 2)) / np.sum(np.power(np.abs(pred_y - np.mean(true_y)) + np.abs(true_y-np.mean(true_y)), 2))
        return results      
          
        
    def run_experiments(self, methods, experiments, repeat_num, metrics):
        # dic type
        results = {}
        for exp_name in experiments.keys():
            # exp_name is dict_keys type
            exp = experiments[exp_name]
            train_time, test_time = exp['times']
            evaluation = np.zeros([repeat_num, len(exp['lag_time']), len(exp['clustering']), len(exp['normalize']), len(methods), len(metrics)])
            print ('repeat_num:',repeat_num)
                        
            for r in range(repeat_num):
                for i,lag_time in enumerate(exp['lag_time']):
                    for j,clustering in enumerate(exp['clustering']):
                        for k,normalize in enumerate(exp['normalize']):
                            
                            # time-series data
                            self.source.attr2source(train_time, 
                                                    normalize,
                                                    0, 
                                                    lag_time,
                                                    data_type='time-series')                            
                            
                            # general training data
                            self.source.attr2source(train_time, 
                                                    normalize,
                                                    clustering, 
                                                    lag_time,
                                                    data_type='train')
                            # general testing data
                            self.source.attr2source(test_time,
                                                     normalize, 
                                                     clustering, 
                                                     lag_time,
                                                     data_type='test')
                            
                            
                            
                            ensemble_y = 0
                            for t, method in enumerate(methods):
                                # baseline model
                                if method == 'lasso':
                                    model = self.build_model(method, self.source)
                                    model = model.fit(self.source.train_x, self.source.train_y)
                                    predict_y = model.predict(self.source.test_x)                               
                                    
                                else:                                
                                # ablation study on validation
                                    if method == 'no_timeSeries_Tenlar':
                                        
                                        opt, best_params_score, model,comb_params_score ,comb_scores = self.build_model(method, self.source)                                
                                        model = model.fit(self.source.train_x, self.source.train_y)
                                        predict_y = model.predict(self.source.test_x)                                                                                                    
                                        
                                
                                    else:
  
                                        train_X = np.concatenate((self.source.train_x, self.source.ts_x), 0)
                                        train_Y = np.concatenate((self.source.train_y, self.source.ts_y), 0)
#                             
                                        model = model.fit(train_X, train_Y)
                                        predict_y = model.predict(self.source.test_x)
               
                                evaluation[r, i, j, k, t, :] = self.evaluate(predict_y, self.source.test_y, metrics)
                                print('exp_name={}, lag_time={}, normalize={}, clustering={}, method={} :{}'.format(
                                    exp_name, lag_time, normalize, clustering, method, evaluation[r, i, j, k, t, :]))
                                
            # save results as a dictionary 
            np.save('result/{}.npy'.format(exp_name), evaluation)            
#             results[exp_name] = evaluation


    def decimal2round2(self, results):
        sh = results.shape
        fraction = results.reshape(-1)
        for m in range(len(fraction)):
            fraction[m] = round(fraction[m],8)
        fractions = fraction.reshape(sh)
#         print ('fractions_shape',fractions.shape)
        return fractions 


---------------------------------------------------


In [4]:
os.chdir(config_ablation.path)
os.environ["CUDA_VISIBLE_DEVICES"]="3"
m = Water(config_ablation.excel_files)
m.run_experiments(config_ablation.methods, 
                  config_ablation.experiments,
                  config_ablation.repeat_num,
                  config_ablation.metrics)

repeat_num: 5
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.12186691 6.14392507 0.91994488 0.15311998]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.12186691 6.14392507 0.91994488 0.15311998]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.12186691 6.14392507 0.91994488 0.15311998]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_IM_Tenlar :[3.12186691 6.14392507 0.91994488 0.15311998]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=full_model :[3.12186691 6.14392507 0.91994488 0.15311998]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=lasso :[3.11780566 6.14548401 0.91908018 0.1533114 ]




exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.23625763 6.259006   0.91502384 0.15911526]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.23625763 6.259006   0.91502384 0.15911526]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.23625763 6.259006   0.91502384 0.15911526]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_IM_Tenlar :[3.23625763 6.259006   0.91502384 0.15911526]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=full_model :[3.23625763 6.259006   0.91502384 0.15911526]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=lasso :[3.23085966 6.25230204 0.91513886 0.15884765]




exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.05629012 6.02792216 0.93217369 0.14263188]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.05629012 6.02792216 0.93217369 0.14263188]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.05629012 6.02792216 0.93217369 0.14263188]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_IM_Tenlar :[3.05629012 6.02792216 0.93217369 0.14263188]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=full_model :[3.05629012 6.02792216 0.93217369 0.14263188]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=lasso :[3.08886303 6.0500505  0.92600887 0.14567777]




exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.0085142  5.83740478 0.93234627 0.1358171 ]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.0085142  5.83740478 0.93234627 0.1358171 ]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.0085142  5.83740478 0.93234627 0.1358171 ]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_IM_Tenlar :[3.0085142  5.83740478 0.93234627 0.1358171 ]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=full_model :[3.0085142  5.83740478 0.93234627 0.1358171 ]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=lasso :[2.95478072 5.76063007 0.9324041  0.13325629]




exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.76684693 5.3695195  0.9423882  0.12246783]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.76684693 5.3695195  0.9423882  0.12246783]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.76684693 5.3695195  0.9423882  0.12246783]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_IM_Tenlar :[2.76684693 5.3695195  0.9423882  0.12246783]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=full_model :[2.76684693 5.3695195  0.9423882  0.12246783]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=lasso :[2.7994142  5.41272058 0.93905236 0.12477689]




exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.93380843 5.8306413  0.93608599 0.13712304]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.93380843 5.8306413  0.93608599 0.13712304]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.93380843 5.8306413  0.93608599 0.13712304]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_IM_Tenlar :[2.93380843 5.8306413  0.93608599 0.13712304]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=full_model :[2.93380843 5.8306413  0.93608599 0.13712304]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=lasso :[2.93542381 5.82584024 0.93576425 0.13730381]




exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.15298534 6.21765587 0.91110677 0.15410034]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.15298534 6.21765587 0.91110677 0.15410034]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.15298534 6.21765587 0.91110677 0.15410034]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_IM_Tenlar :[3.15298534 6.21765587 0.91110677 0.15410034]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=full_model :[3.15298534 6.21765587 0.91110677 0.15410034]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=lasso :[3.13865951 6.19598489 0.91226012 0.15322576]




exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.29011015 6.45701479 0.91610245 0.16965539]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.29011015 6.45701479 0.91610245 0.16965539]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.29011015 6.45701479 0.91610245 0.16965539]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_IM_Tenlar :[3.29011015 6.45701479 0.91610245 0.16965539]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=full_model :[3.29011015 6.45701479 0.91610245 0.16965539]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=lasso :[3.26796161 6.41682762 0.91643776 0.16864929]




exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.77582692 5.37976318 0.93398014 0.12115842]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.77582692 5.37976318 0.93398014 0.12115842]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.77582692 5.37976318 0.93398014 0.12115842]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_IM_Tenlar :[2.77582692 5.37976318 0.93398014 0.12115842]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=full_model :[2.77582692 5.37976318 0.93398014 0.12115842]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=lasso :[2.74422149 5.315404   0.93476537 0.11949744]




exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.95915737 5.7229393  0.92328506 0.13742556]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.95915737 5.7229393  0.92328506 0.13742556]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.95915737 5.7229393  0.92328506 0.13742556]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_IM_Tenlar :[2.95915737 5.7229393  0.92328506 0.13742556]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=full_model :[2.95915737 5.7229393  0.92328506 0.13742556]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=lasso :[2.93307071 5.62523594 0.91981601 0.13744248]




exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.1698744  6.12708055 0.92495678 0.15025098]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.1698744  6.12708055 0.92495678 0.15025098]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.1698744  6.12708055 0.92495678 0.15025098]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_IM_Tenlar :[3.1698744  6.12708055 0.92495678 0.15025098]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=full_model :[3.1698744  6.12708055 0.92495678 0.15025098]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=lasso :[3.22142256 6.2252529  0.92311067 0.15302828]




exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.28290737 6.51225605 0.92296096 0.16740525]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.28290737 6.51225605 0.92296096 0.16740525]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.28290737 6.51225605 0.92296096 0.16740525]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_IM_Tenlar :[3.28290737 6.51225605 0.92296096 0.16740525]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=full_model :[3.28290737 6.51225605 0.92296096 0.16740525]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=lasso :[3.22713758 6.41006477 0.92435072 0.16479101]




exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.87283244 5.44315659 0.91341026 0.13722931]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.87283244 5.44315659 0.91341026 0.13722931]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.87283244 5.44315659 0.91341026 0.13722931]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_IM_Tenlar :[2.87283244 5.44315659 0.91341026 0.13722931]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=full_model :[2.87283244 5.44315659 0.91341026 0.13722931]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=lasso :[2.87397202 5.4290564  0.91334443 0.13732729]




exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.87347151 5.5981982  0.935026   0.13141663]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.87347151 5.5981982  0.935026   0.13141663]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.87347151 5.5981982  0.935026   0.13141663]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_IM_Tenlar :[2.87347151 5.5981982  0.935026   0.13141663]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=full_model :[2.87347151 5.5981982  0.935026   0.13141663]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=lasso :[2.86778893 5.57574404 0.93426192 0.1313882 ]




exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.25170689 6.36829564 0.92856671 0.15713103]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.25170689 6.36829564 0.92856671 0.15713103]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.25170689 6.36829564 0.92856671 0.15713103]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_IM_Tenlar :[3.25170689 6.36829564 0.92856671 0.15713103]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=full_model :[3.25170689 6.36829564 0.92856671 0.15713103]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=lasso :[3.23700569 6.34327575 0.92964074 0.15624118]




exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.87037456 5.70106089 0.94000447 0.12593577]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.87037456 5.70106089 0.94000447 0.12593577]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.87037456 5.70106089 0.94000447 0.12593577]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_IM_Tenlar :[2.87037456 5.70106089 0.94000447 0.12593577]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=full_model :[2.87037456 5.70106089 0.94000447 0.12593577]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=lasso :[2.86554056 5.69080012 0.9387717  0.12611924]




exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.99973335 5.99104483 0.94343445 0.13262597]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.99973335 5.99104483 0.94343445 0.13262597]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.99973335 5.99104483 0.94343445 0.13262597]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_IM_Tenlar :[2.99973335 5.99104483 0.94343445 0.13262597]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=full_model :[2.99973335 5.99104483 0.94343445 0.13262597]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=lasso :[2.98052113 5.942495   0.94202478 0.13225608]




exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.07799657 6.08541441 0.93776195 0.15290307]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.07799657 6.08541441 0.93776195 0.15290307]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.07799657 6.08541441 0.93776195 0.15290307]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_IM_Tenlar :[3.07799657 6.08541441 0.93776195 0.15290307]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=full_model :[3.07799657 6.08541441 0.93776195 0.15290307]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=lasso :[3.04245836 6.04652548 0.93737348 0.15149074]




exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.29278577 6.4468799  0.92338291 0.16418138]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.29278577 6.4468799  0.92338291 0.16418138]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.29278577 6.4468799  0.92338291 0.16418138]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_IM_Tenlar :[3.29278577 6.4468799  0.92338291 0.16418138]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=full_model :[3.29278577 6.4468799  0.92338291 0.16418138]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=lasso :[3.27853374 6.42259412 0.9243145  0.16334361]




exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.96842153 5.82759866 0.93221106 0.13554792]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.96842153 5.82759866 0.93221106 0.13554792]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.96842153 5.82759866 0.93221106 0.13554792]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_IM_Tenlar :[2.96842153 5.82759866 0.93221106 0.13554792]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=full_model :[2.96842153 5.82759866 0.93221106 0.13554792]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=lasso :[2.92544208 5.75033059 0.93276973 0.13356445]




exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.76624716 5.3933916  0.93463355 0.13187771]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.76624716 5.3933916  0.93463355 0.13187771]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.76624716 5.3933916  0.93463355 0.13187771]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=no_IM_Tenlar :[2.76624716 5.3933916  0.93463355 0.13187771]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=full_model :[2.76624716 5.3933916  0.93463355 0.13187771]
exp_name=spring, lag_time=8, normalize=1, clustering=0, method=lasso :[2.74442986 5.36056402 0.93558061 0.13062106]




exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.13853015 6.1509399  0.92207714 0.14812051]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.13853015 6.1509399  0.92207714 0.14812051]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.13853015 6.1509399  0.92207714 0.14812051]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=no_IM_Tenlar :[3.13853015 6.1509399  0.92207714 0.14812051]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=full_model :[3.13853015 6.1509399  0.92207714 0.14812051]
exp_name=spring, lag_time=10, normalize=1, clustering=0, method=lasso :[3.11568778 6.09061414 0.92365359 0.14663869]




exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.92090296 5.73504708 0.93238113 0.14241162]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.92090296 5.73504708 0.93238113 0.14241162]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.92090296 5.73504708 0.93238113 0.14241162]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=no_IM_Tenlar :[2.92090296 5.73504708 0.93238113 0.14241162]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=full_model :[2.92090296 5.73504708 0.93238113 0.14241162]
exp_name=spring, lag_time=12, normalize=1, clustering=0, method=lasso :[2.893337   5.6597643  0.93055239 0.1418418 ]




exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.04984281 6.03733652 0.92471666 0.14822162]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.04984281 6.03733652 0.92471666 0.14822162]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.04984281 6.03733652 0.92471666 0.14822162]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=no_IM_Tenlar :[3.04984281 6.03733652 0.92471666 0.14822162]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=full_model :[3.04984281 6.03733652 0.92471666 0.14822162]
exp_name=spring, lag_time=14, normalize=1, clustering=0, method=lasso :[3.06374855 6.07551876 0.92287699 0.14933437]




exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[3.14210451 6.1841162  0.92069271 0.15012995]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[3.14210451 6.1841162  0.92069271 0.15012995]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[3.14210451 6.1841162  0.92069271 0.15012995]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=no_IM_Tenlar :[3.14210451 6.1841162  0.92069271 0.15012995]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=full_model :[3.14210451 6.1841162  0.92069271 0.15012995]
exp_name=spring, lag_time=2, normalize=1, clustering=0, method=lasso :[3.13146945 6.17058953 0.92100754 0.14967501]




exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_timeSeries_Tenlar :[2.90618558 5.73704647 0.93048036 0.12939913]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l3Norm_Tenlar :[2.90618558 5.73704647 0.93048036 0.12939913]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_l2Norm_Tenlar :[2.90618558 5.73704647 0.93048036 0.12939913]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=no_IM_Tenlar :[2.90618558 5.73704647 0.93048036 0.12939913]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=full_model :[2.90618558 5.73704647 0.93048036 0.12939913]
exp_name=spring, lag_time=3, normalize=1, clustering=0, method=lasso :[2.87003402 5.66254738 0.93094677 0.12761109]




KeyboardInterrupt: 

## monitor for paper(graphs + tables)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import config_final
import numpy as np
import os
import os.path as osp

# change work content to specific path(/mnt/pami/yqliu/water/)
os.chdir(config_final.path)

def write_optimalParams(experiments, repeat_num):
    for exp_name in experiments.keys():
        # exp_name is dict_keys type
        exp = experiments[exp_name]
        for r in range(repeat_num):
            for i,lag_time in enumerate(exp['lag_time']):
                f = open(osp.join('lar_semisup_elasticnet_params','{}-{}-{}-{}-params.txt'.format(exp_name, r, lag_time, 'semi-supervised_lar_elasticnet')), 'r')

                # results with string type
                aa = f.read()
                
                # convert to dic type
#                 hyperparams_lar_elasticnet = exec(a)

                print ('{}-{}-{}-{}-params.txt'.format(exp_name, r, lag_time, 'semi-supervised_lar_elasticnet'), aa )

    f.close()
                       
# calling
write_optimalParams(config_final.experiments,config_final.repeat_num)


In [None]:
# All year prediction and save .txt to LaTeX table
os.chdir(config_final.path)

# import  all_year record and return a ndnumpy
result = np.load('result/old2new.npy')

# remove redundant dimension of length 1
# ***0th is repeat_num（e.g. all year twice = 2）shape=(rp_n.,lag,nor.,clu.,methd.,metric.)

## calculate mean results on every model for 4 metrics
mean_result = np.mean(result.squeeze(), 0)

## calculate standard results on every model for 4 metrics
std_result = np.std(result.squeeze(), 0)


# print all original results split mean and standard
print ('mean_result_original:', mean_result)

print ('std_result_original:', std_result)

# rounding
mean_result = m.decimal2round2(mean_result)
std_result = m.decimal2round2(std_result)

# print all rounding results split mean and standard
print ('mean_result_decimal:', mean_result )
print ('std_result_decimal:', std_result )

file_name = 'table_old2new.txt'
# write to result to 'f'
f = open(file_name,'w')

# write a format such as ' & & {methods}'
f.write(' &')
for method in config_final.methods:
    f.write(' & {}'.format(method))

# write a format such as  '\\\\ \\hline \n'
f.write(' \\\\ \\hline \n')


for i,metric in enumerate(config_final.metrics):
    f.write(' & {}'.format(metric))
    for j,method in enumerate(config_final.methods):
        if metric == 'wi':
            mean_result[j][i] *= 100
            std_result[j][i] *= 100
        # split mean('rmse', 'mape', 'correlation', 'wi') and std to 2 tables   
        # result : mean + std
        f.write(' & {}$\\pm${}'.format(mean_result[j][i], std_result[j][i]))
    if i == len(config_final.metrics)-1:
        f.write(' \\\\ \\hline \\hline \n')
    else:
        f.write(' \\\\ \\hline \n')
f.close()

content = open(file_name, 'r').readlines()
print(content)


""" 
Format：

rmse:lar_elasticNet     /xgboost       /   lightgbm         /    mlr     /     ridge_regression   /     lasso  /random_forest  /  mlp  /  nn  /  ensemble
     mean:              mean:1.64±0.07   mean:1.91±0.11    mean:1.48±0.07     mean:1.43$±0.05    mean:1.42±0.05   ...
     
mape:...

correlation:...

wi:...

"""

In [None]:
# Seasonal prediction
file_name = 'table_seasonal.txt'
f = open(file_name,'w')
f.write(' &')
for method in config_final.methods:
    f.write(' & {}'.format(method))
f.write(' \\\\ \\hline \n')
flag = 0
for season in ['spring', 'summer', 'autumn', 'winter']:
    result = np.load(osp.join('result', '{}.npy'.format(season))).squeeze()
    print ('mean_result_origin:',result.shape)
    
    if season == 'autumn' or season == 'winter':
        
        # ***select the best result lag as results 
        result = result[:,1,:,:]
#     flag = flag +1 
#     print ('flag:',flag)
#         print ('result_au:', result.shape)

    mean_result = np.mean(result, 0)
    std_result = np.std(result, 0) 
    print ('mean_result_shape:', mean_result )
    print ('std_result_shape:', std_result)
    
    
    # select two decimal places
    mean_result = m.decimal2round2(mean_result)
    std_result = m.decimal2round2(std_result)
#     print ('mean_result_shape2:', mean_result.shape)
    
    f.write('{}'.format(season))
    for i,metric in enumerate(config_final.metrics):
        f.write(' & {}'.format(metric))
        for j,method in enumerate(config_final.methods):
            if metric == 'wi':
                mean_result[j][i] *= 100
                std_result[j][i] *= 100
            f.write(' & {}$\\pm${}'.format(mean_result[j][i], std_result[j][i]))
        if i == len(config_final.metrics)-1:
            f.write(' \\\\ \\hline \\hline \n')
        else:
            f.write(' \\\\ \\hline \n')
f.close()

content = open(file_name, 'r').readlines()
print(content)

## plot figure about parameters influence of ElasticNet using Least-Angle Regression(lar_elasticnet)

In [None]:
from matplotlib import pyplot as plt
import numpy as np

# 3D figure toolkit
from mpl_toolkits.mplot3d import Axes3D


os.chdir(config_final.path)
results = np.load(osp.join('lar_elasticnet_3d','{}-{}-{}-{}-scores.npy'.format('spring', 0, 8, 'lar_elasticnet')))

# print ('results_shape:', results.shape)
        
# print ('results:', results)
           
f, v = results.shape

# assign X , Y and Z

X = results[:,0]
Y = results[:,1]
Z = results[:,2]




## plot 3D surface diagram

fig = plt.figure(figsize = (18,8))

# Module containing Axes3D, an object which can plot 3D objects on a 2D matplotlib figure
# ax is a Axes3D instance
ax = fig.gca(projection='3d')


X, Y = np.meshgrid(X, Y)



Z = results[:,2].T
Z_list = []
for i in range(f):
    Z_list.append(Z)
    

Z_scores = np.concatenate(Z_list) 
Z_scores = Z_scores.reshape(f,f)

ax.set_title("Parameters-Perturbation Surface Diagram on LAR_ElasticNet")
ax.set_xlabel("lambda1")
ax.set_ylabel("lambda2")
ax.set_zlabel("R^2-Score")

# print ('z_scores:',Z_scores)
# print ('z_scores_shape:',Z_scores.shape)


ax.plot_surface(X, Y, Z_scores,  cmap=plt.cm.get_cmap('rainbow'))
# ax.legend()
os.chdir('/home/yqliu/Yiwei_water/results')
fig.savefig('Parameters-Perturbation pic2 on LAR_ElasticNet.pdf' , bbox_inches='tight', pad_inches=0.2, dpi=300)

plt.show()
plt.close()


## plot figure about effect of previous aluminum doses in autumn

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(10,8))
styles = ['r*--','b*--', 'm>-', 'y<-', 'c^-', 'ms-', 'yv-', 'b+-', 'gh-','k-', 'c*--']
methods_fmt = ['Semi-Supervised_LAR_ElasticNet', 'LAR_ElasticNet', 'Lasso','XGBoost','LightGBM', 'MLR', 'Ridge_Regression', 'Random_Forest', 'MLP','NN', 'Ensemble']

os.chdir(config_final.path)

result = np.load(osp.join('result', 'spring.npy')).squeeze()

# print ('results:', results)

# mean experiment times
results = result.mean(0)

# result shape = [lag_time, methods]→RMSE
rmse_results = results[:, :, 0]
# result shape = [lag_time, methods]→Correlation
correlation_results = results[:, :, 2]

# create a new figure instance[Figure], figsize is width, height(in inches) of Figure
fig = plt.figure(figsize = (12, 8))

# dding sub-plot instance(Axes) and its coordinate axis [left, bottom, width, height]
ax1 = fig.add_axes([0.1, 0.1, 1.2, 1.2])
ax2 = fig.add_axes([0.2, 0.7, 0.5, 0.5])

# ploting Figure
for j,method in enumerate(methods_fmt):

    # 6 lags for same method to plot x:lags  y:RMSE
#         [2,4,6,8,10,12]
    ax1.plot(np.array([2,4,6,8,12,14]),rmse_results[:, j], styles[j],linewidth = 2.5, label=method)
    ax2.plot(np.array([2,4,6,8,12,14]),correlation_results[:, j], styles[j], label=method)
    
# set grid    
# ax1.grid()
# ax2.grid()
# set text and axis of Figure
# ax1
ax1.set_title('The Effect of Previous Aluminum Doses in Autumn', fontsize=14)
ax1.set_xlim(-1,15)
# ax1.xticks(np.linspace(-np.pi,np.pi,20))
ax1.set_ylim(0,3)
ax1.set_xlabel('The Number of Previous Timesteps(t)',fontsize=14)
ax1.set_ylabel('RMSE',fontsize=14)

#ax2
ax2.set_ylabel('Correlation',fontsize=14)
ax2.set_xlim(-1,15)
ax2.set_ylim(-0.5,3)

ax1.legend(loc = 'upper right')

# save
os.chdir('/home/yqliu/Yiwei_water/results')
fig.savefig('PAD in Autumn.pdf', bbox_inches='tight', pad_inches=0.2, dpi=300)

plt.show()
plt.close()

## plot figure about effect of previous aluminum doses in winter

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(9,7))
styles = ['r*--','b*--', 'm>-', 'y<-', 'c^-', 'ms-', 'yv-', 'b+-', 'gh-','k-', 'c*--']

methods_fmt = ['LAR_ElasticNet', 'Lasso','XGBoost','LightGBM', 'MLR', 'Ridge_Regression', 'Random_Forest', 'MLP','NN', 'Ensemble']

os.chdir(config_final.path)

result = np.load(osp.join('result', 'winter.npy')).squeeze()

# mean experiment times
results = result.mean(0)

# result shape = [lag_time, methods]→RMSE
rmse_results = results[:, :, 0]
# result shape = [lag_time, mehtods]→Wi
wi_results = results[:, :, 3]

# create a new figure instance[Figure], figsize is width, height(in inches) of Figure
fig = plt.figure(figsize = (9, 7))

# dding sub-plot instance(Axes) and its coordinate axis [left, bottom, width, height]
ax1 = fig.add_axes([0.1, 0.1, 1.2, 1.2])
ax2 = fig.add_axes([0.2, 0.7, 0.5, 0.5])

# ploting Figure
for j,method in enumerate(methods_fmt):

    # 6 lags for same method to plot x:lags  y:RMSE
#         [2,4,6,8,10,12]
    ax1.plot(np.array([2,4,6,8,10,12]),rmse_results[:, j], styles[j], linewidth = 2.5, label=method)
    ax2.plot(np.array([2,4,6,8,10,12]),wi_results[:, j], styles[j], label=method)
    
# set grid    
# ax1.grid()
# ax2.grid()
# set text and axis of Figure
# ax1
ax1.set_title('The Effect of Previous Aluminum Doses in Winter', fontsize=14)
ax1.set_xlim(-1,13)
ax1.set_ylim(0,8)
ax1.set_xlabel('The Number of Previous Timesteps(t)',fontsize=14)
ax1.set_ylabel('RMSE',fontsize=14)

#ax2
ax2.set_ylabel('WI',fontsize=14)
ax2.set_xlim(-1,13)
ax2.set_ylim(-0.5,2)

ax1.legend(loc = 'upper right')

# save
os.chdir('/home/yqliu/Yiwei_water/results')
fig.savefig('PAD in water.pdf' , bbox_inches='tight', pad_inches=0.2, dpi=300)

plt.show()
plt.close()

## plot figure about scatterplot of measured/calculated coagulant dosage in training and testing

In [None]:
# get all random result data
GT_test, GT_train, predicted_results_test, predicted_results_train = m.specify_experiments(config_final.methods, config_final.experiments)
print ("loading completely...")

In [None]:
import matplotlib.pyplot as plt

  
# print ('predicted_results_test_shape:', predicted_results_test.shape) 
# print ('predicted_results_test:',predicted_results_test)
    
# set sample number
sample_number = 150 
L = np.arange(sample_number)                                                                                           
                                                                                           
# set random sample_id                                                                                            
sample_id = np.random.randint(0,(1000 - sample_number + 1) )

print ('sample_id:',sample_id)
                                                                                           
fig = plt.figure(figsize=(9,6))
styles = ['r-','b-', 'k-', 'y-', 'c-', 'm-', 'y-', 'b-', 'g-','c--']
methods_fmt = ['LAR_ElasticNet', 'Lasso','XGBoost','LightGBM', 'MLR', 'Ridge_Regression', 'Random_Forest', 'MLP','NN', 'Ensemble']

os.chdir(config_final.path)


# create a new figure instance[Figure], figsize is width, height(in inches) of Figure
fig = plt.figure(figsize = (13, 7))

# adding sub-plot instance(Axes) and its coordinate axis [left, bottom, width, height]
ax1 = fig.add_axes([0.1, 0.1, 1.2, 1.2])
ax2 = fig.add_axes([0.2, 0.77, 0.5, 0.5])


# vv = predicted_results_test[j, sample_id:(sample_id+sample_number)]
# print ('vv_shape:',vv.shape)

# ploting Figure
for j,method in enumerate(methods_fmt):

    # use random predicted result data for training and testing
    ax1.plot(L,predicted_results_test[j, sample_id:(sample_id+sample_number)], styles[j], linewidth = 1, label=method)                                                                                                                                                                                      
    ax2.plot(L,predicted_results_train[j, sample_id:(sample_id+sample_number)], styles[j], linewidth = 1, label=method)

# use random GT result data for training and testing
ax1.plot(L,GT_test[sample_id:(sample_id+sample_number)], 'k:', linewidth = 1.5, label='optimal')                                                                                                                                                                                      
ax2.plot(L,GT_train[sample_id:(sample_id+sample_number)], 'k:', linewidth = 1.5, label='optimal')                                                                                                                                                                                      
                                                                                           

                                                                                           
# set grid    
# ax1.grid()
# ax2.grid()
# set text and axis of Figure
# ax1
ax1.set_title('The Scatterplot of Measured/Calculated Coagulant Doses in Traing and Testing', fontsize=14)
ax1.set_xlim(-1,160)
ax1.set_ylim(20,80)
ax1.set_xlabel('N-th Sample(Testing)',fontsize=14)
ax1.set_ylabel('Coagulant Dosage(mg/L)',fontsize=14)

#ax2
ax2.set_xlabel('N-th Sample(Training)',fontsize=14)                                                                                           
ax2.set_xlim(-1,160)
ax2.set_ylim(20,80)

ax1.legend(loc = 'upper right')

# save
os.chdir('/home/yqliu/Yiwei_water/results')
fig.savefig('Measured and calculated CD_pic.pdf', bbox_inches='tight', pad_inches=0.2, dpi=300)
# plt.savefig('previous.png',bbox_inches='tight', pad_inches=0.2, dpi = 300)


plt.show()
plt.close()

## plot histogram of features

In [None]:
# get whole data(15.1.1-17.1.1) and non-normalized
os.chdir(config_final.path)
os.environ["CUDA_VISIBLE_DEVICES"]="0"
m = Water(config_final.excel_files)
m.source.attr2source([('2015-01-01 00:00:00','2017-01-01 00:00:00')], 0, 0, 3, 'time-series')
dataset_all = m.source.all_x
print ('dataset_all_shape:',dataset_all.shape)
label_all = m.source.all_y
print ('label_all_shape:',label_all.shape)

print ("loading completely...")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

fig = plt.figure(figsize=(18,14))
plt.title('Histogram of features in the whole dataset')

## plot first feature[RW-Nitrite]

#  subplot[nrows, ncols,index], describing the position of the subplot, index starts at 1 in the upper left corner and increases to the right
axes = plt.subplot(3,3,1)

# input firest feature and set bins equivalent to (20+1)
plt.hist(dataset_all[:,0],20, facecolor='b')
# Remove the top and right spines from plot(s)
sns.despine()
#set labels
plt.xlabel('RW-Nitrite '+r'(mg/L)')  
plt.ylabel('Number of samples')  
plt.xlim(0,300)
plt.grid(True)


#id.
axes = plt.subplot(3,3,2)
plt.hist(dataset_all[:,1],20, facecolor='g')
sns.despine()
plt.xlabel('RW-Conductivity '+r'($\mu$s/cm)')   
plt.xlim(200,800)
plt.grid(True)


axes = plt.subplot(3,3,3)
plt.hist(dataset_all[:,2],20, facecolor='r')
sns.despine()
plt.xlabel('RW-pH')   
plt.xlim(7,8.5)
plt.grid(True)


axes = plt.subplot(3,3,4)
plt.hist(dataset_all[:,3],60, facecolor='c')
sns.despine()
plt.xlabel('RW-Turbidity '+r'(NTU)')  
plt.ylabel('Number of samples')
plt.xlim(0,100)
plt.grid(True)



axes = plt.subplot(3,3,5)
plt.hist(dataset_all[:,4],20, facecolor='m')
sns.despine()
plt.xlabel('RW-Dissolved Oxygen '+r'(mg/L)') 
plt.grid(True)



axes = plt.subplot(3,3,6)
plt.hist(dataset_all[:,5],20, facecolor='y')
sns.despine()
plt.xlabel('SW-Turbidity '+r'(NTU)')    
plt.xlim(0,2)
plt.grid(True)


axes = plt.subplot(3,3,7)
plt.hist(dataset_all[:,6],20, facecolor='b')
sns.despine()
plt.xlabel('SW-Dissolved Oxygen '+'(mg/L)')  
plt.ylabel('Number of samples')
plt.grid(True)



axes = plt.subplot(3,3,8)
plt.hist(dataset_all[:,7],10, facecolor='purple')
sns.despine()
plt.xlabel('C-Turbidity '+r'(NTU)')       
plt.xlim(0,0.5)  
plt.grid(True)


axes = plt.subplot(3,3,9)
plt.hist(label_all,20, facecolor='k')
sns.despine()
plt.xlabel('Optimal coagulant dosage '+r'(mg/L)')       
plt.xlim(0,120)
plt.grid(True)


# automatically adjusts subplot params so that the subplot(s) fits in to the figure area
plt.tight_layout()

# save .pdf
fig.savefig('Features_histogram_pic.pdf', bbox_inches='tight', pad_inches=0.2, dpi=300)
plt.show() 




## plot features in a period time

In [None]:
# get random data(15.5.5-15.5.7) and non-normalized
m.source.attr2source([('2015-05-05 00:00:00','2015-05-07 00:00:00')], 0, 0, 3, data_type='all')
a = m.source.all_x
b = m.source.all_y
print ('a_shape:',a.shape)
print ('b_shape:',b.shape)
print ("loading completely...")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

sns.set_style("ticks")


fig = plt.figure(figsize=(8,18))

axes = plt.subplot(9,1,1)
plt.plot(a[:,0], 'r-', linewidth = 1.8)
sns.despine()
plt.ylabel('RW-Nitrite (mg/L)')
plt.grid(True)

axes = plt.subplot(9,1,2)
plt.plot(a[:,1], 'b-', linewidth = 1.8)
sns.despine()
plt.ylabel('RW-Conductivity (mg/L)')
plt.grid(True)

axes = plt.subplot(9,1,3)
plt.plot(a[:,2], 'y-', linewidth = 1.8)
plt.ylabel('RW-pH')
plt.grid(True)


axes = plt.subplot(9,1,4)
plt.plot(a[:,3], 'c-', linewidth = 1.8)
sns.despine()
plt.ylabel('RW-Turbidity')
plt.grid(True)


axes = plt.subplot(9,1,5)
plt.plot(a[:,4], 'm-', linewidth = 1.8)
sns.despine()
plt.ylabel('RW-Dissolved Oxygen')
plt.grid(True)


axes = plt.subplot(9,1,6)
plt.plot(a[:,5], 'y-', linewidth = 1.8)
sns.despine()
plt.ylabel('SW-Turbidity')
plt.grid(True)


axes = plt.subplot(9,1,7)
plt.plot(a[:,6], 'r--',linewidth = 1.8)
sns.despine()
plt.ylabel('SW-Dissolved Oxygen')
plt.grid(True)


axes = plt.subplot(9,1,8)
plt.plot(a[:,7], 'y--', linewidth = 1.8)
sns.despine()
plt.ylabel('C-Turbidity')
plt.grid(True)

    
plt.subplot(9,1,9)
plt.plot(b, 'k-', linewidth = 2)
sns.despine()
plt.ylabel('Optimal coagulant dosage')
plt.xlabel('Sample')
plt.grid(True)

    
plt.tight_layout()
# save .pdf
fig.savefig('Fetures_values_pic.pdf', bbox_inches='tight', pad_inches=0.2, dpi=300)
plt.show() 