# Comparison Experiment for Time-consistent Elastic Net with Least-angle Regression (TENLar) vs 9 Baseline Models for Water Treatment 


In [None]:
from DataSet import DataSet
import config_final
import os.path as osp
import pandas as pd
import xlrd
import os
import numpy as np
import math
import logging
from sklearn.linear_model import RidgeCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoLars
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import LassoLarsCV
from sklearn.linear_model import LassoCV
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
import lightgbm as lgb
from sklearn.linear_model import LinearRegression
from sklearn import cross_validation
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, CuDNNLSTM
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

## Training + Validating All Models, and Predicting for Shanghai SMI Dataset

In [None]:
class Water:
    """Experiment driver that keeps the ``DataSet`` its methods read from in ``self.source``."""
    def __init__(self, excel_files):
        """Build the underlying ``DataSet``.

        Args:
            excel_files: Passed through unchanged to ``DataSet``; presumably
                the Excel workbooks to load (confirm against ``DataSet``).
        """
        self.source = DataSet(excel_files)

""" ***************Structure All Model Instances***************"""
    def build_model(self, method, source):        
        if method=='TENlar':
            opt, D = self.cv_TENlar(source.train_x, source.train_y, source.ts_x)
            alp = opt['alpha']
            best_params_score = opt['combine_params']
            best_params_group = best_params_score[0:3]
            model = LassoLars(alpha = alp, max_iter=1000)
            return model, D, best_params_group
                               
        if method=='ENlar':
            opt = self.cv_ENlar(source.train_x, source.train_y)
            alp = opt['alpha']
            best_params_group = opt['combine_params'][0:2]
            model = LassoLars(alpha = alp, max_iter=1000)
            return model, best_params_group
                    
        if method=='lasso':
            model = LassoCV(alphas=[1e-3,1e-2,1e-1,1,1e1,1e2,1e3], cv=2, max_iter=50)            
            return model
        
        if method=='xgboost':
            xgb_model = xgb.XGBRegressor()
            model = GridSearchCV(xgb_model,
                       {'learning_rate':[0.09],
                        'n_estimators':[700],
                        'max_depth':[6]                 
                       },cv=5,verbose=1)                       
            return model
        
        if method=='lightgbm':
            estimator = lgb.LGBMRegressor()
            param_grid = {'learning_rate':[0.1],
                          'n_estimators':[20],                      
                          'num_leaves':[6]
                          }
            model = GridSearchCV(estimator, param_grid,cv=5)
            return model
            
        if method=='mlr':
            model = LinearRegression()
            return model
       
        if method=='ridge_regression':
            model = RidgeCV(alphas=[1e-2,1e-1,1,1e1,1e2], cv=5)
            return model

        if method=='random_forest':
            opt = self.cv_RandomForestRegressor(source.train_x, source.train_y)
            model = RandomForestRegressor(**opt)
            return model
            
        if method=='mlp':
            n, m = source.train_x.shape
            model = Sequential()
            model.add(Dense(512, activation='relu', input_dim=m))
            model.add(Dense(128, activation='relu'))
            model.add(Dense(1))
            adam = Adam(lr=1e-4)
            model.compile(loss='mean_squared_error', optimizer=adam)
            return model
                      
        if method=='nn':
            n, m = source.train_x.shape
            model = Sequential()
            model.add(Dense(512, activation='relu', input_dim=m))
            model.add(Dense(1))
            adam = Adam(lr=1e-4)
            model.compile(loss='mean_squared_error', optimizer=adam)
            return model
    
    """ ***************Validating for TENLar***************"""
    def cv_TENlar(self, x, y, xts):
        """Grid-search the three TENLar penalties and build the difference segments.

        The time-series matrix ``xts`` is cut into segments of 4000 rows and
        each segment is turned into its first-order row differences. For every
        (lambda1, lambda2, lambda3) combination one randomly chosen segment is
        appended, together with a scaled identity block, below ``x`` so that a
        plain ``LassoLars`` fit on the augmented data carries the ridge and
        time-consistency penalties.

        Args:
            x: Training features, 2-D array ``(n_samples, n_features)``.
            y: Training targets, 1-D array (it is concatenated with a 1-D
                zero vector).
            xts: Time-series features, 2-D array with the same number of
                columns as ``x`` (required by the row-wise concatenation).

        Returns:
            ``(opt, D)`` where ``opt['alpha']`` is the LassoLars alpha of the
            best combination, ``opt['combine_params']`` is
            ``array([lambda1, lambda2, lambda3, score])`` and ``D`` is the
            list of difference segments, each of shape ``(4000, n_cols)``.
        """
        opt = {}
        ## set hyperparameters for TENLar
        lamda1 = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1,1.2e-1,1.4e-1,1.6e-1,1.8e-1,2e-1,2.2e-1,2.4e-1,2.6e-1,2.8e-1, 3e-1, 3.1e-1, 3.2e-1, 3.23e-1, 3.25e-1, 3.27e-1, 3.3e-1, 3.35e-1, 3.5e-1]
        lamda2 = [0, 9e-3, 7e-3, 6e-3, 3e-3, 1e-2, 2e-2, 5e-2, 8e-2, 1e-1, 1.1e-1, 1.2e-1, 1.3e-1, 1.4e-1, 1.5e-1, 1.6e-1, 1.7e-1, 1.8e-1, 2e-1, 2.1e-1, 2.3e-1, 2.5e-1]
        lamda3 = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0, 2e-1,8e-1, 1]

        m = x.shape[1]
        m_ts, n_ts = xts.shape
        # rows per time-series segment
        l_ts = 4000
        # At least one segment, otherwise the random segment pick below
        # would fail on short series.
        ts_no = max(1, int(round(m_ts / l_ts)))

        # Difference between time-consecutive samples, one matrix per segment.
        # Each segment needs its OWN buffer: appending one shared array made
        # every entry of D alias the last segment.
        D = []
        for sp in range(ts_no):
            idb_ts = sp * l_ts
            temp_ts = xts[idb_ts:(idb_ts + l_ts), :]
            tmp_m = temp_ts.shape[0]
            D_seg = np.zeros([l_ts, n_ts])
            if tmp_m > 1:
                # rows past tmp_m - 1 stay zero and do not influence the fit
                D_seg[:tmp_m - 1, :] = np.diff(temp_ts, axis=0)
            D.append(D_seg)

        # Decay factor of the time-consistent term. The count of abrupt
        # changes is fixed to 100 per segment (the original code counted
        # jumps above 20 NTU but never used the count).
        average_ab_no = 100
        a_s = (1 - (average_ab_no / l_ts)) * 10

        alps = []
        comb_params_score = []
        scores = []

        # Tuning parameters in grid.
        # NOTE(review): the score is R^2 on the training data itself, not on
        # a held-out fold, and the segment is re-drawn at random for every
        # grid point, so the selection is not reproducible without a seed.
        for l1 in lamda1:
            for l2 in lamda2:
                shrink = 1 / math.sqrt(1 + l2)
                # alpha of the equivalent lasso problem
                alp = l1 / math.sqrt(1 + l2)
                # ridge part of the elastic net expressed as extra rows
                I = math.sqrt(l2) * np.eye(m)
                for l3 in lamda3:
                    seg_no = np.random.randint(0, ts_no)
                    # time-consistent term
                    current_D = math.sqrt(a_s * l3) * D[seg_no]
                    z0 = np.zeros([current_D.shape[0] + m])

                    trainX = shrink * np.concatenate((x, I, current_D), 0)
                    trainY = np.concatenate((y, z0), 0)

                    clf = LassoLars(alpha = alp, max_iter = 1000)
                    current_model = clf.fit(trainX, trainY)
                    score = current_model.score(x, y, None)

                    scores.append(score)
                    comb_params_score.append(np.array([l1, l2, l3, score]))
                    alps.append(alp)

        scores = np.array(scores)
        # index of the highest score (last position of the ascending sort)
        best = np.argsort(scores)[-1]

        best_score = scores[best]
        print ('best_score:', best_score)
        print ('-'*16)

        opt['combine_params'] = comb_params_score[best]
        opt['alpha'] = alps[best]
        return opt, D
                    
        
    """ Tuning params in grid for ENLar"""     
    def cv_ENlar(self,x,y):
        """Pick (lambda1, lambda2) for the LARS elastic net by exhaustive search.

        Each candidate is fitted as a ``LassoLars`` problem on ``x`` extended
        with ``sqrt(lambda2) * I`` rows (targets padded with zeros) and scaled
        by ``1 / sqrt(1 + lambda2)``; candidates are ranked by the R^2 that
        the fitted model reaches on the un-augmented ``(x, y)``.

        Returns:
            dict with ``'alpha'`` (LassoLars alpha of the winner) and
            ``'combine_params'`` (``array([lambda1, lambda2, score])``).
        """
        lamda1 = [1e-3, 5e-3, 1e-2, 5e-2, 1e-1,1.2e-1,1.4e-1,1.6e-1,1.8e-1,2e-1,2.2e-1,2.4e-1,2.6e-1,2.8e-1, 3e-1, 3.1e-1, 3.2e-1, 3.23e-1, 3.25e-1, 3.27e-1, 3.3e-1,3.35e-1,3.5e-1]
        lamda2 = [1e-3,1e-2,1e-1,0,1,1e1,1e2,1e3]

        n_features = x.shape[1]
        trial_alphas = []
        trial_records = []
        trial_scores = []

        for l1 in lamda1:
            for l2 in lamda2:
                root = math.sqrt(1 + l2)
                alpha = l1 / root
                ridge_rows = math.sqrt(l2) * np.eye(n_features)
                design = np.concatenate((x, ridge_rows), 0)
                targets = np.concatenate((y, np.zeros([n_features])), 0)
                design = design / root
                fitted = LassoLars(alpha = alpha, max_iter = 1000).fit(design, targets)
                r2 = fitted.score(x, y, None)
                trial_scores.append(r2)
                trial_records.append(np.array([l1, l2, r2]))
                trial_alphas.append(alpha)

        # last entry of the ascending argsort is the best-scoring trial
        winner = np.argsort(np.array(trial_scores))[-1]
        opt = {}
        opt['alpha'] = trial_alphas[winner]
        opt['combine_params'] = trial_records[winner]
        return opt
           
    def cv_RandomForestRegressor(self, x, y):
        """Tune random-forest hyper-parameters one at a time with 5-fold CV.

        The parameters are optimised coordinate-wise in the order they are
        listed: the best value of an earlier parameter is fixed while the
        next one is searched. Candidates are ranked by mean negative MSE.

        Args:
            x: Training features, 2-D array.
            y: Training targets.

        Returns:
            dict mapping 'max_features', 'max_depth' and 'n_estimators' to the
            selected values, ready for ``RandomForestRegressor(**opt)``.
        """
        # sklearn.cross_validation was removed in scikit-learn 0.20; the same
        # function lives in sklearn.model_selection (already used by this file).
        from sklearn.model_selection import cross_val_score

        n_features = x.shape[1]
        # multiples of 9 below the feature count; with 9 or fewer features the
        # range is empty, so fall back to using all features
        max_features_grid = list(range(9, n_features, 9)) or [n_features]
        parameters_lists = {'max_features': max_features_grid,
                            'max_depth': [16, 64, 256],
                            'n_estimators': [128]
                            }
        opt = {}
        for pname, plist in parameters_lists.items():
            scores = np.zeros([len(plist)])
            for p, value in enumerate(plist):
                argues = dict(opt)
                argues[pname] = value
                clf = RandomForestRegressor(**argues)
                scores[p] = np.mean(cross_val_score(clf, x, y, cv=5,
                                                    scoring='neg_mean_squared_error'))
            # highest (least negative) score wins
            opt[pname] = plist[np.argsort(scores)[-1]]
        return opt
    
    """**************Experiments and Predicting**************"""
    def run_experiments(self, methods, experiments, repeat_num, metrics):
        """Train and evaluate every method on every experiment configuration.

        For each experiment the grid ``repeat x lag_time x clustering x
        normalize`` is walked; for each cell the time-series, training and
        testing data are (re)built on ``self.source``, every method is fitted
        and scored with ``self.evaluate``. The scores of one experiment are
        saved to ``result/<exp_name>.npy`` as an array of shape
        ``[repeat, lag_time, clustering, normalize, method, metric]``.

        Args:
            methods: Model names understood by ``build_model``; the special
                name 'ensemble' averages the predictions of all models that
                precede it in the list.
            experiments: dict ``name -> {'times': (train_time, test_time),
                'lag_time': [...], 'clustering': [...], 'normalize': [...]}``.
            repeat_num: Number of repetitions of the whole grid.
            metrics: Metric names understood by ``evaluate``.

        Raises:
            ValueError: If 'ensemble' is listed before any other method.
        """
        for exp_name, exp in experiments.items():
            train_time, test_time = exp['times']
            evaluation = np.zeros([repeat_num, len(exp['lag_time']), len(exp['clustering']),
                                   len(exp['normalize']), len(methods), len(metrics)])
            print ('repeat_num:',repeat_num)

            for r in range(repeat_num):
                for i,lag_time in enumerate(exp['lag_time']):
                    for j,clustering in enumerate(exp['clustering']):
                        for k,normalize in enumerate(exp['normalize']):

                            # time-series data (never clustered)
                            self.source.attr2source(train_time,
                                                    normalize,
                                                    0,
                                                    lag_time,
                                                    data_type='time-series')
                            # general training data
                            self.source.attr2source(train_time,
                                                    normalize,
                                                    clustering,
                                                    lag_time,
                                                    data_type='train')
                            # general testing data
                            self.source.attr2source(test_time,
                                                     normalize,
                                                     clustering,
                                                     lag_time,
                                                     data_type='test')

                            ensemble_y = 0
                            # number of models summed into ensemble_y so far;
                            # dividing by len(methods) - 1 was only right when
                            # 'ensemble' was the last and only such entry
                            ensemble_members = 0
                            for t, method in enumerate(methods):
                                if method == 'ensemble':
                                    if ensemble_members == 0:
                                        raise ValueError("'ensemble' must come after at least one other method")
                                    predict_y = ensemble_y / float(ensemble_members)
                                else:
                                    predict_y = self._fit_predict_single(method)
                                    ensemble_y += predict_y.reshape([-1, 1])
                                    ensemble_members += 1

                                evaluation[r, i, j, k, t, :] = self.evaluate(predict_y, self.source.test_y, metrics)
                                print('exp_name={}, lag_time={}, normalize={}, clustering={}, method={} :{}'.format(
                                    exp_name, lag_time, normalize, clustering, method, evaluation[r, i, j, k, t, :]))

            # np.save does not create missing directories
            if not osp.isdir('result'):
                os.makedirs('result')
            np.save('result/{}.npy'.format(exp_name), evaluation)

    def _fit_predict_single(self, method):
        """Build ``method`` afresh, fit it on the current source and return its test-set predictions."""
        source = self.source
        key = method.lower()

        if key in ('tenlar', 'enlar'):
            n_features = source.train_x.shape[1]
            if key == 'tenlar':
                # build_model expects the spelling 'TENlar'
                model, D, best_params_group = self.build_model('TENlar', source)
                lambda2 = best_params_group[1]
                lambda3 = best_params_group[2]
                # random difference segment, as during validation
                D_no = D[np.random.randint(0, len(D))]
                exp_ab_no = 100
                a_s = (1 - (exp_ab_no / D_no.shape[0])) * 10
                extra_rows = [math.sqrt(lambda2) * np.eye(n_features),
                              math.sqrt(a_s * lambda3) * D_no]
            else:
                # build_model expects the spelling 'ENlar'
                model, best_params_group = self.build_model('ENlar', source)
                lambda2 = best_params_group[1]
                extra_rows = [math.sqrt(lambda2) * np.eye(n_features)]

            trainX = np.concatenate([source.train_x] + extra_rows, 0)
            # Same 1/sqrt(1 + lambda2) scaling as in cv_TENlar / cv_ENlar:
            # the alpha returned by validation was tuned on the scaled design.
            trainX = trainX / math.sqrt(1 + lambda2)
            # the appended penalty rows have target 0
            n_extra = trainX.shape[0] - source.train_x.shape[0]
            trainY = np.concatenate((source.train_y, np.zeros([n_extra])), 0)

            model = model.fit(trainX, trainY)
            return model.predict(source.test_x)

        if method == 'mlp' or method == 'nn':
            model = self.build_model(method, source)
            # NOTE(review): the networks are trained on the first 1500
            # time-series rows rather than on train_x (specify_experiments
            # uses train_x) — confirm this is intended.
            model.fit(source.ts_x[0:1500, :], source.ts_y[0:1500],
                      epochs=200, batch_size=128,
                      verbose=0
                     )
            return model.predict(source.test_x)

        model = self.build_model(method, source)
        model = model.fit(source.train_x, source.train_y)
        return model.predict(source.test_x)


    """**************Graph Experiments**************"""
    def specify_experiments(self, methods, experiments):
        """Fit every method on the 'spring' experiment and collect its predictions.

        Data are built without normalisation or clustering and with a lag of
        10 (the literal arguments passed to ``attr2source``).

        Args:
            methods: Model names understood by ``build_model``; 'ensemble'
                averages the predictions of the models that precede it.
            experiments: Experiment dict; only ``experiments['spring']['times']``
                is read.

        Returns:
            ``(GT_test, GT_train, predicted_results_test,
            predicted_results_train)``: the measured targets of the test and
            training data, and one row of predictions per method for each of
            them (row length = number of samples in the respective set).

        Raises:
            ValueError: If 'ensemble' is listed before any other method.
        """
        exp = experiments['spring']
        train_time, test_time = exp['times']
        # cv_TENlar reads source.ts_x, so prepare the time-series data with
        # the same settings whenever TENlar is requested
        if any(name.lower() == 'tenlar' for name in methods):
            self.source.attr2source(train_time,0,0,10,data_type='time-series')
        self.source.attr2source(train_time,0,0,10,data_type='train')
        self.source.attr2source(test_time,0,0,10,data_type='test')

        # measured coagulant dosage of the training and testing data
        GT_test = self.source.test_y
        GT_train = self.source.train_y

        # one row of predictions per method; sized from the data instead of
        # a fixed 1000 samples
        predicted_results_test = np.zeros([len(methods), self.source.test_x.shape[0]])
        predicted_results_train = np.zeros([len(methods), self.source.train_x.shape[0]])

        ensemble_y_train = 0
        ensemble_y_test = 0
        ensemble_members = 0
        for t, method in enumerate(methods):
            if method == 'ensemble':
                if ensemble_members == 0:
                    raise ValueError("'ensemble' must come after at least one other method")
                predict_y_train = ensemble_y_train / float(ensemble_members)
                predict_y_test = ensemble_y_test / float(ensemble_members)

            else:
                key = method.lower()
                # NOTE(review): unlike run_experiments, TENlar / ENlar are
                # fitted here on the plain training data without the
                # augmented penalty rows — confirm this is intended.
                if key == 'tenlar':
                    # build_model expects the spelling 'TENlar'
                    model, D, best_params_group = self.build_model('TENlar', self.source)
                    model = model.fit(self.source.train_x, self.source.train_y)

                elif key == 'enlar':
                    # build_model expects the spelling 'ENlar'
                    model, best_params_group = self.build_model('ENlar', self.source)
                    model = model.fit(self.source.train_x, self.source.train_y)

                elif method == 'mlp' or method == 'nn':
                    model = self.build_model(method, self.source)
                    model.fit(self.source.train_x, self.source.train_y,
                              epochs=200, batch_size=128,
                              verbose=0)

                else:
                    model = self.build_model(method, self.source)
                    model = model.fit(self.source.train_x, self.source.train_y)

                predict_y_test = model.predict(self.source.test_x)
                predict_y_train = model.predict(self.source.train_x)

                ensemble_y_test += predict_y_test.reshape([-1, 1])
                ensemble_y_train += predict_y_train.reshape([-1, 1])
                ensemble_members += 1

            predicted_results_test[t, :] = predict_y_test.reshape(-1)
            predicted_results_train[t, :] = predict_y_train.reshape(-1)
        return GT_test, GT_train, predicted_results_test, predicted_results_train
    
        
    """**************Utility**************"""     
    def evaluate(self, pred_y, true_y, metrics):
        """Score predictions against the measured values.

        Args:
            pred_y: Predicted values, any shape; flattened before use.
            true_y: Measured values, any shape; flattened before use.
            metrics: Sequence of metric names:
                'rmse' (root mean squared error),
                'mape' (mean absolute percentage error, in percent),
                'correlation' (Pearson correlation coefficient),
                'wi' (Willmott's index of agreement, 1 = perfect).

        Returns:
            1-D array with one score per entry of ``metrics``, same order.

        Raises:
            ValueError: If a metric name is not recognised (previously the
                score was silently left at 0).
        """
        pred_y = pred_y.reshape([-1])
        true_y = true_y.reshape([-1])
        results = np.zeros([len(metrics)])
        for i, metric in enumerate(metrics):
            if metric == 'rmse':
                results[i] = np.sqrt(np.mean(np.power(pred_y - true_y, 2)))
            elif metric == 'mape':
                # relative to the magnitude of the measurement, so negative
                # targets cannot produce a negative "absolute" error
                results[i] = np.mean(np.abs(true_y - pred_y) / np.abs(true_y)) * 100
            elif metric == 'correlation':
                results[i] = np.corrcoef(pred_y, true_y)[0, 1]
            elif metric == 'wi':
                mean_true = np.mean(true_y)
                squared_error = np.sum(np.power(true_y - pred_y, 2))
                potential_error = np.sum(np.power(np.abs(pred_y - mean_true) + np.abs(true_y - mean_true), 2))
                # index of agreement: d = 1 - SSE / potential error
                results[i] = 1 - squared_error / potential_error
            else:
                raise ValueError('unknown metric: {!r}'.format(metric))
        return results
    
    def decimal2round2(self, results):
        """Round every element of ``results`` to 8 decimal places.

        The rounding is written through a flattened view, so a contiguous
        input array is modified in place; the returned array has the shape
        of the input.
        """
        original_shape = results.shape
        flat = results.reshape(-1)
        for position, value in enumerate(flat):
            flat[position] = round(value, 8)
        return flat.reshape(original_shape)

## Runnable Interface and CUDA Device Selection

In [None]:
# Work from the configured project directory; run_experiments writes its
# 'result/<experiment>.npy' files relative to the current directory.
os.chdir(config_final.path)
# Make only the GPU with index 1 visible to CUDA-based libraries.
os.environ["CUDA_VISIBLE_DEVICES"]="1"
# `m` is reused by the visualization cells below.
m = Water(config_final.excel_files)
m.run_experiments(config_final.methods, 
                  config_final.experiments,
                  config_final.repeat_num,
                  config_final.metrics)

## Visualization

## RMSE and Correlation (R) of All Models for Different Numbers of Time-Lagged Aluminum Doses (Autumn)

In [None]:
import matplotlib.pyplot as plt

styles = ['r*--','b*--', 'm>-', 'y<-', 'c^-', 'ms-', 'yv-', 'b+-', 'gh-','k-', 'c*--']
methods_fmt = ['Semi-Supervised_LAR_ElasticNet', 'LAR_ElasticNet', 'Lasso','XGBoost','LightGBM', 'MLR', 'Ridge_Regression', 'Random_Forest', 'MLP','NN', 'Ensemble']

os.chdir(config_final.path)

# NOTE(review): title and output file say "Autumn" but the data loaded is
# spring.npy — confirm which season is intended.
result = np.load(osp.join('result', 'spring.npy'))

# run_experiments saves [repeat, lag_time, clustering, normalize, method, metric].
# Average over the repeats BEFORE dropping singleton axes: squeezing first
# removes the repeat axis when repeat_num == 1 and mean(0) would then
# average over the lags instead.
if result.ndim == 6:
    # first clustering / normalize setting
    results = result.mean(0)[:, 0, 0, :, :]
else:
    results = result.squeeze().mean(0)

# result shape = [lag_time, methods] -> RMSE
rmse_results = results[:, :, 0]
# result shape = [lag_time, methods] -> Correlation
correlation_results = results[:, :, 2]

# x positions: the lags the experiment was actually run with, instead of a
# hard-coded list that can drift from the configuration
lags = np.array(config_final.experiments['spring']['lag_time'])

# create the figure; figsize is (width, height) in inches
# (the extra empty figure created earlier in the original cell is dropped)
fig = plt.figure(figsize = (12, 8))

# add the main axes and the inset axes: [left, bottom, width, height]
ax1 = fig.add_axes([0.1, 0.1, 1.2, 1.2])
ax2 = fig.add_axes([0.2, 0.7, 0.5, 0.5])

# one line per method; never index past the methods present in the results
n_methods = rmse_results.shape[1]
for j, (method, style) in enumerate(zip(methods_fmt[:n_methods], styles)):
    ax1.plot(lags, rmse_results[:, j], style, linewidth = 2.5, label=method)
    ax2.plot(lags, correlation_results[:, j], style, label=method)

# text and axis limits of the main axes
ax1.set_title('The Effect of Previous Aluminum Doses in Autumn', fontsize=14)
ax1.set_xlim(-1,15)
ax1.set_ylim(0,3)
ax1.set_xlabel('The Number of Previous Timesteps(t)',fontsize=14)
ax1.set_ylabel('RMSE',fontsize=14)

# inset axes
ax2.set_ylabel('Correlation',fontsize=14)
ax2.set_xlim(-1,15)
ax2.set_ylim(-0.5,3)

ax1.legend(loc = 'upper right')

# save
os.chdir('/home/yqliu/Yiwei_water/results')
fig.savefig('PAD in Autumn.pdf', bbox_inches='tight', pad_inches=0.2, dpi=300)

plt.show()
plt.close()

## RMSE and Willmott Index (WI) of All Models for Different Numbers of Time-Lagged Aluminum Doses (Winter)

In [None]:
import matplotlib.pyplot as plt

styles = ['r*--','b*--', 'm>-', 'y<-', 'c^-', 'ms-', 'yv-', 'b+-', 'gh-','k-', 'c*--']

# NOTE(review): this list has no entry for TENLar, unlike the autumn cell;
# labels are matched to result columns by position, so confirm it lines up
# with config_final.methods.
methods_fmt = ['LAR_ElasticNet', 'Lasso','XGBoost','LightGBM', 'MLR', 'Ridge_Regression', 'Random_Forest', 'MLP','NN', 'Ensemble']

os.chdir(config_final.path)

result = np.load(osp.join('result', 'winter.npy'))

# run_experiments saves [repeat, lag_time, clustering, normalize, method, metric].
# Average over the repeats BEFORE dropping singleton axes: squeezing first
# removes the repeat axis when repeat_num == 1 and mean(0) would then
# average over the lags instead.
if result.ndim == 6:
    # first clustering / normalize setting
    results = result.mean(0)[:, 0, 0, :, :]
else:
    results = result.squeeze().mean(0)

# result shape = [lag_time, methods] -> RMSE
rmse_results = results[:, :, 0]
# result shape = [lag_time, methods] -> WI
wi_results = results[:, :, 3]

# x positions: the lags the experiment was actually run with
lags = np.array(config_final.experiments['winter']['lag_time'])

# create the figure; figsize is (width, height) in inches
# (the extra empty figure created earlier in the original cell is dropped)
fig = plt.figure(figsize = (9, 7))

# add the main axes and the inset axes: [left, bottom, width, height]
ax1 = fig.add_axes([0.1, 0.1, 1.2, 1.2])
ax2 = fig.add_axes([0.2, 0.7, 0.5, 0.5])

# one line per method; never index past the methods present in the results
n_methods = rmse_results.shape[1]
for j, (method, style) in enumerate(zip(methods_fmt[:n_methods], styles)):
    ax1.plot(lags, rmse_results[:, j], style, linewidth = 2.5, label=method)
    ax2.plot(lags, wi_results[:, j], style, label=method)

# text and axis limits of the main axes
ax1.set_title('The Effect of Previous Aluminum Doses in Winter', fontsize=14)
ax1.set_xlim(-1,13)
ax1.set_ylim(0,8)
ax1.set_xlabel('The Number of Previous Timesteps(t)',fontsize=14)
ax1.set_ylabel('RMSE',fontsize=14)

# inset axes
ax2.set_ylabel('WI',fontsize=14)
ax2.set_xlim(-1,13)
ax2.set_ylim(-0.5,2)

ax1.legend(loc = 'upper right')

# save
# NOTE(review): the file name says "water"; "Winter" was probably meant.
os.chdir('/home/yqliu/Yiwei_water/results')
fig.savefig('PAD in water.pdf' , bbox_inches='tight', pad_inches=0.2, dpi=300)

plt.show()
plt.close()

## Comparison Between Measured and Predicted Dosage of 10 Methods on the Testing and Training Sets

In [None]:
# Fit every configured method on the 'spring' experiment and collect the
# measured targets plus one row of predictions per method, for both the
# testing and the training data.
GT_test, GT_train, predicted_results_test, predicted_results_train = m.specify_experiments(config_final.methods, config_final.experiments)
print ("loading completely...")

In [None]:
import matplotlib.pyplot as plt

# length of the window of consecutive samples to draw, capped by what the
# prediction arrays actually hold (instead of assuming 1000 samples)
n_available = min(predicted_results_test.shape[1], predicted_results_train.shape[1])
sample_number = min(150, n_available)
L = np.arange(sample_number)

# random start of the window
sample_id = np.random.randint(0,(n_available - sample_number + 1) )

print ('sample_id:',sample_id)

styles = ['r-','b-', 'k-', 'y-', 'c-', 'm-', 'y-', 'b-', 'g-','c--']
# NOTE(review): labels are matched to prediction rows by position and there is
# no entry for TENLar — confirm the list lines up with config_final.methods.
methods_fmt = ['LAR_ElasticNet', 'Lasso','XGBoost','LightGBM', 'MLR', 'Ridge_Regression', 'Random_Forest', 'MLP','NN', 'Ensemble']

os.chdir(config_final.path)

# create the figure; figsize is (width, height) in inches
# (the extra empty figure created earlier in the original cell is dropped)
fig = plt.figure(figsize = (13, 7))

# add the main axes (testing) and the inset axes (training): [left, bottom, width, height]
ax1 = fig.add_axes([0.1, 0.1, 1.2, 1.2])
ax2 = fig.add_axes([0.2, 0.77, 0.5, 0.5])

# predictions of every method inside the window; never index past the rows
# that were actually predicted
n_rows = predicted_results_test.shape[0]
for j, (method, style) in enumerate(zip(methods_fmt[:n_rows], styles)):
    ax1.plot(L,predicted_results_test[j, sample_id:(sample_id+sample_number)], style, linewidth = 1, label=method)
    ax2.plot(L,predicted_results_train[j, sample_id:(sample_id+sample_number)], style, linewidth = 1, label=method)

# measured dosage inside the same window
ax1.plot(L,GT_test[sample_id:(sample_id+sample_number)], 'k:', linewidth = 1.5, label='optimal')
ax2.plot(L,GT_train[sample_id:(sample_id+sample_number)], 'k:', linewidth = 1.5, label='optimal')

# text and axis limits of the main axes
ax1.set_title('The Scatterplot of Measured/Calculated Coagulant Doses in Training and Testing', fontsize=14)
ax1.set_xlim(-1,160)
ax1.set_ylim(20,80)
ax1.set_xlabel('N-th Sample(Testing)',fontsize=14)
ax1.set_ylabel('Coagulant Dosage(mg/L)',fontsize=14)

# inset axes
ax2.set_xlabel('N-th Sample(Training)',fontsize=14)
ax2.set_xlim(-1,160)
ax2.set_ylim(20,80)

ax1.legend(loc = 'upper right')

# save
os.chdir('/home/yqliu/Yiwei_water/results')
fig.savefig('Measured and calculated CD_pic.pdf', bbox_inches='tight', pad_inches=0.2, dpi=300)

plt.show()
plt.close()

## Changes of the features over a period of time. RW: raw water, SW: sedimentation water, C: carbon-processed water.

In [None]:
# Load the samples between 2015-05-05 and 2015-05-07 into m.source.all_x /
# all_y. The positional arguments follow the order used in run_experiments
# (normalize=0, clustering=0, lag_time=3), i.e. the raw, non-normalized data.
m.source.attr2source([('2015-05-05 00:00:00','2015-05-07 00:00:00')], 0, 0, 3, data_type='all')
# a: feature matrix, b: target values of the loaded window
a = m.source.all_x
b = m.source.all_y
print ('a_shape:',a.shape)
print ('b_shape:',b.shape)
print ("loading completely...")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

sns.set_style("ticks")


fig = plt.figure(figsize=(10,25))

# One entry per feature panel, top to bottom:
# (column of `a`, line format, y-axis label, call sns.despine() for this panel)
feature_panels = [
    (0, 'r-', 'RW-Nitrite (mg/L)', True),
    (1, 'b-', 'RW-Conductivity (mg/L)', True),
    (2, 'm-', 'RW-pH', False),
    (3, 'c-', 'RW-Turbidity', True),
    (4, 'm-', 'RW-Dissolved Oxygen', True),
    (5, 'r-', 'SW-Turbidity', True),
    (6, 'b--', 'SW-Dissolved Oxygen', True),
    (7, 'c--', 'C-Turbidity', True),
]

for panel_row, (column, line_fmt, y_label, strip_spines) in enumerate(feature_panels, start=1):
    axes = plt.subplot(9,1,panel_row)
    plt.plot(a[:,column], line_fmt, linewidth = 1.8)
    if strip_spines:
        sns.despine()
    plt.tick_params(labelsize=15)
    plt.ylabel(y_label,fontsize = 15)
    plt.grid(True)

# last panel: the target (dosage) series, which also carries the x label
plt.subplot(9,1,9)
plt.plot(b, 'k-', linewidth = 2)
sns.despine()
plt.tick_params(labelsize=15)
plt.ylabel('Optimal coagulant dosage',fontsize = 15)
plt.xlabel('Sample',fontsize = 15)
plt.grid(True)


plt.tight_layout()
# save .pdf
os.chdir('/home/yqliu/Yiwei_water/results')
fig.savefig('Fetures_values_pic.pdf', bbox_inches='tight', pad_inches=0.2, dpi=300)
plt.show() 
plt.close()