# Ablation Study on Auto-Adjustable and Time-Consistent Long Short-Term Memory (AATC-LSTM)

In [None]:
import torch
import numpy as np
import torch.nn as nn 
from DataSet import DataSet

# Deep + ANFIS
from models import AALSTM
from models import LSTM
from models import TCN
from models import ANFIS
import config
import os.path as osp
import pandas as pd
import xlrd
import os
import math
import logging
import datetime


In [None]:
class Water:
    def __init__(self, excel_files):
        self.source = DataSet(excel_files)
        
      # only init and structure model 
    
      # Integrated: COMPLETE AATCLSTM
      # no_AA: AATCLSTM, WHERE NO AUTO-ADJUST MECHANISM IS PERFORMED
      # no_TC: AATCLSTM, WHERE NO TIME-CONSISTENT TERM IS PERFORMED
      # no_AA_TC: AATCLSTM, WHERE NO AUTO-ADJUST MECHANISM AND TIME-CONSISTENT TERM ARE PERFORMED 
      # no_lambda2: AATCLSTM, WHERE NO COEFFICIENT OF REGULAR TERM LAMBDA2 IS PERFORMED
      # no_lambda1: AATCLSTM, WHERE NO COEFFICIENT OF REGULAR TERM LAMBDA1 IS PERFORMED
      
    
    def build_model(self, method, source):
        
        if method == 'Integrated':
            
            integrated_rmse = self.aalstmTraining(source, aa_flag = True, tc_flag = True, l1_flag = True, l2_flag = True)            
            return integrated_rmse           
                
        if method == 'no_AA':
            
            no_aa_rmse = self.tclstmTraining(source, aa_flag = False, tc_flag = True, l1_flag = True, l2_flag = True)
            return no_aa_rmse
        
        if method=='no_TC':
            
            no_tc_rmse = self.lstmTraining(source, aa_flag = True, tc_flag = False, l1_flag = True, l2_flag = True)
            return no_tc_rmse
                
        if method=='no_AA_TC':
            
            no_aa_tc_rmse = self.lstmTraining(source, aa_flag = False, tc_flag = True, l1_flag = True, l2_flag = True)
            return no_aa_tc_rmse
        
        if method=='no_lambda2':
            
            no_lambda2_rmse = self.tclstmTraining(source, aa_flag = True, tc_flag = True, l1_flag = True, l2_flag = False)
            return no_lambda2_rmse
                                           
        if method=='no_lambda1':
            
            no_lambda1_rmse = self.tclstmTraining(source, aa_flag = True, tc_flag = True, l1_flag = False, l2_flag = True)
            return no_lambda1_rmse

        
    def aalstmTraining(self, source, flag_aa, flag_tc, flag_l1, flag_l2):
        
        valX = source.dagger_x_test
        valY = source.dagger_y_test           
        val_dagger_save_metric = []
        aatc_metric = np.zeros([4])
        self.bestScore_dagger = 1000          
        self.in_features = valX.size(2)
        self.epoch = 100
        self.batchSize_train = 128
        self.batchSize_val = 16
        self.trainSeq =  self.seq_abrupt = source.deep_dagger_x.size(1)
        self.testSeq = valX.size(1)

        mse_criterion = nn.MSELoss()
                    
        # judge L1      
        if flag_l1==True:
            lambda1 = [1e-3, 1e-2, 1e-1, 0, 1, 1e1]

        else:
            lambda1 = 0
        
        # judge L2
        if flag_l2==True:
            lambda2 = [1e-3, 1e-2, 1e-1, 0, 1, 1e1]
            
        else:
            lambda2 = 0
        
        # judge AA
        if flag_aa==True:
            aalstm = AALSTM(self.in_features, self.trainSeq, self.testSeq).cuda().double()
       
        else:
            aalstm = LSTM(self.in_features, self.trainSeq, self.testSeq).cuda().double()
        
        
        optimizer_dagger = torch.optim.Adam(aalstm.parameters(), lr = 1e-2)


        loss_aatc_save = []
        loss_dag_sum = 0
        count_dag = 0

        for i in range(self.epoch):

            be_idx = 0

            if i==0:
                trainX_dag = source.deep_dagger_x
                trainY_dag = source.deep_dagger_y
            else:
                trainX_dag = source.dagger_shuffle[:,:,:-1]
                trainY_dag = source.dagger_shuffle[:,:,-1]

            while(be_idx + self.batchSize_train) <= (trainX_dag.size(0) - 1):
                optimizer_dagger.zero_grad()
                
                ed_idx = be_idx + self.batchSize_train、                
                out = aalstm(trainX_dag[be_idx:ed_idx,:,:], trainY_dag[be_idx:ed_idx,:], True)
                
                # judge TC 
                if flag_tc == True:           
                    AD_rate = cal_AC(trainX_dag[be_idx:(ed_idx + 1),:,:], self.batch_size_train, self.seq_train, 'batch')         

                else:
                    AD_rate = 1                

                dag_params1 = aalstm.parameters()
                L1_dag_sum = self.sum_params4L(dag_params1, 'L1')
                dag_params2 = aalstm.parameters()
                L2_dag_sum = self.sum_params4L(dag_params2, 'L2')


                # Total Loss (TC Term)
                loss_dag = AD_rate * mse_criterion(out, trainY_dag[be_idx:ed_idx,:]) + lambda1[1] * L1_dag_sum \
                + lambda2[2] * L2_dag_sum

                loss_dag.backward()
                optimizer_dagger.step()
                loss_dag_sum = loss_dag + loss_dag_sum                    
                be_idx = ed_idx
                count_dag = count_dag + 1

            average_dag_loss = loss_dag_sum / count_dag
            epoch_num = 'epoch' + str(i)
            loss_aatc_save.append(average_dag_loss)

            loss_dag_sum = 0
            count_dag = 0


            ## validation
            if i >= 5 and i%5==0:

                batch_dag_val_b = 0
                rmse_dag_score_total = 0
                mape_dag_score_total = 0
                corr_dag_score_total = 0
                wi_dag_score_total = 0
                count_dag_val = 0

                val_oneBatch_dagger_metric = np.zeros([4])

                while (batch_dag_val_b + self.batchSize_val) <= valX.size(0):
                    with torch.no_grad():
                        batch_dag_val_e = batch_dag_val_b + self.batchSize_val
                        valXSlice = valX[batch_dag_val_b:batch_dag_val_e,:,:]
                        valYSlice = valY[batch_dag_val_b:batch_dag_val_e]

                        val_dag_out = daggertenlar(valXSlice, valYSlice, False)

                        # calculate metrics for deep
                        rmse_dag_score = self.cal_rmse4deep(val_dag_out, valYSlice, 'rmse')
                        mape_dag_score = self.cal_rmse4deep(val_dag_out, valYSlice, 'mape')
                        corr_dag_score = self.cal_rmse4deep(val_dag_out, valYSlice, 'correlation')
                        wi_dag_score = self.cal_rmse4deep(val_dag_out, valYSlice, 'wi')

                        rmse_dag_score_total = rmse_dag_score_total + rmse_dag_score
                        mape_dag_score_total = mape_dag_score_total + mape_dag_score
                        corr_dag_score_total = corr_dag_score_total + corr_dag_score
                        wi_dag_score_total = wi_dag_score_total + wi_dag_score

                        batch_dag_val_b = batch_dag_val_e
                        count_dag_val = count_dag_val + 1

                #     
                val_oneBatch_dagger_metric[0] = rmse_dag_av_score = rmse_dag_score_total / count_dag_val
                val_oneBatch_dagger_metric[1] = mape_dag_av_score = mape_dag_score_total / count_dag_val
                val_oneBatch_dagger_metric[2] = corr_dag_av_score = corr_dag_score_total / count_dag_val
                val_oneBatch_dagger_metric[3] = wi_dag_av_score = wi_dag_score_total / count_dag_val                       

    #     

                if rmse_dag_av_score < self.bestScore_dagger:
                    print ('validation_dagger_best_score:', rmse_dag_av_score)
                    print ('current_best_epoch:', i)

                    val_dagger_save_metric.append(val_oneBatch_dagger_metric)
                    self.bestScore_dagger = rmse_dag_av_score
        # 
        val_save_metric_dagger = np.concatenate(val_dagger_save_metric)
        val_save_metric_dagger = val_save_metric_dagger.reshape(-1, 4)

        for i in range(val_save_metric_dagger.shape[1]):
            val_save_metric_dagger_slice = val_save_metric_dagger[:, i]
            val_save_metric_dagger_slice.sort(0)

            if i==0 or i==1:
                aatc_metric[i] = val_save_metric_dagger_slice[0][i]
            else:
                aatc_metric[i] = val_save_metric_dagger_slice[-1][i]
                
         # selecting RMSE
         aatc_rmse = aatc_metric[0]
                       
        return aatc_rmse   

    
    
    def evaluate(self, pred_y, true_y, metrics):
        pred_y = pred_y.reshape([-1])
        true_y = true_y.reshape([-1])
        metrics_num = len(metrics)
        results = np.zeros([metrics_num])
        for i,metric in enumerate(metrics):
            if metric == 'rmse':
                results[i] = np.sqrt(np.mean(np.power(pred_y - true_y,2)))
            elif metric == 'mape':
                results[i] = np.mean(np.abs(true_y - pred_y) / true_y)*100
            elif metric == 'correlation':
                results[i] = np.corrcoef(pred_y, true_y)[0,1]
            elif metric == 'wi':
                results[i] = np.sum(np.power(true_y - pred_y, 2)) / np.sum(np.power(np.abs(pred_y - np.mean(true_y)) + np.abs(true_y-np.mean(true_y)), 2))
        return results      
          
        
    def run_experiments(self, methods, experiments, repeat_num, metrics):
        # dic type
        results = {}
        for exp_name in experiments.keys():
            # exp_name is dict_keys type
            exp = experiments[exp_name]
            train_time, test_time = exp['times']
            evaluation = np.zeros([repeat_num, len(exp['lag_time']), len(exp['clustering']), len(exp['normalize']), len(methods), (len(metrics)-3)])
            print ('repeat_num:',repeat_num)
                        
            for r in range(repeat_num):
                for i,lag_time in enumerate(exp['lag_time']):
                    for j,clustering in enumerate(exp['clustering']):
                        for k,normalize in enumerate(exp['normalize']):
                            
                                    # time-series data
                            self.source.attr2source(train_time, 
                                                    normalize,
                                                    0, 
                                                    lag_time,
                                                    data_type = 'time-series')                            
                            
                            # general training data
                            self.source.attr2source(train_time, 
                                                    normalize,
                                                    clustering, 
                                                    lag_time,
                                                    data_type = 'ts-train')
                            # general testing data
                            self.source.attr2source(test_time,
                                                     normalize, 
                                                     clustering, 
                                                     lag_time,
                                                     data_type = 'ts-test')
                                                    
                   
                            # print and saving all ablative models
                            for t, method in enumerate(methods):
                                                
                                evaluation[r, i, j, k, t, :] = self.build_model(method, self.source) 
               
                                print('exp_name={}, lag_time={}, normalize={}, clustering={}, method={} :{}'.format(
                                    exp_name, lag_time, normalize, clustering, method, evaluation[r, i, j, k, t, :]))
                                
            # save results as a dictionary 
            np.save('result/{}.npy'.format(exp_name), evaluation)            


    def decimal2round2(self, results):
        sh = results.shape
        fraction = results.reshape(-1)
        for m in range(len(fraction)):
            fraction[m] = round(fraction[m],8)
        fractions = fraction.reshape(sh)
#         print ('fractions_shape',fractions.shape)
        return fractions 


In [None]:
os.chdir(config_ablation.path)
os.environ["CUDA_VISIBLE_DEVICES"]="3"
m = Water(config_ablation.excel_files)
m.run_experiments(config_ablation.methods, 
                  config_ablation.experiments,
                  config_ablation.repeat_num,
                  config_ablation.metrics)

## Calculating dRMSE
## $dRMSE = {\vert RMSE - RMSE_{w/o} \vert}$

In [2]:

# loading Spring as ablative dataset
results_spring = np.load(osp.join('result', 'spring.npy'))
result_spring  = results_spring.mean(0).squeeze()

# RMSE
RMSE = result_spring[0]

# calculating dRMSE 
dRMSE_no_aa = np.abs(RMSE - result_spring[1]) 
dRMSE_no_tc = np.abs(RMSE - result_spring[2]) 
dRMSE_no_aa = np.abs(RMSE - result_spring[3]) 
dRMSE_no_aa_tc = np.abs(RMSE - result_spring[4]) 
dRMSE_no_lambda2 = np.abs(RMSE - result_spring[5])
dRMSE_no_lambda1 = np.abs(RMSE - result_spring[6]) 

print ('w/o TC:', dRMSE_no_tc)
print ('w/o lambda_2:', dRMSE_no_lambda2)
print ('w/o lambda_1:', dRMSE_no_lambda1)


w/o TC: 0.29
w/o lambda_2: 0.1
w/o lambda_1: 0.05
