In [1]:
from _utils import *
from config import *

import time
import argparse
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy's legacy global random generator with a fixed value.
# NOTE(review): no np.random call is visible in this notebook; presumably this
# is for the star-imported helpers -- confirm it is still needed.
np.random.seed(28)

# 1 Functions

In [2]:
class args_config:
    """Result-folder layout for the MEAN model on a single dataset.

    Instantiation builds ``<result_folder_path>/MEAN`` and
    ``<result_folder_path>/MEAN/<dataset_name>`` and passes each path to
    ``create_folder``.
    """

    def __init__(self, dataset_name):
        """Store the model/dataset names and prepare their result folders.

        Args:
            dataset_name: Dataset identifier; also used as the sub-folder name.
        """
        self.model_name = 'MEAN'
        model_dir = os.path.join(result_folder_path, self.model_name)
        self.model_result_save_path = model_dir
        create_folder(model_dir)

        self.dataset_name = dataset_name
        dataset_dir = os.path.join(model_dir, dataset_name)
        self.dataset_result_save_path = dataset_dir
        create_folder(dataset_dir)

    def experiment_config(self, experiment_name):
        """Prepare the folder for one experiment and return its output paths.

        Args:
            experiment_name: Experiment identifier, used as the sub-folder name
                below the dataset folder.

        Returns:
            An ``argparse.Namespace`` with ``data_complete_path`` and
            ``metrics_analysis_path``. Both default to locations inside the
            experiment folder; matching ``--data_complete_path`` /
            ``--metrics_analysis_path`` entries in ``sys.argv`` take precedence.
        """
        experiment_dir = os.path.join(self.dataset_result_save_path, experiment_name)
        self.experiment_result_save_path = experiment_dir
        create_folder(experiment_dir)

        output_defaults = {
            'data_complete_path': os.path.join(experiment_dir, 'data_complete.csv'),
            'metrics_analysis_path': os.path.join(experiment_dir, 'metrics_analysis'),
        }
        # This call hands create_folder a one-element list, whereas the other
        # calls pass a plain string; kept as-is because the helper is opaque here.
        create_folder([output_defaults['metrics_analysis_path']])

        parser = argparse.ArgumentParser(description='data complete configs')
        for option_name, default_value in output_defaults.items():
            parser.add_argument('--' + option_name, type=str, default=default_value)

        # parse_known_args returns (namespace, leftovers); unrecognised argv
        # entries end up in the leftovers instead of aborting the parse.
        namespace, _unparsed = parser.parse_known_args()
        return namespace

In [3]:
# One result-folder configuration per dataset; constructing each one also
# creates its model/dataset folders, in the order listed.
flow_sim_args, flow_zcity_args, pres_sim_args, pres_zcity_args = (
    args_config(dataset)
    for dataset in ('flow_sim', 'flow_zcity', 'pres_sim', 'pres_zcity')
)

In [4]:
# Cross-experiment summary table. MEAN_dataComplete.mean_data_complete adds one
# row per '<dataset>_<experiment>' with 'MAPE', 'RMSE' and 'SMAPE' columns, and
# the experiment cell writes the table to CSV once every run has finished.
global_metrics_all_sensor_df = pd.DataFrame()

In [5]:
def compute_true_metrics(raw_data_value, denorm_sparse_value, denorm_data_complete_value):
    """Score completed values on the entries that were removed from the raw data.

    An entry is scored when it is non-zero in ``raw_data_value`` and zero in
    ``denorm_sparse_value``.

    Args:
        raw_data_value: Array of reference values.
        denorm_sparse_value: Array of the same shape in which 0 marks a
            removed entry.
        denorm_data_complete_value: Array of the same shape holding the
            completed values.

    Returns:
        Tuple ``(mape, rmse, smape)`` as produced by ``compute_mape``,
        ``compute_rmse`` and ``compute_smape`` on the selected entries.
    """
    held_out = (raw_data_value != 0) & (denorm_sparse_value == 0)
    metric_functions = (compute_mape, compute_rmse, compute_smape)
    # Each metric receives its own freshly indexed pair of arrays.
    return tuple(
        metric(raw_data_value[held_out], denorm_data_complete_value[held_out])
        for metric in metric_functions
    )

In [6]:
class MEAN_dataComplete():
    """Mean-imputation baseline for one (dataset, experiment) pair.

    Constructing an instance runs the whole pipeline: it reads the raw and
    sparse data, replaces every zero in the sparse data with the mean of the
    remaining values in the same column, writes the completed data and the
    metric tables to disk, and records the overall metrics in the module-level
    ``global_metrics_all_sensor_df``.
    """

    def __init__(self, dataset_name, experiment_name, experiment_args):
        """Load the inputs for one experiment and run the completion.

        Args:
            dataset_name: One of 'flow_sim', 'pres_sim', 'flow_zcity',
                'pres_zcity'; selects the raw dataset path.
            experiment_name: Sub-folder of the sparse dataset to complete.
            experiment_args: Object exposing ``data_complete_path`` and
                ``metrics_analysis_path`` (as returned by
                ``args_config.experiment_config``).

        Raises:
            ValueError: If ``dataset_name`` is not one of the known datasets.
        """
        if dataset_name == 'flow_sim':
            self.raw_dataset_path = flow_sim_path
        elif dataset_name == 'pres_sim':
            self.raw_dataset_path = pres_sim_path
        elif dataset_name == 'flow_zcity':
            self.raw_dataset_path = flow_zcity_path
        elif dataset_name == 'pres_zcity':
            self.raw_dataset_path = pres_zcity_path
        else:
            # Fail here with a clear message instead of an AttributeError on
            # self.raw_dataset_path a few lines further down.
            raise ValueError('Unknown dataset_name: %r' % (dataset_name,))

        self.experiment_args = experiment_args

        self.raw_data_df = read_csv_data(self.raw_dataset_path)

        self.sparse_for_completing_path = os.path.join(
            dataset_sparse_path, dataset_name, experiment_name, 'denorm_sparse_for_completing.csv')
        self.sparse_data_df = read_csv_data(self.sparse_for_completing_path)

        self.data_complete_path = self.experiment_args.data_complete_path
        self.metrics_analysis_path = self.experiment_args.metrics_analysis_path

        # Row label used in the global summary table.
        self.name = dataset_name + '_' + experiment_name

        self.mean_data_complete()

    def mean_data_complete(self):
        """Impute zeros with per-column means, then compute and save metrics.

        Writes the completed frame to ``self.data_complete_path`` and four CSV
        files (overall metrics plus one per-sensor table for each of MAPE, RMSE
        and SMAPE) into ``self.metrics_analysis_path``. Also stores the overall
        metrics under ``self.name`` in ``global_metrics_all_sensor_df``.
        """
        sensor_columns = self.raw_data_df.columns
        mape_single_sensor_df = pd.DataFrame(columns=sensor_columns)
        rmse_single_sensor_df = pd.DataFrame(columns=sensor_columns)
        smape_single_sensor_df = pd.DataFrame(columns=sensor_columns)
        metrics_analysis_df = pd.DataFrame(columns=['MAPE', 'RMSE', 'SMAPE'])

        # Zeros mark missing readings: turn them into NaN so they are left out
        # of the column means, then fill each column with its own mean.
        # Results are assigned rather than applied with inplace=True on a
        # column selection, because under pandas Copy-on-Write the in-place
        # form modifies a temporary object and leaves the frame untouched.
        masked_df = self.sparse_data_df.replace(0, np.nan)
        mean_data_df = masked_df.fillna(masked_df.mean())

        mean_data_df.to_csv(self.data_complete_path, index=True, header=True)
        mape_all_sensors, rmse_all_sensors, smape_all_sensors = compute_true_metrics(
            self.raw_data_df.values, self.sparse_data_df.values, mean_data_df.values)
        # result_analysis must return one value per raw-data column for each
        # metric; the row assignments below rely on that.
        mape_single_sensor_list, rmse_single_sensor_list, smape_single_sensor_list = result_analysis(
            self.raw_data_df, self.sparse_data_df, mean_data_df)

        print('MAPE: %.6f, RMSE: %.6f, SMAPE: %.6f' % (mape_all_sensors, rmse_all_sensors, smape_all_sensors))
        metrics_analysis_df.loc['mean'] = [mape_all_sensors, rmse_all_sensors, smape_all_sensors]
        mape_single_sensor_df.loc['mean'] = mape_single_sensor_list
        rmse_single_sensor_df.loc['mean'] = rmse_single_sensor_list
        smape_single_sensor_df.loc['mean'] = smape_single_sensor_list

        metrics_analysis_df.to_csv(os.path.join(self.metrics_analysis_path, 'metrics_analysis.csv'), index=True, header=True)
        mape_single_sensor_df.to_csv(os.path.join(self.metrics_analysis_path, 'mape_single_sensor.csv'), index=True, header=True)
        rmse_single_sensor_df.to_csv(os.path.join(self.metrics_analysis_path, 'rmse_single_sensor.csv'), index=True, header=True)
        smape_single_sensor_df.to_csv(os.path.join(self.metrics_analysis_path, 'smape_single_sensor.csv'), index=True, header=True)

        # Setting with enlargement: adds the row (and, on first use, the
        # columns) to the shared summary table.
        global_metrics_all_sensor_df.loc[self.name, 'MAPE'] = mape_all_sensors
        global_metrics_all_sensor_df.loc[self.name, 'RMSE'] = rmse_all_sensors
        global_metrics_all_sensor_df.loc[self.name, 'SMAPE'] = smape_all_sensors

# 2 Experiments

-----

In [7]:
# Missing-data scenarios to evaluate, in run order: '<pattern>_<ratio>'.
experiment_name_list = [
    'random_0.3', 'random_0.6', 'random_0.9',
    'long_range_0.3', 'long_range_0.6',
    'block_0.3', 'block_0.6',
    'mix_0.3', 'mix_0.5', 'mix_0.7',
]
# Datasets to evaluate, in run order.
dataset_name_list = [
    'flow_sim',
    'pres_sim',
    'flow_zcity',
    'pres_zcity',
]

In [None]:
# Map each dataset name to its pre-built result-folder configuration. Looking
# a name up here raises KeyError for an unrecognised dataset, instead of
# silently reusing the experiment_args left over from the previous iteration.
dataset_args_by_name = {
    'flow_sim': flow_sim_args,
    'pres_sim': pres_sim_args,
    'flow_zcity': flow_zcity_args,
    'pres_zcity': pres_zcity_args,
}

for dataset_name in dataset_name_list:
    dataset_args = dataset_args_by_name[dataset_name]
    for experiment_name in experiment_name_list:
        print('\nThe %s dataset and %s experiment is running...' % (dataset_name, experiment_name))
        experiment_args = dataset_args.experiment_config(experiment_name)

        # Constructing the object runs the completion and writes its results.
        MEAN_dataComplete(dataset_name, experiment_name, experiment_args)
        print('The %s dataset and %s experiment has been completed!' % (dataset_name, experiment_name))

# Persist the cross-experiment summary once every run has added its row.
global_metrics_all_sensor_df.to_csv(os.path.join(result_folder_path, 'MEAN', 'global_metrics_all_sensor.csv'), index=True, header=True)