In [1]:
import warnings
import itertools
import pandas
import math
import sys
import os
import numpy as np

In [2]:
def make_result_files(input_xlsx_file, input_csv_file, output_csv_file):
    input_xlsx_DataFrame = pandas.read_excel(input_xlsx_file)
    training_data_size = list(input_xlsx_DataFrame.columns.values)[-1]
    model_input = input_xlsx_DataFrame[training_data_size][3]
    
    training_data_size = int(training_data_size.split(': ')[1])
    model_input = model_input.split(': ')[1]
    model_input = int(model_input.split('x')[0])*int(model_input.split('x')[1])

    time_offset = training_data_size + model_input
    print("Time offset in " +input_xlsx_file + " : " + str(time_offset) )
    
    input_csv_DataFrame = pandas.read_csv(input_csv_file)
    
    xlsx_input_values = np.array(input_xlsx_DataFrame['Value'])
    xlsx_anomaly_detections = np.array(input_xlsx_DataFrame['DTW Detection'])
    xlsx_DTW_similarity = np.array(input_xlsx_DataFrame['DTW Similarity'])
    xlsx_label = np.array(input_xlsx_DataFrame['Anomaly'])
    csv_values = np.array(input_csv_DataFrame['value'])
    
    if csv_values[time_offset] != xlsx_input_values[0]:
        print(" ### ERROR ### : time offset mismatch in " + input_xlsx_file)
        print("offset cvs value : "+str(xlsx_input_values[0]))
        print("offset xlsx value : "+str(csv_values[time_offset]))
#         return 1
    else:
        print("time offset is correct in " + input_xlsx_file)
    
    anomaly_score = np.zeros(len(csv_values))
    label = np.zeros(len(csv_values))
    for i in range(0, len(csv_values)-1):
        tmp = xlsx_anomaly_detections[i]
        if not np.isnan(tmp):
            anomaly_score[i+time_offset] = 1
        if not np.isnan(xlsx_label[i]):
            label[i] = 1
    
    output_csv_DataFrame =  pandas.DataFrame(
                        dict(value=csv_values,
                            anomaly_score=anomaly_score,
                            DTW_similarity=xlsx_DTW_similarity,
                            label=label),
                            index=input_csv_DataFrame['timestamp'])
    output_csv_DataFrame.to_csv(output_csv_file)
    print("Done writing " + output_csv_file)
    return 0

def change_file_from_xlsx_to_csv(xlsx_file):
    xlsx_file = xlsx_file[:-4]
    return xlsx_file + 'csv'

def get_output_csv_file(input_csv,detector_name):
    input_csv = input_csv.split('/')
    input_csv = input_csv[0] +'/'+ input_csv[1] +'/'+ detector_name +'/'+ input_csv[3] +'/'+ detector_name +'_'+ input_csv[4]
    return input_csv

def get_summary_file(detector):
    detector_summary_file = "../results/numentaTM/numentaTM_standard_scores.csv"
    detector_summary_file = detector_summary_file.split('/')
    file_name = detector_summary_file[3].split('_')
    file_name = detector + '_' + file_name[1] + '_' + file_name[2]
    detector_summary_file = detector_summary_file[0] +'/'+ detector_summary_file[1] +'/'+ detector +'/'+ file_name
    return detector_summary_file

def generate_final_score_summary(detectors, outputfile):
         
    final_DataFrame = pandas.read_csv(get_summary_file(detectors[0]))
    Summary_colomns = {}
    for d in detectors:
        DataFrame = pandas.read_csv(get_summary_file(d), index_col='File')
        Summary_colomns[d] = DataFrame['Score']
    
    final_DataFrame =  pandas.DataFrame(Summary_colomns, index=final_DataFrame['File'])
    
    final_DataFrame.to_csv(outputfile)
    print("Done writing " + outputfile)

In [6]:
input_xlsx_files = [
    # 2 offset erros are due to rounding off
    "../results/LSTMCNnet original/artificialNoAnomaly/art_daily_no_noise.xlsx",
    "../results/LSTMCNnet original/artificialNoAnomaly/art_daily_perfect_square_wave.xlsx",
    "../results/LSTMCNnet original/artificialNoAnomaly/art_daily_small_noise.xlsx",  # offset error
    "../results/LSTMCNnet original/artificialNoAnomaly/art_flatline.xlsx",
    "../results/LSTMCNnet original/artificialNoAnomaly/art_noisy.xlsx",  # offset error
    
    # 2 offset erros are due to rounding off
    "../results/LSTMCNnet original/artificialWithAnomaly/art_daily_flatmiddle.xlsx",
    "../results/LSTMCNnet original/artificialWithAnomaly/art_daily_jumpsdown.xlsx",  # offset error
    "../results/LSTMCNnet original/artificialWithAnomaly/art_daily_jumpsup.xlsx",
    "../results/LSTMCNnet original/artificialWithAnomaly/art_daily_nojump.xlsx",  # offset error
    "../results/LSTMCNnet original/artificialWithAnomaly/art_increase_spike_density.xlsx",
    "../results/LSTMCNnet original/artificialWithAnomaly/art_load_balancer_spikes.xlsx",

    # 6 offset erros are due to rounding off
    "../results/LSTMCNnet original/realAdExchange/exchange-2_cpc_results.xlsx",  # offset error
    "../results/LSTMCNnet original/realAdExchange/exchange-2_cpm_results.xlsx",  # offset error
    "../results/LSTMCNnet original/realAdExchange/exchange-3_cpc_results.xlsx",  # offset error
    "../results/LSTMCNnet original/realAdExchange/exchange-3_cpm_results.xlsx",  # offset error
    "../results/LSTMCNnet original/realAdExchange/exchange-4_cpc_results.xlsx",  # offset error
    "../results/LSTMCNnet original/realAdExchange/exchange-4_cpm_results.xlsx",  # offset error
    
    # error 2 offset erros CRITICAL ERROR 
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_cpu_utilization_5f5533.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_cpu_utilization_24ae8d.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_cpu_utilization_53ea38.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_cpu_utilization_77c1ca.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_cpu_utilization_825cc2.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_cpu_utilization_ac20cd.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_cpu_utilization_c6585a.xlsx",  # offset error CRITICAL ERROR
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_cpu_utilization_fe7f93.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_disk_write_bytes_1ef3de.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_disk_write_bytes_c0d644.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_network_in_5abac7.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/ec2_network_in_257a54.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/elb_request_count_8c0756.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/grok_asg_anomaly.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/iio_us-east-1_i-a2eb1cd9_NetworkIn.xlsx",  # offset error
    "../results/LSTMCNnet original/realAWSCloudwatch/rds_cpu_utilization_cc0c53.xlsx",
    "../results/LSTMCNnet original/realAWSCloudwatch/rds_cpu_utilization_e47b3b.xlsx",
    
    # 2 offset erros are due to rounding off
    "../results/LSTMCNnet original/realKnownCause/ambient_temperature_system_failure.xlsx",  # offset error
    "../results/LSTMCNnet original/realKnownCause/cpu_utilization_asg_misconfiguration.xlsx",
    "../results/LSTMCNnet original/realKnownCause/ec2_request_latency_system_failure.xlsx",
    "../results/LSTMCNnet original/realKnownCause/machine_temperature_system_failure.xlsx", # offset error
    "../results/LSTMCNnet original/realKnownCause/nyc_taxi.xlsx",
    "../results/LSTMCNnet original/realKnownCause/rogue_agent_key_hold.xlsx",
    "../results/LSTMCNnet original/realKnownCause/rogue_agent_key_updown.xlsx",

    # no errors
    "../results/LSTMCNnet original/realTraffic/occupancy_6005.xlsx",
    "../results/LSTMCNnet original/realTraffic/occupancy_t4013.xlsx",
    "../results/LSTMCNnet original/realTraffic/speed_6005.xlsx",
    "../results/LSTMCNnet original/realTraffic/speed_7578.xlsx",
    "../results/LSTMCNnet original/realTraffic/speed_t4013.xlsx",
    "../results/LSTMCNnet original/realTraffic/TravelTime_387.xlsx",
    "../results/LSTMCNnet original/realTraffic/TravelTime_451.xlsx",
    
#     # no errors
    "../results/LSTMCNnet original/realTweets/Twitter_volume_AAPL.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_AMZN.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_CRM.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_CVS.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_FB.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_GOOG.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_IBM.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_KO.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_PFE.xlsx",
    "../results/LSTMCNnet original/realTweets/Twitter_volume_UPS.xlsx"
]

for f in input_xlsx_files:
    input_csv = change_file_from_xlsx_to_csv(f)
    output_csv = get_output_csv_file(input_csv,'LSTMCNnet')
    make_result_files(f,input_csv,output_csv)

Time offset in ../results/LSTMCNnet original/artificialNoAnomaly/art_daily_no_noise.xlsx : 620
time offset is correct in ../results/LSTMCNnet original/artificialNoAnomaly/art_daily_no_noise.xlsx
Done writing ../results/LSTMCNnet/artificialNoAnomaly/LSTMCNnet_art_daily_no_noise.csv
Time offset in ../results/LSTMCNnet original/artificialNoAnomaly/art_daily_perfect_square_wave.xlsx : 620
time offset is correct in ../results/LSTMCNnet original/artificialNoAnomaly/art_daily_perfect_square_wave.xlsx
Done writing ../results/LSTMCNnet/artificialNoAnomaly/LSTMCNnet_art_daily_perfect_square_wave.csv
Time offset in ../results/LSTMCNnet original/artificialNoAnomaly/art_daily_small_noise.xlsx : 620
 ### ERROR ### : time offset mismatch in ../results/LSTMCNnet original/artificialNoAnomaly/art_daily_small_noise.xlsx
offset cvs value : 19.9019
offset xlsx value : 19.901902860899998
Done writing ../results/LSTMCNnet/artificialNoAnomaly/LSTMCNnet_art_daily_small_noise.csv
Time offset in ../results/LSTMC

Time offset in ../results/LSTMCNnet original/realAWSCloudwatch/ec2_disk_write_bytes_c0d644.xlsx : 630
time offset is correct in ../results/LSTMCNnet original/realAWSCloudwatch/ec2_disk_write_bytes_c0d644.xlsx
Done writing ../results/LSTMCNnet/realAWSCloudwatch/LSTMCNnet_ec2_disk_write_bytes_c0d644.csv
Time offset in ../results/LSTMCNnet original/realAWSCloudwatch/ec2_network_in_5abac7.xlsx : 640
time offset is correct in ../results/LSTMCNnet original/realAWSCloudwatch/ec2_network_in_5abac7.xlsx
Done writing ../results/LSTMCNnet/realAWSCloudwatch/LSTMCNnet_ec2_network_in_5abac7.csv
Time offset in ../results/LSTMCNnet original/realAWSCloudwatch/ec2_network_in_257a54.xlsx : 620
time offset is correct in ../results/LSTMCNnet original/realAWSCloudwatch/ec2_network_in_257a54.xlsx
Done writing ../results/LSTMCNnet/realAWSCloudwatch/LSTMCNnet_ec2_network_in_257a54.csv
Time offset in ../results/LSTMCNnet original/realAWSCloudwatch/elb_request_count_8c0756.xlsx : 460
time offset is correct in ..

time offset is correct in ../results/LSTMCNnet original/realTweets/Twitter_volume_KO.xlsx
Done writing ../results/LSTMCNnet/realTweets/LSTMCNnet_Twitter_volume_KO.csv
Time offset in ../results/LSTMCNnet original/realTweets/Twitter_volume_PFE.xlsx : 1200
time offset is correct in ../results/LSTMCNnet original/realTweets/Twitter_volume_PFE.xlsx
Done writing ../results/LSTMCNnet/realTweets/LSTMCNnet_Twitter_volume_PFE.csv
Time offset in ../results/LSTMCNnet original/realTweets/Twitter_volume_UPS.xlsx : 800
time offset is correct in ../results/LSTMCNnet original/realTweets/Twitter_volume_UPS.xlsx
Done writing ../results/LSTMCNnet/realTweets/LSTMCNnet_Twitter_volume_UPS.csv


In [3]:
# Have to add label or run python run.py --optimize -d LSTMCNnet
# generate score by python run.py --score -d LSTMCNnet
generate_final_score_summary(['numenta','LSTMCNnet','TSA_ARMA'],'../results/score_summary.csv')

Done writing ../results/score_summary.csv
