In [1]:
import sys, json
from statistics import mean
from time import time
from datetime import datetime
from libDataLoaders import dataset_loader
from libFolding import Folding
from libPMF import EmpiricalPMF
from libSSHMM import SuperStateHMM, frange
from libAccuracy import Accuracy

In [2]:
ε = 0.00021

In [3]:
# python train_SSHMM.py BigO_L01 AMPdsR1_1min_A 10 200 noisy 4 1 BME 
modeldb = 'BigO_L01'
dataset = 'AMPdsR1_1min_A'
precision = float(10)
max_obs = float(200)
denoised = 'noisy' == 'denoised'
max_states = int(4)
folds = int(1)
ids = 'BME'
ids = ids.split(',')
datasets_dir = './datasets/%s.csv'
logs_dir = './logs/%s.log'
models_dir = './models/%s.json'

In [4]:
sshmms = []
train_times = []
folds = Folding(dataset_loader(datasets_dir % dataset, ids, precision, denoised), folds)

Loading AMPds R1 dataset at ./datasets/AMPdsR1_1min_A.csv...
	Setting timestamp column TimeStamp as index.
	Modfity data with precision 10.000000 then convert to int...
	Keeping only columns ['BME'].
	Calculating unmetered column UNE.

Created 1 fold: 524544.


In [5]:
for (fold, priors, testing) in folds: 
    del testing
    tm_start = time()
    
    print()
    print('Creating load PMFs and finding load states...')
    print('\tMax partitions per load =', max_states)
    pmfs = []
    for id in ids:
        pmfs.append(EmpiricalPMF(id, max_obs * precision, list(priors[id])))
        pmfs[-1].quantize(max_states, ε)

    print()
    print('Creating compressed SSHMM...')
    incro = 1 / precision
    sshmm = SuperStateHMM(pmfs, [i for i in frange(0, max_obs + incro, incro)])
    
    print('\tConverting DataFrame in to obs/hidden lists...')
    obs_id = list(priors)[0]
    obs = list(priors[obs_id])
    hidden = [i for i in priors[ids].to_records(index=False)]
    
    sshmm.build(obs, hidden)
    sshmms.append(sshmm)
    
    train_times.append((time() - tm_start) / 60)



************************ VALIDATION ROUND:  1/ 1 ************************

Building priors and testing datasets...

Creating load PMFs and finding load states...
	Max partitions per load = 4
	PMF for BME: [403947, 39227, 10988, 1687, 427, 318, 243, 224, 1722, 120, 619, 142, 66, 5, 3, 2, 1, 4, 3, 7, 19, 32, 13, 4, 5, 98, 62, 93, 49902, 4865, 4152, 249, 1907, 701, 1133, 247, 323, 549, 166, 86, 150, 2, 18, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 1, 0, 1, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2]
	Quantize PMF: BME, epsilon = 0.00336
		S0:     0:    0, peak=    0,  403947 |t|
		S1:     1:    6, peak=    1,   52890 |t|
		S2:     7:   30, peak=   28,   62163 |t|
		S3:    31: 1999, peak=   32,    5544 |t|

Creating compressed SSHMM...
	K = 4 super-states (a sum of 4 states), Km = [4].
	M = 1 with labels ['BME'], N = 2

In [6]:
print()
print('Train Time was', round(sum(train_times), 2), ' min (avg ', round(sum(train_times) / len(train_times), 2), ' min/fold).')


Train Time was 0.15  min (avg  0.15  min/fold).


In [7]:
fn = models_dir % modeldb
print('Converting model %s to JSON for storage in %s...' % (modeldb, fn))
fp = open(fn, 'w')
json.dump(sshmms, fp, default=(lambda o: o._asdict()), sort_keys=True, indent=None, separators=(',', ':'))
fp.close()

report = []
report.append(['Model DB', modeldb])
report.append(['Run Date', datetime.now()])
report.append(['Dataset', dataset])
report.append(['Precision', precision])
report.append(['Max States', max_states])
report.append(['Denoised?', denoised])
report.append(['Model Noise?', ('UNE' in ids)])
report.append(['Folds', folds.folds])
report.append(['IDs', ' '.join(ids)])
report.append(['Train Time', round(sum(train_times), 2)])
report.append(['Avg Time/Fold', round(sum(train_times) / len(train_times), 2)])
report.append(['Avg Load States', round(sum([mean(sshmm.Km) for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['Sum Load States', round(sum([sum(sshmm.Km) for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['Super-States', round(sum([sshmm.K for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['Loads', sshmms[0].M])
report.append(['Obs', sshmms[0].N])
report.append(['Time Len', folds.data_size])
report.append(['P0 Size', round(sum([sshmm.P0.size() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['P0 Non-Zero', round(sum([sshmm.P0.nonzero() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['P0 Sparsity', round(sum([sshmm.P0.sparsity() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['P0 bytes', round(sum([sshmm.P0.bytes() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['A Size', round(sum([sshmm.A.size() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['A Non-Zero', round(sum([sshmm.A.nonzero() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['A Sparsity', round(sum([sshmm.A.sparsity() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['A bytes', round(sum([sshmm.A.bytes() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['B Size', round(sum([sshmm.B.size() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['B Non-Zero', round(sum([sshmm.B.nonzero() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['B Sparsity', round(sum([sshmm.B.sparsity() for sshmm in sshmms]) / len(sshmms), 1)])
report.append(['B bytes', round(sum([sshmm.B.bytes() for sshmm in sshmms]) / len(sshmms), 1)])

Converting model BigO_L01 to JSON for storage in ./models/BigO_L01.json...


In [8]:
report

[['Model DB', 'BigO_L01'],
 ['Run Date', datetime.datetime(2021, 4, 30, 20, 6, 8, 870064)],
 ['Dataset', 'AMPdsR1_1min_A'],
 ['Precision', 10.0],
 ['Max States', 4],
 ['Denoised?', False],
 ['Model Noise?', False],
 ['Folds', 1],
 ['IDs', 'BME'],
 ['Train Time', 0.15],
 ['Avg Time/Fold', 0.15],
 ['Avg Load States', 4.0],
 ['Sum Load States', 4.0],
 ['Super-States', 4.0],
 ['Loads', 1],
 ['Obs', 2001],
 ['Time Len', 524544],
 ['P0 Size', 4.0],
 ['P0 Non-Zero', 4.0],
 ['P0 Sparsity', 0.0],
 ['P0 bytes', 352.0],
 ['A Size', 16.0],
 ['A Non-Zero', 16.0],
 ['A Sparsity', 0.0],
 ['A bytes', 288.0],
 ['B Size', 8004.0],
 ['B Non-Zero', 2117.0],
 ['B Sparsity', 0.7],
 ['B bytes', 3052.0]]

In [9]:
print('-------------------------------- CSV REPORTING --------------------------------')
print()
print(','.join([c[0] for c in report]))
print(','.join([str(c[1]) for c in report]))

-------------------------------- CSV REPORTING --------------------------------

Model DB,Run Date,Dataset,Precision,Max States,Denoised?,Model Noise?,Folds,IDs,Train Time,Avg Time/Fold,Avg Load States,Sum Load States,Super-States,Loads,Obs,Time Len,P0 Size,P0 Non-Zero,P0 Sparsity,P0 bytes,A Size,A Non-Zero,A Sparsity,A bytes,B Size,B Non-Zero,B Sparsity,B bytes
BigO_L01,2021-04-30 20:06:08.870064,AMPdsR1_1min_A,10.0,4,False,False,1,BME,0.15,0.15,4.0,4.0,4.0,1,2001,524544,4.0,4.0,0.0,352.0,16.0,16.0,0.0,288.0,8004.0,2117.0,0.7,3052.0


In [10]:
#TEST Viterbi VITERBI
# python test_Algorithm.py bVa_BigO_L01 BigO_L01 AMPds_1min_A 10 A noisy 8 Viterbi > logs/bVa_BigO_L01.log

In [11]:
test_id ='bVa_BigO_L01'
precision = float(10)
measure = 'A'
denoised = 'noisy'
limit = 8
algo_name = 'Viterbi'
# limit = int(limit)
disagg_algo = getattr(__import__('algo_' + algo_name, fromlist=['disagg_algo']), 'disagg_algo')

In [12]:
print('Using disaggregation algorithm disagg_algo() from %s.' % ('algo_' + algo_name + '.py'))

datasets_dir = './datasets/%s.csv'
logs_dir = './logs/%s.log'
models_dir = './models/%s.json'

Using disaggregation algorithm disagg_algo() from algo_Viterbi.py.


In [13]:
print('Loading saved model %s from JSON storage (%s)...' % (modeldb, models_dir % modeldb))
fp = open(models_dir % modeldb, 'r')
jdata = json.load(fp)
fp.close()
folds = len(jdata)
print('\tModel set for %d-fold cross-validation.' % folds)
print('\tLoading JSON data into SSHMM objects...')

Loading saved model BigO_L01 from JSON storage (./models/BigO_L01.json)...
	Model set for 1-fold cross-validation.
	Loading JSON data into SSHMM objects...


In [14]:
sshmms = []
for data in jdata:
    sshmm = SuperStateHMM()
    sshmm._fromdict(data)
    sshmms.append(sshmm)
del jdata
labels = sshmms[0].labels
print('\tModel lables are: ', labels)

	Model lables are:  ['BME']


In [15]:
print('Testing %s algorithm load disagg...' % algo_name)
acc = Accuracy(len(labels), folds)
test_times = []
indv_tm_sum = 0.0
indv_count = 0
y_noise = 0.0
y_total = 0.0
calc_done = [0,0]
calc_total = [0,0]
unexpected_event = 0
adapted_event = 0
adapted_errors = 0
multi_switches_count = 0

Testing Viterbi algorithm load disagg...


In [16]:
folds = Folding(dataset_loader(datasets_dir % dataset, labels, precision, denoised), folds)
for (fold, priors, testing) in folds: 
    del priors
    tm_start = time()
    
    sshmm = sshmms[fold]
    obs_id = list(testing)[0]
    obs = list(testing[obs_id])
    hidden = [i for i in testing[labels].to_records(index=False)]
    
    print()
    print('Begin evaluation testing on observations, compare against ground truth...')
    print()
    pbar = ''
    pbar_incro = len(testing) // 20
    for i in range(1, len(obs)):
        multi_switches_count += (sum([i != j for (i, j) in list(zip(hidden[i - 1], hidden[i]))]) > 1)
        
        y0 = obs[i - 1]
        y1 = obs[i]
        
        start = time() 
        (p, k, Pt, cdone, ctotal) = disagg_algo(sshmm, [y0, y1])
        elapsed = (time() - start)

        s_est = sshmm.detangle_k(k)
        y_est = sshmm.y_estimate(s_est, breakdown=True)
        
        y_true = hidden[i]
        s_true = sshmm.obs_to_bins(y_true)

        acc.classification_result(fold, s_est, s_true, sshmm.Km)
        acc.measurement_result(fold, y_est, y_true)

        calc_done[0] += cdone[0]
        calc_done[1] += cdone[1]
        calc_total[0] += ctotal[0]
        calc_total[1] += ctotal[1]
        
        if p == 0.0:
            unexpected_event += 1
            
        indv_tm_sum += elapsed
        indv_count += 1
        
        y_noise += round(y1 - sum(y_true), 1)
        y_total += y1
        
        if not i % pbar_incro or i == 1:
            pbar += '=' #if i > 1 else ''
            disagg_rate = float(indv_tm_sum) / float(indv_count)
            print('\r\tCompleted %2d/%2d: [%-20s], Disagg rate: %12.6f sec/sample ' % (fold + 1, folds.folds, pbar[:20], disagg_rate), end='', flush=True)
            sys.stdout.flush()

        if limit != 'all' and i >= limit:
            print('\n\n *** LIMIT SET: Only testing %d obs. Testing ends now!' % limit)
            break;
                
    test_times.append((time() - tm_start) / 60)

    if limit != 'all' and i >= limit:
        break;


Loading AMPds R1 dataset at ./datasets/AMPdsR1_1min_A.csv...
	Setting timestamp column TimeStamp as index.
	Modfity data with precision 10.000000 then convert to int...
	Keeping only columns ['BME'].
	Denoising aggregate meter column WHE.
	Calculating unmetered column UNE.

Created 1 fold: 524544.


************************ VALIDATION ROUND:  1/ 1 ************************

Building priors and testing datasets...

Begin evaluation testing on observations, compare against ground truth...

	Completed  1/ 1: [=                   ], Disagg rate:     0.000000 sec/sample 

 *** LIMIT SET: Only testing 8 obs. Testing ends now!



In [17]:
print('Evaluation and accuracy testing complete:')
disagg_rate = indv_tm_sum / indv_count
print('\tTest Time was', round(sum(test_times), 2), ' min (avg ', round(sum(test_times) / len(test_times), 2), ' min/fold).')
if calc_total[0] > 0 and calc_total[1] > 0:
    print('\tOptimization (Time) - Viterbi Part 1:',  round((calc_total[0] - calc_done[0]) / calc_total[0] * 100, 2), '% saved, ', format(calc_done[0], ',d'), 'calculations (average', round(calc_done[0] / indv_count, 1), 'calculations each time)')
    print('\tOptimization (Time) - Viterbi Part 2:',  round((calc_total[1] - calc_done[1]) / calc_total[1] * 100, 2), '% saved, ', format(calc_done[1], ',d'), 'calculations (average', round(calc_done[1] / indv_count, 1), 'calculations each time)')
else:
    print('\tOptimization (Time): NOT BEING TRACKED!')
print('\tUnexpected events =', unexpected_event, ', Multiple switch events =', multi_switches_count, ', Adapted events =', adapted_event, '(errors =', adapted_errors, ')')

acc.print(test_id, labels, measure)

report = []
report.append(['Test ID', test_id])
report.append(['Run Date', datetime.now()])
report.append(['Dataset', dataset])
report.append(['Precision', precision])
report.append(['Denoised?', denoised])
report.append(['Model Noise?', ('UNE' in labels)])
report.append(['Limit', limit])
report.append(['Algorithm', algo_name])
report.append(['Folds', folds.folds])
report.append(['Measure', measure])
report.append(['Tests', indv_count])
report.append(['Total Calc Vp1', calc_total[0]])
report.append(['Actual Calc Vp1', calc_done[0]])
report.append(['Total Calc Vp2', calc_total[1]])
report.append(['Actual Calc Vp2', calc_done[1]])
report.append(['Test Time', round(sum(test_times), 2)])
report.append(['Avg Time/Fold', round(sum(test_times) / len(test_times), 2)])
report.append(['Disagg Time', '{0:.10f}'.format(disagg_rate)])
report.append(['Unexpected', unexpected_event])
report.append(['Adapted', adapted_event])
report.append(['Adapted Errors', adapted_errors])
report.append(['Mult-Switches', multi_switches_count])
report.append(['Noise', round(y_noise / y_total, 4)])


Evaluation and accuracy testing complete:
	Test Time was 0.01  min (avg  0.01  min/fold).
	Optimization (Time) - Viterbi Part 1: 0.0 % saved,  32 calculations (average 4.0 calculations each time)
	Optimization (Time) - Viterbi Part 2: 0.0 % saved,  128 calculations (average 16.0 calculations each time)
	Unexpected events = 8 , Multiple switch events = 0 , Adapted events = 0 (errors = 0 )


Classification & Esitmation Accuracies (Test bVa_BigO_L01):

	Accuracy     =   0.00% (8 incorrect tests)
	Precision    =   0.00%
	Recall       =   0.00%
	F-Score      =   0.00%

	M Precision  =   0.00%
	M Recall     =   0.00%
	M F-Score    =   0.00%

	FS Precision =   0.00%
	FS Recall    =   0.00%
	FS F-Score   =   0.00%

	NDE          = 100.00%
	MAPE         =   0.00%
	RMSE         =   1.00
	Esitmation   =   0.00% (8.0 A difference)

	|----------|----------|---------|-----------|-----------|----------|-------------------------------|------------|-------------------|
	|          |          |        

In [18]:
print('-------------------------------- CSV REPORTING --------------------------------')
print()
print(','.join([c[0] for c in report]))
print(','.join([str(c[1]) for c in report]))
print()
(acc_hdr, acc_det) = acc.csv(test_id, labels, measure)
print(acc_hdr)
print(acc_det)
print()
print('-------------------------------- ------------- --------------------------------')

print()
print('End Time = ', datetime.now(), '(local time)')
print()
print('DONE!!!')


-------------------------------- CSV REPORTING --------------------------------

Test ID,Run Date,Dataset,Precision,Denoised?,Model Noise?,Limit,Algorithm,Folds,Measure,Tests,Total Calc Vp1,Actual Calc Vp1,Total Calc Vp2,Actual Calc Vp2,Test Time,Avg Time/Fold,Disagg Time,Unexpected,Adapted,Adapted Errors,Mult-Switches,Noise
bVa_BigO_L01,2021-04-30 20:06:10.780468,AMPdsR1_1min_A,10.0,noisy,False,8,Viterbi,1,A,8,32,32,128,128,0.01,0.01,0.0001246333,8,0,0,0,0.0

Test ID,Item,Correct,Incorrect,TP,Inacc,APT,ITP,TN,FP,FN,Basic Acc,Precision,Recall,F-Score,M Precision,M Recall,M F-Score,FS Precision,FS Recall,FS F-Score,RMSE,NDE,MAPE,Kolter,Est Acc,Estimated,Actual,Diff,Est of Total,Actual of Total
bVa_BigO_L01,*TL,0,8,0,0.0,0,0,0,0,8,0.0,0,0.0,0,0,0.0,0,0,0.0,0,1.0,1.0,0.0,0.5,0.0,0.0,8.0,8.0,0,1.0
bVa_BigO_L01,BME,0,8,0,0.0,0,0,0,0,8,0.0,0,0.0,0,0,0.0,0,0,0.0,0,1.0,1.0,0.0,0.5,0.0,0.0,8.0,8.0,0,1.0


-------------------------------- ------------- --------------------------------

End Time