In [None]:
import sys, os

import random
import torch
import statistics as stat
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import nilmtk

# Make the parent of the current working directory importable (presumably the
# repository root that holds the `src` package imported in the next cell).
# Guarded so that re-running this cell does not append duplicate entries.
_project_root = os.path.dirname(os.getcwd())
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [None]:
import src.functions as F
from src.enumerations import *
from src.objs import DatasetProfile, ProfileHandler, NormalizationHandler
from src.utils import paths_manager as pathsman
from src.stats.Statistical_analysis import extract_states, state_plot

In [None]:
## Variable declaration for dataset
# Building to analyse and the time window applied to the dataset.
BUILDING = 1
START_TIME, END_TIME = "2015-01-01", "2015-03-31"
# (alternative shorter window: END_TIME = "2015-01-15")

# Appliance names used as keys throughout the notebook.
FRIDGE_FREEZER, KETTLE = "fridge freezer", "kettle"
DISHWASHER, MICROWAVE = "dish washer", "microwave"
WASHER_DRYER = "washer dryer"

# Processing order of the appliances; later cells index columns by this order.
applist = [
    FRIDGE_FREEZER,
    WASHER_DRYER,
    KETTLE,
    DISHWASHER,
    MICROWAVE,
]


In [None]:
## Dataset initialization
# Open the nilmtk dataset stored at the project-configured path
# (presumably the UK-DALE HDF5 file, going by the constant's name).
dataset = nilmtk.DataSet(pathsman.UKDALE_H5_PATH)
# Limit the dataset to the [START_TIME, END_TIME] window declared above.
dataset.set_window(start=START_TIME, end=END_TIME)

In [None]:
def quantile_filter_2(sequence_length, data, p=50):
    """Apply a sliding-window percentile filter to ``data``.

    Windows are produced by ``F.scan_sequences`` in ``"odd_window"`` mode
    (presumably one window per row, since the percentile is reduced along
    axis 1 -- TODO confirm against ``src.functions``).

    Args:
        sequence_length: Window length forwarded to ``F.scan_sequences`` as ``seq_len``.
        data: Series to filter.
        p: Percentile in [0, 100] taken over every window; 50 yields a
            median-like filter.

    Returns:
        Array with the ``p``-th percentile of each window, chosen with the
        "nearest" rule so every output is an actual sample of its window.
    """
    windows = F.scan_sequences(data=data, mode="odd_window", seq_len=sequence_length)
    try:
        # NumPy >= 1.22 names this argument `method`; `interpolation` is deprecated.
        return np.percentile(windows, p, axis=1, method="nearest")
    except TypeError:
        # Older NumPy releases only know the `interpolation` keyword.
        return np.percentile(windows, p, axis=1, interpolation="nearest")

In [None]:
# Electricity-meter handle of the selected building; indexed by appliance name below.
# NOTE: relies on `dataset` still being the nilmtk DataSet -- the name is rebound
# to the torch Dataset further down, so re-run the initialization cell first
# if this cell is executed again later.
power_elec = dataset.buildings[BUILDING].elec

In [None]:
print(f"Activation stats in the period [{START_TIME}, {END_TIME}]")
# One entry per appliance: [count, mean length, max length, min length].
row = []

for app in applist:
    activations = power_elec[app].get_activations()
    # Measure every activation once instead of traversing the list three times.
    lengths = [len(activation) for activation in activations]
    total_num = len(lengths)
    if lengths:
        average_length = stat.mean(lengths)
        max_length = max(lengths)
        min_length = min(lengths)
    else:
        # An appliance without activations in the window would make
        # stat.mean/max/min raise; report NaN instead of aborting the table.
        average_length = max_length = min_length = float("nan")
    row.append([total_num, average_length, max_length, min_length])

pd.DataFrame(row, columns=["No. of Activations", "Average Length", "Max Length", "Min Length"], index=applist)

In [None]:
# Per-appliance settings. "window" is the filter window handed to
# quantile_filter_2; "min", "max" and "on_power_threshold" are placeholders
# that the processing loop overwrites with values derived from the data.
# Every appliance uses a window of 50 except the dishwasher, which uses 10.
info = {
    appliance: {
        "window": 10 if appliance == DISHWASHER else 50,
        "min": 0,
        "max": 0,
        "on_power_threshold": 10,
    }
    for appliance in (FRIDGE_FREEZER, WASHER_DRYER, KETTLE, DISHWASHER, MICROWAVE)
}

In [None]:
# for app in applist:
#         power_elec[app].power_series_all_data().plot()

In [None]:
# Passed to F.extend_activations as `num_full_samples`.
FULL_NUM_SAMPLES = 1_200_000

# Per-appliance arguments for F.extend_activations; currently every appliance
# uses the same "rightpadder" mode with padding_mode 0.
ACTIVATION_EXTENSION_MAPPING = {
    appliance: {"mode": "rightpadder", "padding_mode": 0}
    for appliance in (FRIDGE_FREEZER, WASHER_DRYER, KETTLE, DISHWASHER, MICROWAVE)
}

In [None]:
# Per-appliance results, appended in `applist` order.
unfiltered_power = []   # extended activations as returned by F.extend_activations
filtered_power = []     # the same series after the percentile (p=50) filter
states = []             # thresholded on/off series
targets = []            # filtered series min-max normalized per appliance

# Initial configuration only: the handler is re-configured for every appliance
# inside the loop (and again for the aggregate signal in a later cell).
normalizer = NormalizationHandler(mode="minmax", min_val=0, max_val=3300)

for app in applist:
    elec = power_elec[app]
    mode = ACTIVATION_EXTENSION_MAPPING[app]["mode"]
    padding_mode = ACTIVATION_EXTENSION_MAPPING[app]["padding_mode"]

    extended_activations = F.extend_activations(elec, mode=mode, num_full_samples=FULL_NUM_SAMPLES, padding_mode=padding_mode)
    unfiltered_power.append(extended_activations)

    filtered_data = quantile_filter_2(info[app]["window"], extended_activations, p=50)
    filtered_power.append(filtered_data)

    # Record the normalization range and threshold; the last cell reads
    # info[app]["min"/"max"] to undo the normalization.
    info[app]['min'] = 0.0
    info[app]['max'] = float(filtered_data.max())
    info[app]['on_power_threshold'] = elec.on_power_threshold()

    # Binarize with the threshold just stored rather than querying the meter a
    # second time, so `info` and `states` can never disagree.
    binarized_data = F.binarization(filtered_data, mode="threshold", threshold=info[app]['on_power_threshold'])
    states.append(binarized_data)

    normalizer.select_mode(mode="minmax", min_val=info[app]['min'], max_val=info[app]['max'])
    normalized_data = normalizer.normalize(filtered_data)
    targets.append(normalized_data)

In [None]:
# Stack the per-appliance series and transpose so that each appliance becomes a
# column (assuming every list entry is 1-D -- TODO confirm).
# The isinstance guards keep this cell idempotent: once the names hold arrays,
# running `np.stack(x).T` again would transpose them back, so they are left alone.
if isinstance(states, list):
    states = np.stack(states).T
if isinstance(targets, list):
    targets = np.stack(targets).T

In [None]:
# Aggregate signal: element-wise sum of the unfiltered per-appliance series.
mains = np.asarray(unfiltered_power).sum(axis=0)

In [None]:
# Start from the raw aggregate rebuilt from `unfiltered_power` instead of reading
# `mains`, which this cell overwrites. On a fresh run the values are identical;
# on a re-run the cell no longer filters its own already-filtered output.
mains_raw = np.sum(unfiltered_power, axis=0)

# Percentile (p=50) filter with a window of 10 over the raw aggregate.
mains_denoise = quantile_filter_2(10, mains_raw, 50)

# Shift the raw aggregate down by its 1st percentile ...
mains_shifted = mains_raw - np.percentile(mains_raw, 1)
# ... but never let it drop below the filtered signal ...
mains_floored = np.where(mains_shifted < mains_denoise, mains_denoise, mains_shifted)
# ... and smooth the result with the same filter.
mains = quantile_filter_2(10, mains_floored, 50)

In [None]:
# Min-max normalize the filtered aggregate using its own minimum and maximum.
# `normalizer` is stateful: select_mode() must run before each normalize() call.
normalizer.select_mode(mode="minmax", min_val=mains_denoise.min(), max_val=mains_denoise.max())
norm_mains_denoise = normalizer.normalize(mains_denoise)

# Same procedure for the shifted/floored aggregate.
# NOTE(review): only `norm_mains_denoise` is fed to the Dataset in the cells
# visible here; confirm whether `norm_mains` is still needed.
normalizer.select_mode(mode="minmax", min_val=mains.min(), max_val=mains.max())
norm_mains = normalizer.normalize(mains)

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Sliding-window dataset over an aggregate signal.

    Sample ``i`` pairs the input window ``inputs[i : i + seq_len]`` with the
    target and state rows at the window's last time step
    (``i + seq_len - 1``).

    Args:
        inputs: Array-like exposing ``.shape`` and slice indexing; the windowed signal.
        targets: Array-like indexed along its first axis like ``inputs``.
        states: Array-like indexed along its first axis like ``inputs``.
        seq_len: Window length. Odd values are rounded up to the next even
            number (the default 99 therefore yields windows of 100).
    """

    def __init__(self,  inputs, targets, states,  seq_len=99):
        self.inputs = inputs
        self.targets = targets
        self.states  = states
        # Force an even window length.
        self.seq_len = seq_len if seq_len % 2 == 0 else seq_len + 1
        # Clamp at zero so a series shorter than one window produces an empty
        # dataset instead of a negative (invalid) length.
        # NOTE(review): `N - seq_len + 1` windows would fit; the final one is
        # excluded here, which is kept unchanged for backward compatibility.
        self.len = max(0, self.inputs.shape[0] - self.seq_len)
        # No longer used internally; kept because it is a public attribute.
        self.indices = np.arange(self.inputs.shape[0])

    def __len__(self):
        """Denotes the total number of samples."""
        return self.len

    def get_sample(self, index):
        """Return ``(input_window, target_row, state_row)`` for window ``index``.

        Plain slices replace the former sorted index lists: the positions are
        contiguous and already ordered, so the selected elements are the same.
        The target/state are returned as length-1 slices (leading axis kept).
        """
        window_end = index + self.seq_len
        last_step = slice(window_end - 1, window_end)
        return self.inputs[index:window_end], self.targets[last_step], self.states[last_step]

    def __getitem__(self, index):
        """Return the sample as tensors: inputs ``(seq_len, 1)`` float, target float, state long.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
                Without this, plain iteration (``for sample in dataset``)
                would never terminate.
        """
        if index < 0:
            index += self.len
        if not 0 <= index < self.len:
            raise IndexError(f"index out of range for dataset of length {self.len}")
        inputs, target, state = self.get_sample(index)
        return torch.tensor(inputs).unsqueeze(-1).float(), torch.tensor(target).float().squeeze(), torch.tensor(state).long().squeeze()

In [None]:
# Windowed dataset over the normalized filtered aggregate.
# NOTE: this rebinds `dataset`, which previously held the nilmtk DataSet.
dataset = Dataset(
    inputs=norm_mains_denoise,
    targets=targets,
    states=states,
    seq_len=100,
)

In [None]:
# Loader settings.
batch_size = 8
num_workers = 1

# Sequential (unshuffled) loader, so batches come out in time order.
train_dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=True,
    num_workers=num_workers,
)

In [None]:
# Accumulator for the per-batch target/state dictionaries collected from the loader.
outputs = list()

In [None]:
# Reset the accumulator in the same cell that fills it; otherwise re-running
# this cell would append a second copy of every batch.
outputs = []
for item in train_dataloader:
    # item is (inputs, target, state); only the target power and state are kept.
    logs = {"power": item[1], "state": item[2]}
    outputs.append(logs)

In [None]:
# Concatenate all collected batches along the batch axis and move them to NumPy.
power = torch.cat([batch["power"] for batch in outputs], dim=0).cpu().numpy()
state = torch.cat([batch["state"] for batch in outputs], dim=0).cpu().numpy().astype(np.int32)

# Undo the per-appliance min-max normalization, column by column, in place.
for idx, app in enumerate(applist):
    offset = info[app]["min"]
    scale = info[app]["max"] - offset
    power[:, idx] = power[:, idx] * scale + offset