### Read and process the data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, InputLayer
import random
import os

import math

import pylab as p
import seaborn as sns
from datetime import datetime, timedelta
import string
from sklearn.preprocessing import OneHotEncoder

In [2]:
def process_and_trim(LF1I, LF1V, LF2I, LF2V, TimeTicks1, TimeTicks2):
    """Align the readings of both phases and derive per-sample power series.

    The current/voltage frames are expected to hold complex numbers written
    as text with an ``i`` imaginary suffix. All six frames are cut to the row
    count of the shortest current/voltage frame; the time-tick frames are
    trimmed to that length but do not take part in choosing it.

    Returns:
        ``(L1_actual_power, L2_actual_power, TimeTicks1, TimeTicks2, power)``:
        one 1-D array per phase, the trimmed time-tick frames, and the
        element-wise sum of the two phase arrays.
    """
    def parse_complex(frame):
        # Python's complex() only understands "j" as the imaginary unit.
        return np.array(
            [[complex(cell.replace("i", "j")) for cell in row] for row in np.array(frame)]
        )

    def phase_power(volts, amps):
        # Complex power per column is V * conj(I); columns are summed per row.
        per_column = volts * np.conjugate(amps)
        summed = np.sum(per_column, axis=1)
        # The power factor is derived from the first column only.
        power_factor = np.cos(np.angle(per_column[:, 0]))
        return np.real(summed) * power_factor

    print("Matchine the lengths...")
    shortest = min(map(len, (LF1I, LF1V, LF2I, LF2V)))

    # head() drops the surplus trailing rows of the longer frames.
    LF1I, LF1V, TimeTicks1, LF2I, LF2V, TimeTicks2 = (
        frame.head(shortest)
        for frame in (LF1I, LF1V, TimeTicks1, LF2I, LF2V, TimeTicks2)
    )
    print("Done!")

    print("Making volts and amps into complex numbers...")
    volts_1 = parse_complex(LF1V)
    amps_1 = parse_complex(LF1I)
    volts_2 = parse_complex(LF2V)
    amps_2 = parse_complex(LF2I)
    print("Done!")

    print("Calculating power...")
    L1_actual_power = phase_power(volts_1, amps_1)
    L2_actual_power = phase_power(volts_2, amps_2)

    power = L1_actual_power + L2_actual_power[:len(L1_actual_power)]
    print("Done!")

    return L1_actual_power, L2_actual_power, TimeTicks1, TimeTicks2, power

# `directory` is the path to the folder that holds the house's data
def read_files_in_directory(directory):
    """Build one power DataFrame per sub-folder found under ``directory``.

    The tree is walked with ``os.walk``; any folder whose path contains
    "Testing" is ignored. A folder contributes a frame only when its
    LF1I/LF1V/LF2I/LF2V CSV files were all read and are non-empty.

    Returns:
        A list of DataFrames with the columns ``L1_actual_power``,
        ``L2_actual_power``, ``TimeTicks1``, ``TimeTicks2`` and ``Power``.
    """
    signal_stems = ("LF1I", "LF1V", "LF2I", "LF2V")
    tick_stems = ("TimeTicks1", "TimeTicks2")
    output_columns = ("L1_actual_power", "L2_actual_power", "TimeTicks1", "TimeTicks2", "Power")
    data = []

    for root, _, files in os.walk(directory):
        if "Testing" in root:
            continue

        # Every expected file starts as an empty frame so that a missing
        # file can be recognised after the scan.
        frames = {f"{stem}.csv": pd.DataFrame() for stem in signal_stems + tick_stems}
        for file in files:
            # Anything else (e.g. TaggingInfo.csv or the "HF" files) is not
            # one of the expected names and is therefore never opened.
            if file in frames:
                frames[file] = pd.read_csv(os.path.join(root, file))

        signals = [frames[f"{stem}.csv"] for stem in signal_stems]
        if any(frame.empty for frame in signals):
            continue

        print(f"{root}: ")
        ticks = [frames[f"{stem}.csv"] for stem in tick_stems]
        results = process_and_trim(*signals, *ticks)

        # process_and_trim returns its values in the same order as output_columns.
        day_data = pd.DataFrame()
        for column, values in zip(output_columns, results):
            day_data[column] = values
        data.append(day_data)

    return data

In [3]:
# Build the path with os.path.join so the separator matches the host OS.
# On Windows this produces the same ".\dataset\H1" string as the previous
# hard-coded literal; elsewhere a backslash literal would name a folder that
# does not exist and os.walk would silently yield nothing.
data = read_files_in_directory(os.path.join(".", "dataset", "H1"))

.\dataset\H1\Tagged_Training_04_13_1334300401: 
Matchine the lengths...
Done!
Making volts and amps into complex numbers...
Done!
Calculating power...
Done!
.\dataset\H1\Tagged_Training_10_22_1350889201: 
Matchine the lengths...
Done!
Making volts and amps into complex numbers...
Done!
Calculating power...
Done!
.\dataset\H1\Tagged_Training_10_23_1350975601: 
Matchine the lengths...
Done!
Making volts and amps into complex numbers...
Done!
Calculating power...
Done!
.\dataset\H1\Tagged_Training_10_24_1351062001: 
Matchine the lengths...
Done!
Making volts and amps into complex numbers...
Done!
Calculating power...
Done!
.\dataset\H1\Tagged_Training_10_25_1351148401: 
Matchine the lengths...
Done!
Making volts and amps into complex numbers...
Done!
Calculating power...
Done!
.\dataset\H1\Tagged_Training_12_27_1356595201: 
Matchine the lengths...
Done!
Making volts and amps into complex numbers...
Done!
Calculating power...
Done!


In [4]:
def moving_average(array, window):
    """Smooth ``array`` with an unweighted sliding mean over ``window`` samples.

    Only positions where the window fully overlaps the input are kept, so for
    ``window <= len(array)`` the result has ``len(array) - window + 1`` entries.
    """
    # Uniform kernel: every sample in the window carries weight 1 / window.
    kernel = np.divide(np.ones(window), window)
    return np.convolve(array, kernel, mode="valid")

def normalize(array):
    """Min-max scale ``array`` into the range [0, 1].

    Args:
        array: Numeric array-like supporting arithmetic with scalars.

    Returns:
        ``(array - min) / (max - min)``. When every value is identical the
        range is zero; the result is then all zeros instead of the NaNs that
        a 0/0 division would produce.
    """
    min_val = np.min(array)
    span = np.max(array) - min_val

    # A flat signal has zero range. ``array - min_val`` is already all zeros
    # in that case, so dividing by 1 yields zeros and avoids 0/0.
    if span == 0:
        span = 1

    return (array - min_val) / span

def detect_cusum(array, threshold=1, drift=0):
    """Locate change points in ``array`` with a two-sided cumulative sum.

    For each sample the step from its predecessor is added to a running
    "rise" sum and subtracted from a running "fall" sum; ``drift`` is taken
    off both and each sum is clamped at zero. When either sum exceeds
    ``threshold`` the sample index is recorded and both sums restart at zero.

    Args:
        array: 1-D numeric sequence (ndarray, list, ...).
        threshold: Cumulative change that must be exceeded to flag an event.
        drift: Amount subtracted from both sums at every step.

    Returns:
        1-D integer ndarray of event indices in ascending order. It is an
        empty 1-D array when nothing is detected, so callers can always treat
        the result as a flat sequence.
    """
    values = np.asarray(array)
    rise = 0.0
    fall = 0.0
    events = []  # plain list: appending is O(1), unlike repeated np.append

    for i in range(1, values.size):
        # float() keeps the accumulation in double precision whatever the input dtype.
        step = float(values[i] - values[i - 1])

        # running sums for upward / downward changes
        rise = max(0, rise + step - drift)
        fall = max(0, fall - step - drift)

        # a change is detected: record it and restart both sums
        if rise > threshold or fall > threshold:
            events.append(i)
            rise, fall = 0.0, 0.0

    return np.array(events, dtype=int)

def filter_close_events(event_indices, min_separation):
    """Drop events that follow their predecessor too closely.

    The first event is always kept. Every later event is kept only when it
    lies at least ``min_separation`` positions after the event immediately
    before it in ``event_indices`` (whether or not that predecessor was
    itself kept).

    Args:
        event_indices: 1-D sequence of event positions in ascending order.
        min_separation: Minimum gap to the preceding event for an event to
            be kept.

    Returns:
        ndarray of the retained indices; an empty integer array when
        ``event_indices`` is empty.
    """
    indices = np.asarray(event_indices)

    # Nothing to filter; also avoids indexing element 0 of an empty input.
    if indices.size == 0:
        return np.array([], dtype=int)

    # gaps[k] is the distance from event k to event k + 1.
    gaps = np.diff(indices)
    keep = np.concatenate(([True], gaps >= min_separation))
    return indices[keep]

In [11]:
def calculate_events(data, window=5, threshold=0.015, drift=0.001, min_separation=24):
    """Flag power-change events in every DataFrame of ``data``.

    Each frame's "Power" column is smoothed with ``moving_average``, scaled
    with ``normalize``, scanned with ``detect_cusum`` and thinned with
    ``filter_close_events``. The frames are modified in place: an "Events"
    column is (re)created with 0 everywhere and 1 at the detected positions.

    Args:
        data: List of DataFrames that each contain a "Power" column.
        window: Number of samples in the moving-average window.
        threshold: CUSUM detection threshold on the normalized signal.
        drift: CUSUM drift on the normalized signal.
        min_separation: Minimum gap (in samples) between kept events.

    Returns:
        The same ``data`` list, with the "Events" column set on every frame.
    """
    for day in data:
        smooth_power = moving_average(day["Power"], window)
        normalized_power = normalize(smooth_power)

        datapoint_events = detect_cusum(normalized_power, threshold=threshold, drift=drift)
        datapoint_events = filter_close_events(datapoint_events, min_separation)

        # Event positions are indices into the smoothed series and are used
        # directly as row labels of the frame.
        day["Events"] = 0
        day.loc[datapoint_events, "Events"] = 1

    return data

In [12]:
# calculate_events writes an "Events" column into every frame of `data` in
# place and returns that same list, so `data` still refers to the same frames.
data = calculate_events(data)
# NOTE(review): the output recorded below shows bare index arrays rather than
# a list of DataFrames, so it was presumably produced by an earlier version
# of this cell -- re-run to refresh.
print(data)

[303160 303212 303510 303583 305308 305352 305782 315754 317263 317949
 320234 321711 322269 323858 324464 324821 326122 326300 326359 326491
 326525 327303 327363 327618 327924 328358 329890 331511 333177 333285
 333461 333688 333882 334223 334728 335205 335377 335411 336164 336415
 336675 336939 337692 337812 337849 338066 338306 338405 338492 338623
 338959 339632 340025 340290 340496 341004 342843 343360 344780 345924
 347317 348236 349568 350118 350223 350480 350593 350722 350826 350877
 350965 351558 351662 351766 351877]
[  3207   8756  14270  19805  25329  30828  36340  41806  47339  52786
  58320  63752  69290  74680  80228  85559  91125  96495 102103 107467
 113078 118402 124005 129316 134926 140223 145842 151072 156706 160251
 162840 171239 185849 191614 197957 203643 209411 215029 220575 226161
 231587 237172 242620 248199 253606 259094 264519 270024 275573 281076
 286617 292076 297592 298141 298713 301693 301920 302372 302739 302982
 303092 303460 303818 304175 306780 3071