In [1]:
import warnings
warnings.filterwarnings(action="ignore")

import numpy as np
import pandas as pd



In [2]:
def chunkify(df: pd.DataFrame, chunk_size: int, stride: int = 1):
    """Split ``df`` into row windows of ``chunk_size`` rows each.

    Window start positions are ``0, stride, 2 * stride, ...``. Only full
    windows are produced, including the one that ends exactly on the last
    row of ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to slice by row position.
    chunk_size : int
        Number of rows per window; must be at least 1.
    stride : int, default 1
        Number of rows between consecutive window starts; must be at least 1.
        ``stride == chunk_size`` yields non-overlapping windows.

    Returns
    -------
    list of pd.DataFrame
        The windows in positional order. When ``df`` has fewer rows than
        ``chunk_size`` the list holds a single element, the whole frame, so
        callers can always index the result by window number.

    Raises
    ------
    ValueError
        If ``chunk_size`` or ``stride`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    length = df.shape[0]

    # Not enough rows for one full window: hand back the frame as a single
    # (short) window, wrapped in a list to keep the return type uniform.
    if length < chunk_size:
        return [df.iloc[:]]

    # The last valid start is ``length - chunk_size``; ``range`` excludes its
    # stop value, hence the ``+ 1`` so the final full window is not dropped.
    last_start = length - chunk_size
    return [df.iloc[i:i + chunk_size] for i in range(0, last_start + 1, stride)]

In [3]:
# --- Run configuration: index column, predictor column(s) and date ranges ---
index_name = 'timestamp'
predictors = ['mains']
training_start, training_end = '2011-04-16', '2011-05-16'
test_start, test_end = '2011-05-17', '2011-05-31'

# Raw table; its column names are inspected below, before the timestamp
# column is moved into the index.
df_active_value = pd.read_csv('../data/redd_active_value_f1hz.csv')

# Every column except the timestamp.
attributes = [col for col in df_active_value.columns.values if col != 'timestamp']

# Label columns: whatever remains after removing the timestamp, the
# predictor column and the two spectrum columns.
non_label_columns = ['timestamp', 'mains', 'amplitude_spectrum', 'phase_spectrum']
labels = [col for col in df_active_value.columns.values if col not in non_label_columns]
# Alternative kept from the original: restrict the labels to a fixed subset.
#labels = ['Fridge01','Dish washer01','Microwave01','Washer dryer01','Washer dryer02']

# Parse the timestamp column as datetimes and use it as the frame's index,
# which is what the date-string slicing in later cells relies on.
df_active_value[index_name] = pd.to_datetime(df_active_value[index_name])
df_active_value = df_active_value.set_index(index_name)

In [4]:
# Window length in rows. Later cells pass it to chunkify as both the chunk
# size and the stride (so windows do not overlap) and embed it in the names
# of the exported label CSV files.
# NOTE(review): the input file name suggests 1 Hz sampling, which would make
# this 1120 seconds per window — confirm.
windows_size_opt = 1120

In [5]:
# Slice the indexed frame into the training and test date ranges.
training_active_value_set = df_active_value.loc[training_start:training_end]
test_active_value_set = df_active_value.loc[test_start:test_end]

# For every label column, chunk a sub-frame holding the predictor column(s)
# plus that label column. Chunk size and stride are both windows_size_opt,
# so consecutive windows do not overlap.
data_value_train_min = {}
data_value_test_min = {}
for a in labels:  # type: ignore
    p = [*predictors, a]
    data_value_train_min[a] = chunkify(
        training_active_value_set[p],  # type: ignore
        windows_size_opt,
        windows_size_opt,
    )
    data_value_test_min[a] = chunkify(
        test_active_value_set[p],  # type: ignore
        windows_size_opt,
        windows_size_opt,
    )

In [6]:
# Read the threshold table and turn it into a plain dict keyed by the
# 'appliance' column, with the 'threshold' column as values.
df_appliance_threshold = pd.read_csv('../data/appliance_threshold.csv')
threshold_by_appliance = df_appliance_threshold.set_index('appliance').loc[:, 'threshold']
appliance_threshold = threshold_by_appliance.to_dict()

# Helper to compute the proportion of values above the threshold
def proportion_above_threshold(values, threshold):
    """Return the fraction of ``values`` strictly greater than ``threshold``.

    Parameters
    ----------
    values : array-like
        A NumPy array, pandas Series, list or tuple of numbers.
    threshold : number
        Value each element is compared against with ``>``.

    Returns
    -------
    float
        Count of elements above ``threshold`` divided by ``len(values)``.
        An empty input yields ``0.0``.
    """
    if isinstance(values, (list, tuple)):
        # Plain sequences do not support element-wise ``>``; convert first.
        values = np.asarray(values)

    count = len(values)
    if count == 0:
        # Avoid 0 / 0, which would give NaN plus a RuntimeWarning; an empty
        # input has nothing above the threshold.
        return 0.0

    return np.sum(values > threshold) / count

def _label_windows(chunks_by_label, label_names, thresholds):
    """Build one row of 0/1 labels per window.

    Parameters
    ----------
    chunks_by_label : dict
        Maps each label name to the windows produced by ``chunkify`` for it:
        a list of DataFrames, or a single DataFrame when the source frame was
        not longer than one window.
    label_names : list of str
        Label columns to evaluate; the window count is taken from the first.
    thresholds : dict
        Maps each label name to its threshold.

    Returns
    -------
    pd.DataFrame
        One row per window, with a ``'row'`` column holding the window number
        and one column per label holding 1 when more than half of that
        window's values for the label exceed the threshold, else 0.
    """
    # chunkify may hand back a bare DataFrame instead of a list; treat that
    # as a single window so it can be indexed by window number like the rest.
    windows = {
        name: [chunks_by_label[name]]
        if isinstance(chunks_by_label[name], pd.DataFrame)
        else chunks_by_label[name]
        for name in label_names
    }

    records = []
    for window_number in range(len(windows[label_names[0]])):
        record = {'row': window_number}
        for name in label_names:
            # Proportion of the window's values above the label's threshold.
            proportion = proportion_above_threshold(
                windows[name][window_number][name], thresholds[name]
            )
            # Label 1 only when the majority (more than 50%) is above it.
            record[name] = 1 if proportion > 0.5 else 0
        records.append(record)
    return pd.DataFrame(records)


train_value_min_label_list = _label_windows(data_value_train_min, labels, appliance_threshold)
test_value_min_label_list = _label_windows(data_value_test_min, labels, appliance_threshold)

train_value_min_label_list.to_csv(f'../data/train_value_min_label_windows_{windows_size_opt}_llm.csv', index=False)
test_value_min_label_list.to_csv(f'../data/test_value_min_label_windows_{windows_size_opt}_llm.csv', index=False)
