In [1]:
import pandas as pd
import numpy as np
import scipy.io
import matplotlib.pyplot as plt
import os as os

In [2]:
def load_aggregated_data(aggregated_path):
    """Plot the first field of the data struct stored in a MATLAB file.

    The file is read with ``scipy.io.loadmat``. The first variable whose name
    is not loader metadata (names wrapped in double underscores, such as
    ``__header__``, ``__version__`` and ``__globals__``) is taken as the data
    struct. Its first field is flattened to one dimension and drawn as a line
    plot titled 'Aggregated power'.

    Parameters
    ----------
    aggregated_path : str
        Path of the .mat file to read.

    Returns
    -------
    None
        The function only draws and shows the plot.

    Raises
    ------
    ValueError
        If the file contains no data variable.
    """
    mat_data = scipy.io.loadmat(aggregated_path)

    # Pick the variable by name pattern rather than by position: relying on
    # "the 4th key" breaks whenever the loader does not emit exactly three
    # metadata entries before the data.
    data_keys = [key for key in mat_data if not key.startswith('__')]
    if not data_keys:
        raise ValueError(f"No data variable found in {aggregated_path!r}")
    structured_array = mat_data[data_keys[0]]

    # [0][0] selects the single struct record, the trailing [0] its first
    # field (presumably the total real power, 'powerallphases', going by the
    # column list in transform_to_dataframe -- TODO confirm).
    # ravel() flattens both (n, 1) column and (1, n) row layouts.
    real_power = np.ravel(structured_array[0][0][0])

    # Create a plot
    plt.plot(real_power)

    # Add title and labels
    plt.title('Aggregated power')
    plt.xlabel('Index')
    plt.ylabel('Value')

    # Show the plot
    plt.show()
    
def transform_to_dataframe(aggregated_path):
    """Convert the data struct of a MATLAB file into a pandas DataFrame.

    The file is read with ``scipy.io.loadmat``. The first variable whose name
    is not loader metadata (names wrapped in double underscores) is taken as
    the data struct. Its first 16 fields are read by position, flattened to
    one dimension and stored under the names listed in ``column_names``.

    Parameters
    ----------
    aggregated_path : str
        Path of the .mat file to read.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``column_names``, in that order.

    Raises
    ------
    ValueError
        If the file contains no data variable.
    IndexError
        If the struct has fewer fields than there are column names.
    """
    # Fields are read by position, so this list is assumed to match the order
    # in which the struct stores them -- TODO confirm against dtype.names.
    column_names = [
        'powerallphases', 'powerl1', 'powerl2', 'powerl3', 'currentneutral',
        'currentl1', 'currentl2', 'currentl3', 'voltagel1', 'voltagel2',
        'voltagel3', 'phaseanglevoltagel2l1', 'phaseanglevoltagel3l1',
        'phaseanglecurrentvoltagel1', 'phaseanglecurrentvoltagel2',
        'phaseanglecurrentvoltagel3'
    ]

    # Load the MATLAB file
    mat_data = scipy.io.loadmat(aggregated_path, struct_as_record=True)

    # Pick the variable by name pattern rather than by position: relying on
    # "the 4th key" breaks whenever the loader does not emit exactly three
    # metadata entries before the data.
    data_keys = [key for key in mat_data if not key.startswith('__')]
    if not data_keys:
        raise ValueError(f"No data variable found in {aggregated_path!r}")

    # [0][0] selects the single record of the loaded struct array.
    array_data = mat_data[data_keys[0]][0][0]

    # Iterate over the names themselves so the field count is never out of
    # sync with the list; ravel() accepts both (n, 1) and (1, n) layouts.
    return pd.DataFrame({
        name: np.ravel(array_data[position])
        for position, name in enumerate(column_names)
    })

# Aggregated data

## Information about the fields:
* powerallphases: Sum of real power over all phases
* powerl1: Real power phase 1
* powerl2: Real power phase 2
* powerl3: Real power phase 3
* currentneutral: Neutral current
* currentl1: Current phase 1
* currentl2: Current phase 2
* currentl3: Current phase 3
* voltagel1: Voltage phase 1
* voltagel2: Voltage phase 2
* voltagel3: Voltage phase 3
* phaseanglevoltagel2l1: Phase shift between voltage on phase 2 and 1
* phaseanglevoltagel3l1: Phase shift between voltage on phase 3 and 1
* phaseanglecurrentvoltagel1: Phase shift between current/voltage on phase 1
* phaseanglecurrentvoltagel2: Phase shift between current/voltage on phase 2 
* phaseanglecurrentvoltagel3: Phase shift between current/voltage on phase 3

In [30]:
# Root folder of the ECO dataset. It can be overridden with the ECO_DATA_ROOT
# environment variable so the notebook also runs on machines where the data
# lives elsewhere; the default keeps the original location.
eco_data_root = os.environ.get(
    "ECO_DATA_ROOT",
    "/home/mrcong/Code/mylab-nilm-files/data_adapter/data/ECO",
)

# Folder holding the source .mat files with the aggregated measurements.
aggregated_base = os.path.join(eco_data_root, "01_mat", "aggregated")
# Folder that receives the converted .csv files.
csv_transform_base = os.path.join(eco_data_root, "01_csv_transform", "aggregated")

## Test data

In [21]:
# Smoke test: convert the file of a single day and inspect the result.
selected_dated = "2012-06-01.mat"
aggregated_path = os.path.join(aggregated_base, selected_dated)
# Last expression of the cell, so the notebook renders the returned DataFrame.
transform_to_dataframe(aggregated_path)

Unnamed: 0,powerallphases,powerl1,powerl2,powerl3,currentneutral,currentl1,currentl2,currentl3,voltagel1,voltagel2,voltagel3,phaseanglevoltagel2l1,phaseanglevoltagel3l1,phaseanglecurrentvoltagel1,phaseanglecurrentvoltagel2,phaseanglecurrentvoltagel3
0,1020,100,70,850,3.0,0.7,0.3,3.7,238.4,238,237,240,120,320,344,12
1,1020,100,70,850,3.0,0.7,0.3,3.7,238.5,238,237,240,120,320,344,12
2,1020,100,70,850,3.0,0.7,0.3,3.7,238.5,238,237,240,120,320,343,12
3,1020,100,70,850,3.0,0.7,0.3,3.7,238.5,238,237,240,120,320,343,12
4,1020,100,70,850,3.0,0.7,0.3,3.7,238.7,239,238,240,120,320,343,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86395,200,120,70,10,0.8,0.9,0.3,0.1,238.5,238,238,240,120,327,344,292
86396,200,120,70,10,0.8,0.9,0.3,0.1,238.7,238,238,240,120,327,344,293
86397,200,120,70,10,0.8,0.9,0.3,0.1,238.8,238,238,240,120,328,344,292
86398,200,120,70,10,0.8,0.9,0.3,0.1,238.8,238,238,240,120,328,344,293


### Transform all aggregated data to CSV

In [35]:
# Create the destination folder up front; to_csv does not create missing
# directories and would otherwise fail on the first file.
os.makedirs(csv_transform_base, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort them so every run
# processes the files in the same sequence.
files_and_directories = sorted(os.listdir(aggregated_base))
mat_files = [f for f in files_and_directories if f.lower().endswith('.mat')]
for mat_file in mat_files:
    file_path = os.path.join(aggregated_base, mat_file)
    # splitext drops only the final extension regardless of letter case, which
    # matches the case-insensitive filter above. The previous
    # str.replace(".mat", "") missed upper-case extensions and also removed
    # ".mat" from the middle of a name.
    file_name = os.path.splitext(mat_file)[0]
    trasform_df = transform_to_dataframe(aggregated_path=file_path)
    # index=False: the row index is a plain counter and is not written out.
    trasform_df.to_csv(os.path.join(csv_transform_base, file_name + ".csv"), index=False)