# Convert MATLAB Simulation Files to HDF5
The original data are stored as large tables in MATLAB v7.3 format.

We use [Mat73](https://pypi.org/project/mat73/) to read the data, then save the data into a documented format.

In [1]:
from batdata.schemas import BatteryMetadata
from batdata.data import BatteryDataset
from pathlib import Path
from mat73 import loadmat
from h5py import File
import pandas as pd
import numpy as np
import json

## Make the base metadata
All batteries will use the same base metadata.

In [2]:
# Base metadata attached to every dataset; only the data source is recorded here
metadata = BatteryMetadata(source='gasper_synthetic')

TODO: Make space in the schema for describing the code versions, inputs, etc.

## Iterate over Each File
Write out each row of the "constant" and "varying" simulation sets to a different HDF5 file.

In [3]:
# Directory that receives the converted HDF5 files; an existing directory is reused
out_path = Path('processed')
out_path.mkdir(parents=False, exist_ok=True)

In [4]:
# Factor used to rescale the time columns (the number of seconds in one day)
SECONDS_PER_DAY = 24 * 3600

# Simulation column names mapped to the names known to batdata
BATDATA_COLUMNS = {
    't': 'cycle_start',
    'dt': 'cycle_duration',
    'Q': 'energy_discharge',
    'q': 'capacity_discharge',
    'TdegC': 'temperature_average',
}


def _to_cycle_stats(inputs, output, columns):
    """Assemble the cycle-level table for a single simulation instance.

    Args:
        inputs: 2D array holding the input values
        output: 2D array holding the output values, with as many rows as ``inputs``
        columns: Names of the input columns followed by the output columns
    Returns:
        Dataframe holding inputs and outputs side by side, with a
        ``cycle_number`` column appended, the columns known to batdata renamed,
        and the cycle start and duration converted to seconds
    """
    stats = pd.DataFrame(np.concatenate([inputs, output], axis=1), columns=columns)
    stats['cycle_number'] = np.arange(len(stats))
    stats = stats.rename(columns=BATDATA_COLUMNS)
    stats[['cycle_duration', 'cycle_start']] *= SECONDS_PER_DAY  # Convert to seconds
    return stats


for name in ['constant', 'varying']:
    # Read the table of simulations stored in this file
    simulations = loadmat(f'raw/ROVI - {name} inputs.mat')['simulations']

    # Pull the column names out of the table, leaving only the data behind
    input_cols = simulations.pop('Input_vars')[0]
    output_cols = simulations.pop('Output_vars')[0]

    # Each (inputs, outputs) pair is one row, which is a different input parameter set
    paired_rows = zip(simulations['Inputs'], simulations['Outputs'])
    for param_id, (inputs, outputs) in enumerate(paired_rows):
        inputs = inputs.astype(np.float32)
        outputs = np.array(outputs, dtype=np.float32)  # 4D: n_samples x 1 x num_days x num_cols

        # Walk over the instances of this parameter set
        for instance_id, output in enumerate(outputs[:, 0, :, :]):
            data = _to_cycle_stats(inputs, output, input_cols + output_cols)

            # Wrap the table in a battery dataset and check it before writing
            bd = BatteryDataset(cycle_stats=data, metadata=metadata)
            bd.validate()

            # One file per (file name, parameter set, instance) combination
            target = out_path / f'{name}-{param_id}-{instance_id}.h5'
            bd.to_batdata_hdf(target, complevel=9, complib='zlib')

            # Stop after the first instance, so one file is written per parameter set
            # NOTE(review): confirm that skipping the remaining instances is intended
            break

In [5]:
# Display the cycle statistics of the most recently written dataset
bd.cycle_stats

Unnamed: 0,cycle_duration,cycle_start,dEFC,EFC,soc,dod,Ua,UaN,temperature_average,TdegK,...,r,r_LLI,r_LAM,q_loss_LLI_cal,q_loss_LLI_cyc,q_loss_LAM,r_gain_LLI_cal,r_gain_LLI_cyc,r_gain_LAM,cycle_number
0,0.0,0.0,0.000000,0.000000,0.394986,0.675589,0.130716,1.062732,32.696548,305.846558,...,1.000000,1.000000,1.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,0.000000,0
1,86400.0,86400.0,0.431958,0.431958,0.366224,0.796028,0.134674,1.094908,48.285999,321.436005,...,1.001803,1.001803,0.976232,0.004284,0.000303,1.000000e-10,0.000244,0.001559,-0.023768,1
2,86400.0,172800.0,0.401069,0.833027,0.822457,0.138370,0.092163,0.749295,11.585956,284.735962,...,1.001946,1.001946,0.976232,0.004388,0.000303,1.005290e-10,0.000386,0.001560,-0.023768,2
3,86400.0,259200.0,1.654039,2.487066,0.589398,0.799052,0.119488,0.971448,12.138327,285.288330,...,1.002192,1.002192,0.976232,0.004392,0.000355,1.061520e-10,0.000445,0.001747,-0.023768,3
4,86400.0,345600.0,0.906206,3.393272,0.620053,0.114531,0.117489,0.955192,15.461373,288.611359,...,1.002277,1.002277,0.976232,0.004402,0.000356,1.073742e-10,0.000528,0.001749,-0.023768,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7295,86400.0,630288000.0,1.152271,7232.005859,0.486451,0.855861,0.123895,1.007273,28.675150,301.825165,...,3.000000,3.000000,3.000000,0.142685,0.185251,1.010000e+00,3.000000,0.537597,2.376791,7295
7296,86400.0,630374400.0,0.460114,7232.466309,0.261990,0.012202,0.160039,1.301132,43.605671,316.755676,...,3.000000,3.000000,3.000000,0.142685,0.185251,1.010000e+00,3.000000,0.537598,2.376791,7296
7297,86400.0,630460800.0,0.344893,7232.811035,0.226749,0.463008,0.171532,1.394570,43.750523,316.900513,...,3.000000,3.000000,3.000000,0.142685,0.185254,1.010000e+00,3.000000,0.537606,2.376791,7297
7298,86400.0,630547200.0,0.761166,7233.572266,0.619134,0.634274,0.117559,0.955763,37.050014,310.200012,...,3.000000,3.000000,3.000000,0.142687,0.185266,1.010000e+00,3.000000,0.537634,2.376791,7298


## 