# Convert MATLAB Simulation Files to HDF5
The original data are stored as large tables in MATLAB v7.3 format.

We use [Mat73](https://pypi.org/project/mat73/) to read the data, then save the data into a documented format.

In [1]:
from battdat.schemas.battery import ElectrodeDescription
from battdat.schemas.column import CycleLevelData, ColumnInfo
from battdat.schemas import BatteryMetadata, BatteryDescription, ModelMetadata
from battdat.data import CellDataset
from pathlib import Path
from tables import File
from mat73 import loadmat
from shutil import rmtree
import pandas as pd
import numpy as np
import json

Configuration

In [2]:
# Names of the columns in the raw data (keys) and the schema column each one maps onto (values)
known_columns = dict(
    t='cycle_start',
    dt='cycle_duration',
    Q='energy_discharge',
    q='capacity_discharge',
    TdegC='temperature_average',
)

## Make the base metadata
All batteries will use the same metadata and are based on the battery described in [Smith et al.](https://ieeexplore.ieee.org/document/7963578)

In [3]:
# Description of the physical cell, shared by every dataset written by this notebook
kokam_cell = BatteryDescription(
    manufacturer='kokam',
    design='SLPB 120255255 277.50 Wh 2015',
    nominal_capacity=75,
    cathode=ElectrodeDescription(name='NMC'),
    anode=ElectrodeDescription(name='graphite'),
)

# Description of the model that produced the simulated data
blast_model = ModelMetadata(
    name='BLAST',
    version='0.2.1',
    type='physics',
    models=['MaterialsModel'],
    simulation_type='StandaloneModelSimulation',
    references=[
        'https://github.com/NREL/BLAST-Lite/blob/main/python/nmc111_gr_Kokam75Ah_2017.py',
        'https://doi.org/10.23919/ACC.2017.7963578',
    ],
)

# Combine both descriptions into the single metadata record used for every battery
metadata = BatteryMetadata(
    source='gasper_synthetic',
    battery=kokam_cell,
    modeling=blast_model,
)

Load the descriptions of the variables by parsing them from the README.

In [4]:
# Collect a description for every README variable that is not already mapped to a schema column.
# Variables are read from bullet lines of the form "- <name>: <description>".
new_columns = {}
with open('README.md', encoding='utf-8') as fp:  # Fixed encoding so parsing does not depend on the platform
    for line in fp:
        if not line.startswith("-"):
            continue

        # Drop the leading "- " and split on the first colon only, as descriptions may contain colons
        name, sep, desc = line.strip()[2:].partition(":")
        if not sep:
            continue  # Not a "name: description" bullet (e.g., a "---" rule), so there is nothing to record

        # Remove padding so names match the keys of `known_columns` and descriptions have no leading space
        name = name.strip()
        if name not in known_columns:
            new_columns[name] = ColumnInfo(description=desc.strip())

## Iterate over Each File
Write each row of the "constant" and "varying" datasets to a separate HDF5 file.

In [5]:
# Start from an empty output directory: remove `processed` if it already exists, then create it anew
out_path = Path('processed')
if out_path.exists():
    rmtree(out_path)  # Deletes the directory along with everything inside it
out_path.mkdir(exist_ok=True)

In [6]:
for name in ('constant', 'varying'):
    # Read the table of simulations for this type of input from the MATLAB file
    all_data = loadmat(f'raw/ROVI - {name} inputs.mat')['simulations']

    # Take the names of the input and output columns out of the loaded data
    input_cols = all_data.pop('Input_vars')[0]
    output_cols = all_data.pop('Output_vars')[0]

    # Every row is a different input parameter set, and each is written to its own HDF5 file
    for i, (inputs, outputs) in enumerate(zip(all_data['Inputs'], all_data['Outputs'])):
        inputs = inputs.astype(np.float32)
        outputs = np.array(outputs, dtype=np.float32)  # 4D: n_samples x 1 x num_days x num_cols

        with File(out_path / f'{name}-{i}.h5', mode='a') as fo:
            # Every sample of this parameter set used a different set of degradation parameters
            for j, output in enumerate(outputs[:, 0, :, :]):
                # Place the inputs beside the outputs of this sample in a single table
                data = pd.DataFrame(
                    np.concatenate((inputs, output), axis=1),
                    columns=input_cols + output_cols,
                )
                data['cycle_number'] = np.arange(len(data))

                # Give the columns known to battdat their schema names
                data = data.rename(columns=known_columns)
                data[['cycle_duration', 'cycle_start']] *= 3600 * 24  # Convert to seconds

                # Build the dataset, register the descriptions of the extra columns, then validate it
                bd = CellDataset(cycle_stats=data, metadata=metadata)
                bd.schemas['cycle_stats'].extra_columns.update(new_columns)
                bd.validate()

                # Store this sample under its own prefix inside the shared file
                bd.to_hdf(fo, prefix=f'run_{j}', complevel=9, complib='zlib')

    del all_data  # Release the loaded data before reading the next file