In [13]:
import os
import numpy as np
import pandas as pd

# Data preprocessing

## Extract data from the low current OCV test to create a reference curve

### Helper functions

In [12]:
# load data from xlsx to dict


def load_data_to_dict(dir_path: str, **file_keys: str) -> dict[str, pd.DataFrame]:
    """
    Loads .xlsx files from the specified directory and concatenates all sheets
    into a single DataFrame per file.

    Returns a dict in the format: {file_key: DataFrame}. Files are processed in
    sorted filename order, so the order of the returned dict is deterministic.

    Parameters:
    ----------
    dir_path : str
        Path to the directory containing .xlsx files.
    **file_keys : str
        Optional mapping of custom keys to filenames (e.g., key="file.xlsx").
        A file without a custom key is stored under its name without the
        extension. A requested filename that is not present in the directory
        only produces a warning message.

    Returns:
    -------
    dict[str, pd.DataFrame]
        One DataFrame per successfully loaded workbook, with the rows of all
        its sheets stacked and re-indexed from 0.

    Raises:
    ------
    FileNotFoundError
        If `dir_path` does not exist.
    NotADirectoryError
        If `dir_path` exists but is not a directory.
    ValueError
        If the directory is empty, or if no .xlsx file could be loaded.

    Notes:
    -----
    Loading is best-effort: a workbook that fails to load is reported with a
    printed message and skipped. Files that are not .xlsx, and Excel lock files
    (names starting with "~$"), are skipped as well.

    Example:
    -------
    >>> data = load_data_to_dict("data/", train="train_data.xlsx")
    >>> df = data["train"]
    """
    if not os.path.exists(dir_path):
        raise FileNotFoundError(f"Directory '{dir_path}' not found!")
    if not os.path.isdir(dir_path):
        raise NotADirectoryError(f"'{dir_path}' is not a directory!")

    # List the directory once; os.listdir returns entries in arbitrary order,
    # so sort to make the result (and any key collisions) reproducible.
    filenames = sorted(os.listdir(dir_path))
    if not filenames:
        raise ValueError(f"Directory '{dir_path}' is empty!")

    # A typo in a requested filename would otherwise go unnoticed until a
    # later KeyError on the expected custom key.
    for key, wanted in file_keys.items():
        if wanted not in filenames:
            print(f"Warning: file '{wanted}' (key '{key}') not found in '{dir_path}'")

    data: dict[str, pd.DataFrame] = {}

    for filename in filenames:
        if not filename.endswith(".xlsx"):
            print(f"Skipped file '{filename}' (not .xlsx)")
            continue
        if filename.startswith("~$"):
            # Temporary lock file Excel creates next to an open workbook.
            print(f"Skipped file '{filename}' (Excel lock file)")
            continue

        full_path = os.path.join(dir_path, filename)
        # First custom key mapped to this filename, else the bare file stem.
        file_key = next(
            (k for k, v in file_keys.items() if v == filename),
            os.path.splitext(filename)[0],
        )

        try:
            # The context manager closes the workbook handle even if parsing fails.
            with pd.ExcelFile(full_path) as xls:
                dfs = [xls.parse(sheet_name) for sheet_name in xls.sheet_names]
            data[file_key] = pd.concat(dfs, ignore_index=True)
            print(f"Loaded and combined '{filename}' as '{file_key}'")
        except Exception as e:
            # Deliberately broad: one unreadable workbook must not abort the batch.
            print(f"Error loading file '{filename}': {e}")
            continue

    if not data:
        raise ValueError("No valid .xlsx files were loaded!")

    return data

### Initialization

In [8]:
# Sample 1 workbooks: each dictionary key below becomes the key of the
# returned dict for the workbook with the matching filename.
low_current_OCV_test_sp1 = load_data_to_dict(
    dir_path="../data/OCV-SOC/Low_Current_OCV/Sample1",
    **{
        "initial": "low_current_OCV_test_Initial_capacity_SP1.xlsx",
        "sp1_0": "low_current_OCV_test_SP1-0.xlsx",
        "sp1_25": "low_current_OCV_test_SP1-25.xlsx",
        "sp1_45": "low_current_OCV_test_SP1-45.xlsx",
    },
)

Loaded and combined 'low_current_OCV_test_Initial_capacity_SP1.xlsx' as 'initial'
Loaded and combined 'low_current_OCV_test_SP1-0.xlsx' as 'sp1_0'
Loaded and combined 'low_current_OCV_test_SP1-25.xlsx' as 'sp1_25'
Loaded and combined 'low_current_OCV_test_SP1-45.xlsx' as 'sp1_45'


In [None]:
# Sample 2 workbooks: each dictionary key below becomes the key of the
# returned dict for the workbook with the matching filename.
# NOTE(review): the keys are named "sp1_*" although the files are "SP2-*" --
# this looks like a copy-paste slip from the Sample 1 cell. Kept unchanged
# here because later cells may index by these keys; confirm and rename to
# "sp2_*" together with every use.
low_current_OCV_test_sp2 = load_data_to_dict(
    dir_path="../data/OCV-SOC/Low_Current_OCV/Sample2",
    **{
        "initial": "low_current_OCV_test_Initial_capacity_SP2.xlsx",
        "sp1_0": "low_current_OCV_test_SP2-0.xlsx",
        "sp1_25": "low_current_OCV_test_SP2-25.xlsx",
        "sp1_45": "low_current_OCV_test_SP2-45.xlsx",
    },
)

### Cleaning & Parsing

In [15]:
# Preview the first rows of the "sp1_0" frame to inspect its column layout.
# NOTE(review): the output shows an "Unnamed: 0" column -- presumably an index
# column saved into the workbook; confirm and consider dropping it in cleaning.
low_current_OCV_test_sp1["sp1_0"].head()

Unnamed: 0,Data_Point,Test_Time(s),Date_Time,Step_Time(s),Step_Index,Cycle_Index,Current(A),Voltage(V),Charge_Capacity(Ah),Discharge_Capacity(Ah),Charge_Energy(Wh),Discharge_Energy(Wh),dV/dt(V/s),Internal_Resistance(Ohm),Is_FC_Data,AC_Impedance(Ohm),ACI_Phase_Angle(Deg)
0,1,1.015532,2016-02-24 09:27:22,1.015533,1,1,0.0,4.158754,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
1,2,2.03113,2016-02-24 09:27:23,2.031131,1,1,0.0,4.158754,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
2,3,3.046728,2016-02-24 09:27:24,3.046729,1,1,0.0,4.158916,0.0,0.0,0.0,0.0,3.2e-05,0.0,0,0,0
3,4,4.046772,2016-02-24 09:27:25,4.046773,1,1,0.0,4.158754,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
4,5,5.062327,2016-02-24 09:27:26,5.062328,1,1,0.0,4.158754,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
