In the NASA battery dataset, the first row of each discharge cycle has both voltage and time equal to zero, so that row must be deleted. In addition, once the voltage drops to zero the time column keeps counting, so each cycle is truncated at its first zero-voltage row (that row itself is kept). This notebook fixes both issues, and the processed data is then used for feature extraction.

In [3]:
import pandas as pd

# Open the source workbook once; later cells parse individual sheets from it
file_path = "./datacsv/battery_discharge_data_all.xlsx"  # Replace with your path
xls = pd.ExcelFile(file_path)

# Names of every sheet in the workbook, in workbook order
sheet_names = xls.sheet_names

In [4]:
# Define the processing function
def process_discharge_sheet(df):
    """Clean one discharge sheet cycle by cycle.

    For every group of rows sharing the same ``cycle`` value:

    * the first row is dropped, and
    * if any remaining row has ``voltage == 0``, everything after the first
      such row is dropped (the zero-voltage row itself is kept).

    Args:
        df: DataFrame for one sheet. Column names may carry surrounding
            whitespace; it is stripped before the ``cycle`` and ``voltage``
            columns are looked up. The caller's object is not modified.

    Returns:
        A new DataFrame with stripped column names and a fresh 0..n-1 index,
        holding the cleaned cycles in ``groupby('cycle')`` order. If the
        sheet lacks a ``cycle`` or ``voltage`` column, the copy is returned
        with only the column names stripped. If no cycle groups exist, an
        empty frame with the same columns is returned.
    """
    # Work on a copy so stripping the headers does not rewrite the caller's frame
    df = df.copy()
    df.columns = df.columns.str.strip()
    if 'cycle' not in df.columns or 'voltage' not in df.columns:
        return df  # Skip sheets without necessary columns

    processed = []
    for _, group in df.groupby('cycle'):
        # Delete the first row of each cycle
        group = group.iloc[1:]

        # Truncate by position rather than by index label: label slicing
        # gives wrong results (or raises) when index labels are repeated
        is_zero = (group['voltage'] == 0).to_numpy()
        if is_zero.any():
            # argmax on a boolean array gives the position of the first True
            group = group.iloc[:int(is_zero.argmax()) + 1]

        processed.append(group)

    if not processed:
        # pd.concat raises ValueError on an empty list
        return df.iloc[0:0].reset_index(drop=True)

    return pd.concat(processed, ignore_index=True)

In [5]:
# Clean every sheet whose name ends in "_discharge"; results are keyed by sheet name
processed_sheets = {}
for sheet in sheet_names:
    print(sheet)  # every sheet name is echoed, processed or not
    if not sheet.endswith('_discharge'):
        continue
    df = xls.parse(sheet)
    processed_df = process_discharge_sheet(df)
    processed_sheets[sheet] = processed_df

# Write each cleaned sheet to a new workbook under its original sheet name
output_path = "./datacsv/processed_battery_discharge_data.xlsx"
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
    for sheet_name, df in processed_sheets.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Processing complete, saved as: {output_path}")

B0005_discharge
B0006_discharge
B0007_discharge
B0018_discharge
index
Processing complete, saved as: ./datacsv/processed_battery_discharge_data.xlsx
