In [None]:
import pandas as pd
import numpy as np
import scipy.io
import os
import zipfile
import glob
from pathlib import Path

# Locations used by every later cell (all relative to the notebook's folder):
#   battery_data_path - folder holding the downloaded *.zip archives
#   data_folder       - where the archives are unpacked
#   organized_folder  - root of the per-battery CSV tree
battery_data_path, data_folder, organized_folder = (
    '../5. Battery Data Set',
    '../data',
    '../data/organized_batteries',
)

# Make sure the output root exists before anything tries to write into it
os.makedirs(organized_folder, exist_ok=True)

# Echo the configuration so the run log records which paths were used
print(
    f"Battery data source: {battery_data_path}",
    f"Data folder: {data_folder}",
    f"Organized folder: {organized_folder}",
    sep="\n",
)

In [None]:
# Extract all zip files to get all battery data (B0001 to B0056)
# The list is sorted because glob returns paths in arbitrary order; a fixed
# order makes it deterministic which archive wins if two archives contain a
# member with the same name (later extractions overwrite earlier ones).
zip_files = sorted(glob.glob(os.path.join(battery_data_path, '*.zip')))
print(f"Found {len(zip_files)} zip files\n")

# Extract all zip files into the shared data folder
for zip_file in zip_files:
    zip_name = os.path.basename(zip_file)
    print(f"Extracting {zip_name}...")

    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(data_folder)
        extracted_count = len(zip_ref.namelist())
        print(f"  Extracted {extracted_count} files")

# List all .mat files (only those directly inside data_folder)
mat_files = sorted(glob.glob(os.path.join(data_folder, '*.mat')))
print(f"\nTotal .mat files found: {len(mat_files)}")

# Show which battery files we have: keep the numeric part of names shaped
# like "B<digits>" and ignore every other .mat file.
battery_numbers = []
for mat_file in mat_files:
    battery_name = os.path.basename(mat_file).replace('.mat', '')
    if battery_name.startswith('B'):
        try:
            battery_num = int(battery_name[1:])
        except ValueError:
            # The name starts with "B" but is not "B<number>"; skip it.
            # Only ValueError is caught so that real problems (and
            # KeyboardInterrupt) are no longer silently swallowed.
            continue
        battery_numbers.append(battery_num)

if battery_numbers:
    battery_numbers.sort()
    print(f"Battery range: B{battery_numbers[0]:04d} to B{battery_numbers[-1]:04d}")
    print(f"Available batteries: {battery_numbers}")

In [None]:
# Folder layout for B0001 to B0056:
# batteries are grouped ten per subfolder (B0001_to_B0010, B0011_to_B0020, ...)

def create_battery_folder(battery_num, base_folder):
    """Ensure the output folder for one battery exists and return its path.

    The folder is ``<base_folder>/B<first>_to_B<last>/B<battery_num>``, where
    ``first``..``last`` is the block of ten consecutive numbers (1-10, 11-20,
    ...) that contains ``battery_num``. All numbers are zero-padded to four
    digits. Missing directories are created; existing ones are left alone.
    """
    # Zero-based index of the block of ten this battery falls in
    decade_index = (battery_num - 1) // 10
    first_in_group = decade_index * 10 + 1
    last_in_group = first_in_group + 9

    target = os.path.join(
        base_folder,
        f"B{first_in_group:04d}_to_B{last_in_group:04d}",
        f"B{battery_num:04d}",
    )
    os.makedirs(target, exist_ok=True)
    return target

# Build the complete folder tree up front (one folder per battery, B0001-B0056)
print("Creating folder structure for B0001 to B0056...")
created_folders = []
for battery_num in range(1, 57):
    created_folders.append(create_battery_folder(battery_num, organized_folder))

print(f"Created {len(created_folders)} battery folders")

# Report how many battery folders ended up in each group subfolder
print("\nFolder structure:")
batteries_by_subfolder = {}
for folder in created_folders:
    # Group battery folder names under their parent (group) directory
    batteries_by_subfolder.setdefault(os.path.dirname(folder), []).append(os.path.basename(folder))

subfolders = sorted(batteries_by_subfolder)
for subfolder in subfolders:
    subfolder_name = os.path.basename(subfolder)
    batteries_in_subfolder = batteries_by_subfolder[subfolder]
    print(f"  {subfolder_name}/ ({len(batteries_in_subfolder)} batteries)")

In [None]:
# Function to extract all data from battery .mat file

# Per-sample measurement fields copied from every charge/discharge cycle
_TIME_SERIES_FIELDS = ('Voltage_measured', 'Current_measured', 'Temperature_measured', 'Time')
# Optional fields copied from impedance cycles when they are present
_IMPEDANCE_FIELDS = ('Battery_impedance', 'Rectified_impedance', 'Re', 'Rct')


def _flatten_field(value):
    """Return a struct field loaded by ``scipy.io.loadmat`` as a flat 1-D array.

    With loadmat's default options a numeric field comes back as a 2-D array,
    so indexing it with ``[0, 0]`` would keep only its first sample. This
    helper flattens the whole array instead. If the field is wrapped in
    single-element object arrays, those wrappers are peeled off first so both
    layouts give the same result.
    """
    arr = np.asarray(value)
    # Peel single-element object containers, but only while they actually
    # hold another array (guards against looping on non-array payloads).
    while arr.dtype == object and arr.size == 1 and isinstance(arr.flat[0], np.ndarray):
        arr = arr.flat[0]
    return arr.ravel()


def _time_series_frame(cycle_number, data):
    """Build the per-sample DataFrame shared by charge and discharge cycles."""
    frame = pd.DataFrame({field: _flatten_field(data[field]) for field in _TIME_SERIES_FIELDS})
    frame.insert(0, 'Cycle', cycle_number)
    return frame


def _impedance_frame(cycle_number, data, field_names):
    """Build the DataFrame for one impedance cycle, or None if it has no known field.

    Each field is wrapped in a Series so that fields of different lengths can
    share one frame; shorter fields are padded with NaN at the end.
    """
    columns = {
        field: pd.Series(_flatten_field(data[field]))
        for field in _IMPEDANCE_FIELDS
        if field in field_names
    }
    if not columns:
        return None
    frame = pd.DataFrame(columns)
    frame.insert(0, 'Cycle', cycle_number)
    return frame


def extract_all_battery_data(mat_file_path):
    """Extract charge, discharge, and impedance data from battery MATLAB file.

    The file is expected to contain a variable named after the file itself
    (``B0005.mat`` -> ``B0005``) holding a ``cycle`` struct array whose
    elements have a ``type`` string and a ``data`` struct.

    Parameters
    ----------
    mat_file_path : str
        Path to the ``.mat`` file.

    Returns
    -------
    dict or None
        Maps ``'discharge'``, ``'charge'`` and/or ``'impedance'`` to one
        DataFrame each (all cycles of that type concatenated). A key is
        omitted when the file has no usable cycle of that type. Every frame
        starts with a ``Cycle`` column holding the 1-based position of the
        cycle in the file. Discharge frames also get a ``Capacity`` column
        (first capacity value of the cycle, repeated on every row) when the
        cycle provides one. Returns ``None`` if anything goes wrong; the
        error is printed rather than raised.
    """
    try:
        mat_data = scipy.io.loadmat(mat_file_path)
        battery_name = os.path.basename(mat_file_path).replace('.mat', '')

        battery_data = mat_data[battery_name][0, 0]
        cycles = battery_data['cycle'][0]

        # Insertion order fixes the key order of the returned dict
        frames = {'discharge': [], 'charge': [], 'impedance': []}

        for cycle_number, cycle in enumerate(cycles, start=1):
            cycle_type = str(cycle['type'][0])
            data = cycle['data'][0, 0]
            field_names = data.dtype.names or ()

            # 'discharge' must be tested first: the word contains 'charge'
            if 'discharge' in cycle_type:
                df = _time_series_frame(cycle_number, data)
                if 'Capacity' in field_names:
                    capacity = _flatten_field(data['Capacity'])
                    if capacity.size > 0:
                        df['Capacity'] = capacity[0]
                frames['discharge'].append(df)

            elif 'charge' in cycle_type:
                frames['charge'].append(_time_series_frame(cycle_number, data))

            elif 'impedance' in cycle_type:
                df = _impedance_frame(cycle_number, data, field_names)
                if df is not None:
                    frames['impedance'].append(df)

        return {
            kind: pd.concat(parts, ignore_index=True)
            for kind, parts in frames.items()
            if parts
        }

    except Exception as e:
        # Best effort by design: report the failure and let the caller move
        # on to the next battery file.
        print(f"Error processing {os.path.basename(mat_file_path)}: {str(e)}")
        return None

print("Function defined: extract_all_battery_data()")

In [None]:
# Convert every B<number>.mat file to CSVs inside its organized folder and
# keep the resulting DataFrames in memory, keyed by battery name.
all_battery_dataframes = {}

print("Processing all battery files...\n")

for mat_file in mat_files:
    battery_name = os.path.basename(mat_file).replace('.mat', '')

    # Only files whose name starts with "B" are battery files
    if not battery_name.startswith('B'):
        continue

    try:
        # Raises for names that are not B<number>; reported by the handler below
        battery_num = int(battery_name[1:])

        print(f"Processing {battery_name}...")

        data_dict = extract_all_battery_data(mat_file)

        # None (extraction failed) or an empty dict (no cycles): nothing to save
        if not data_dict:
            continue

        battery_folder = create_battery_folder(battery_num, organized_folder)

        # One CSV per data type (discharge / charge / impedance)
        for data_type, df in data_dict.items():
            csv_filename = f"{battery_name}_{data_type}.csv"
            csv_path = os.path.join(battery_folder, csv_filename)
            df.to_csv(csv_path, index=False)
            print(f"  Saved {data_type}: {df.shape} -> {csv_filename}")

        all_battery_dataframes[battery_name] = data_dict

    except Exception as exc:
        # Keep going with the remaining files; just report this one
        print(f"  Error with {battery_name}: {str(exc)}")

processed_names = list(all_battery_dataframes.keys())
print(f"\n=== Processing Complete ===")
print(f"Total batteries processed: {len(all_battery_dataframes)}")
print(f"Batteries: {processed_names}")

In [None]:
# Final report: folder overview plus a peek at the first processed battery
print("=== SUMMARY ===\n")

# Count the entries inside each group subfolder of the organized tree
print("Folder Structure:")
for subfolder in sorted(os.listdir(organized_folder)):
    subfolder_path = os.path.join(organized_folder, subfolder)
    if not os.path.isdir(subfolder_path):
        continue
    battery_folders = os.listdir(subfolder_path)
    print(f"  {subfolder}/ ({len(battery_folders)} batteries)")

# Sample output is taken from whichever battery was stored first
if all_battery_dataframes:
    first_battery = next(iter(all_battery_dataframes))
    print(f"\n=== Sample Data: {first_battery} ===")

    for data_type, df in all_battery_dataframes[first_battery].items():
        print(f"\n{data_type.upper()} Data:")
        print(f"  Shape: {df.shape}")
        print(f"  Columns: {list(df.columns)}")
        print(f"  First 3 rows:")
        print(df.head(3).to_string(index=False))

        # The capacity summary only applies to discharge data with a Capacity column
        if data_type != 'discharge' or 'Capacity' not in df.columns:
            continue

        # One capacity value per cycle: the first row of each cycle
        capacity_by_cycle = df.groupby('Cycle')['Capacity'].first()
        if len(capacity_by_cycle) == 0:
            continue

        initial_capacity = capacity_by_cycle.iloc[0]
        final_capacity = capacity_by_cycle.iloc[-1]
        print(f"\n  Capacity Summary:")
        print(f"    Initial: {initial_capacity:.4f} Ahr")
        print(f"    Final: {final_capacity:.4f} Ahr")
        # Relative loss between the first and last discharge cycle, in percent
        fade = (initial_capacity - final_capacity) / initial_capacity * 100
        print(f"    Fade: {fade:.2f}%")