# CALB Battery Data Exploration and Visualization

This notebook demonstrates how to work with the CALB battery data pickle files, extract useful information, and create insightful visualizations for battery performance analysis.

## 1. Setup and Data Loading

First, let's import the necessary libraries and define functions to load and explore the pickle files.

In [None]:
import os
import pickle
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set plot style. The matplotlib style sheet is applied first and the seaborn
# context (font/line scaling for notebook-sized figures) is applied after it;
# keep this order, since both calls modify matplotlib's global rcParams.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably need
# matplotlib >= 3.6 — confirm against the environment this notebook targets.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("notebook", font_scale=1.2)

# Directory containing pickle files. This is a relative path, so it is
# resolved against the notebook's current working directory.
data_dir = "CALB"

In [None]:
# Function to load pickle files
def load_pickle(file_path):
    """Deserialize the pickle stored at `file_path` and return the object.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only use this on files from a
    trusted source.
    """
    with open(file_path, 'rb') as handle:
        contents = pickle.load(handle)
    return contents

# Get list of all pickle files. glob returns matches in arbitrary,
# platform-dependent order, so sort them: the notebook later picks
# pickle_files[0] as the sample cell and the first files of each temperature
# group, and sorting keeps those picks reproducible between runs and machines.
pickle_files = sorted(glob.glob(os.path.join(data_dir, "*.pkl")))
print(f"Found {len(pickle_files)} pickle files in {data_dir} directory.")

# Group file names by the second underscore-separated token of the name, which
# this notebook treats as the test temperature. Names with fewer than three
# tokens are left out of the grouping.
# NOTE(review): assumes names look like <prefix>_<temperature>_<rest>.pkl —
# confirm against the actual data set.
temps = {}
for file in pickle_files:
    base_name = os.path.basename(file)
    parts = base_name.split('_')
    if len(parts) >= 3:
        temps.setdefault(parts[1], []).append(base_name)

print("\nFiles grouped by temperature:")
for temp, files in temps.items():
    print(f"\nTemperature {temp}°C: {len(files)} files")
    for file in files[:5]:  # Show first 5 files for each temperature
        print(f"  - {file}")
    if len(files) > 5:
        print(f"  - ... and {len(files) - 5} more")

## 2. Exploring Data Structure

Let's load a sample file to explore its structure and contents.

In [None]:
# Select a sample file (None when the data directory holds no pickle files)
sample_file = pickle_files[0] if pickle_files else None

# Bind `data` up front. The analysis cells further down all start with
# `isinstance(data, dict)`; without this binding they would raise NameError
# instead of skipping quietly when no file could be loaded.
data = None

if sample_file:
    print(f"Exploring sample file: {os.path.basename(sample_file)}")

    # Load the data
    data = load_pickle(sample_file)

    # Check the type
    print(f"\nData type: {type(data)}")

    # If it's a dictionary, show each key with the type of its value
    if isinstance(data, dict):
        print(f"\nDictionary keys ({len(data)}):\n")
        for key, value in data.items():
            print(f"- {key}: {type(value)}")
else:
    print("No pickle files found in the directory.")

## 3. Examining Metadata

Let's extract and display the metadata for the battery cell (excluding the cycle data).

In [None]:
# Everything stored alongside 'cycle_data' is treated as cell-level metadata
if isinstance(data, dict) and 'cycle_data' in data:
    metadata = dict(data)
    metadata.pop('cycle_data')

    # One-row DataFrame version of the metadata, kept for tabular display
    metadata_df = pd.DataFrame([metadata])

    # Print one "key: value" line per metadata field
    print("Battery Cell Metadata:")
    for field_name in metadata:
        print(f"{field_name}: {metadata[field_name]}")

## 4. Analyzing Cycle Data

Now let's examine the cycle data to understand the charge/discharge cycles.

In [None]:
# Report how many cycles there are and inspect the structure of the first one
if isinstance(data, dict) and 'cycle_data' in data:
    cycles = data['cycle_data']
    print(f"Number of cycles: {len(cycles)}")

    if cycles:
        first_cycle = cycles[0]
        print(f"\nFirst cycle (#{first_cycle.get('cycle_number', 'N/A')}) keys:")
        for field, content in first_cycle.items():
            # Scalars are printed as-is; lists get a length and a short preview
            if not isinstance(content, list):
                print(f"- {field}: {content}")
                continue
            print(f"- {field}: List with {len(content)} items, sample: {content[:3]} ...")

        # Number of samples recorded for every list-valued field
        data_lengths = {
            field: len(content)
            for field, content in first_cycle.items()
            if isinstance(content, list)
        }

        print("\nData points per variable:")
        for field in data_lengths:
            print(f"- {field}: {data_lengths[field]} points")

## 5. Extract Key Battery Metrics

Let's extract key metrics for each cycle, such as max charge/discharge capacity and coulombic efficiency.

In [None]:
# Extract cycle metrics
def _series_extreme(cycle, key, reducer):
    """Apply `reducer` (e.g. ``min`` or ``max``) to the series stored under `key`.

    Returns None when the key is absent or the series is None/empty, so a
    missing measurement stays a missing value instead of becoming a made-up
    number. Checking ``len()`` rather than truthiness also keeps this working
    when the series is a NumPy array instead of a list.
    """
    values = cycle.get(key)
    if values is None or len(values) == 0:
        return None
    return reducer(values)


if isinstance(data, dict) and 'cycle_data' in data:
    cycle_metrics = []

    for cycle in data['cycle_data']:
        # Largest value of each capacity series within the cycle
        charge_cap = _series_extreme(cycle, 'charge_capacity_in_Ah', max)
        discharge_cap = _series_extreme(cycle, 'discharge_capacity_in_Ah', max)

        # Coulombic efficiency = discharge / charge, in percent. Compare with
        # None explicitly: a measured discharge capacity of 0 Ah is a valid
        # 0 % result. Only a missing value or a non-positive charge capacity
        # leaves the efficiency undefined.
        coulombic_efficiency = None
        if charge_cap is not None and discharge_cap is not None and charge_cap > 0:
            coulombic_efficiency = (discharge_cap / charge_cap) * 100

        cycle_metrics.append({
            'cycle_number': cycle.get('cycle_number', None),
            'max_voltage_V': _series_extreme(cycle, 'voltage_in_V', max),
            'min_voltage_V': _series_extreme(cycle, 'voltage_in_V', min),
            'max_current_A': _series_extreme(cycle, 'current_in_A', max),
            'min_current_A': _series_extreme(cycle, 'current_in_A', min),
            'charge_capacity_Ah': charge_cap,
            'discharge_capacity_Ah': discharge_cap,
            'coulombic_efficiency_%': coulombic_efficiency
        })

    # Convert to DataFrame (one row per cycle)
    metrics_df = pd.DataFrame(cycle_metrics)

    # Display the metrics
    print("First 5 cycles metrics:")
    display(metrics_df.head())

    # Display summary statistics
    print("\nSummary statistics:")
    display(metrics_df.describe())

## 6. Capacity Fade Analysis

Let's visualize the capacity fade and coulombic efficiency over cycles.

In [None]:
# Plot capacity fade (top panel) and coulombic efficiency (bottom panel)
if 'metrics_df' in locals() and not metrics_df.empty:
    # plt.subplots creates the figure itself. A separate plt.figure() call
    # before it would only leave an extra, empty figure behind.
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)

    # Plot capacity fade
    ax1.plot(metrics_df['cycle_number'], metrics_df['charge_capacity_Ah'], 'b-o', label='Charge Capacity')
    ax1.plot(metrics_df['cycle_number'], metrics_df['discharge_capacity_Ah'], 'r-o', label='Discharge Capacity')
    ax1.set_ylabel('Capacity (Ah)')
    ax1.set_title(f'Capacity Fade for {os.path.basename(sample_file).split(".")[0]}')
    ax1.legend()
    ax1.grid(True)

    # Plot coulombic efficiency; both panels share the cycle-number x axis
    ax2.plot(metrics_df['cycle_number'], metrics_df['coulombic_efficiency_%'], 'g-o')
    ax2.set_xlabel('Cycle Number')
    ax2.set_ylabel('Coulombic Efficiency (%)')
    ax2.set_title('Coulombic Efficiency')
    ax2.set_ylim([0, 105])  # Efficiency typically between 0-100%
    ax2.grid(True)

    plt.tight_layout()
    plt.show()

## 7. Voltage Curves Analysis

Let's plot voltage vs. capacity curves for selected cycles to see how they change over time.

In [None]:
# Plot voltage curves for selected cycles
if isinstance(data, dict) and 'cycle_data' in data:
    cycles = data['cycle_data']

    # Choose a few cycles to plot (first, 25%, 50%, 75%, last)
    if len(cycles) > 0:
        n_cycles = len(cycles)
        candidate_indices = [0]
        if n_cycles > 4:
            candidate_indices.extend([n_cycles // 4, n_cycles // 2, 3 * n_cycles // 4])
        candidate_indices.append(n_cycles - 1)
        # With a single cycle, "first" and "last" are the same index.
        # dict.fromkeys drops such duplicates while keeping the order, so no
        # cycle is drawn (and listed in the legend) twice.
        indices_to_plot = list(dict.fromkeys(candidate_indices))

        plt.figure(figsize=(14, 8))

        for idx in indices_to_plot:
            cycle = cycles[idx]
            # Fall back to the list position when the cycle carries no number
            cycle_num = cycle.get('cycle_number', idx)

            # Check if we have voltage and capacity data
            if 'voltage_in_V' in cycle and 'charge_capacity_in_Ah' in cycle:
                plt.plot(cycle['charge_capacity_in_Ah'], cycle['voltage_in_V'],
                         label=f'Cycle {cycle_num} (Charge)')

            # If we have discharge data, plot it too
            if 'voltage_in_V' in cycle and 'discharge_capacity_in_Ah' in cycle:
                # Keep only samples with non-zero discharge capacity. Pairing
                # the two series with zip avoids an IndexError if they differ
                # in length.
                discharge_points = [
                    (cap, volt)
                    for cap, volt in zip(cycle['discharge_capacity_in_Ah'], cycle['voltage_in_V'])
                    if cap > 0
                ]
                if discharge_points:
                    discharge_capacities, discharge_voltages = zip(*discharge_points)
                    plt.plot(discharge_capacities, discharge_voltages,
                             linestyle='--', label=f'Cycle {cycle_num} (Discharge)')

        plt.xlabel('Capacity (Ah)')
        plt.ylabel('Voltage (V)')
        plt.title(f'Voltage vs. Capacity Curves for {os.path.basename(sample_file).split(".")[0]}')
        plt.legend()
        plt.grid(True)
        plt.show()

## 8. Comparison Across Different Temperatures

Let's compare the discharge capacity over cycles for batteries tested at different temperatures, using up to two cells per temperature group.

In [None]:
# Function to extract capacity metrics from a file
def extract_capacity_metrics(file_path):
    """Build a per-cycle capacity table for the cell stored in `file_path`.

    The temperature label is the second underscore-separated token of the
    file name ('Unknown' when the name has fewer than three tokens). The cell
    id comes from the pickle's 'cell_id' entry, falling back to the file name
    up to its first dot.

    Returns a DataFrame with the columns cell_id, temperature, cycle_number,
    charge_capacity_Ah and discharge_capacity_Ah, one row per cycle that has a
    cycle number. A missing or empty capacity series is recorded as 0. An
    empty DataFrame is returned when the pickle is not a dict with a
    'cycle_data' entry, or when any error occurs (the error is printed).
    """
    base_name = os.path.basename(file_path)
    name_tokens = base_name.split('_')
    temp = 'Unknown' if len(name_tokens) < 3 else name_tokens[1]

    def _peak(series):
        # Largest value of a series; 0 when the series is missing or empty
        return max(series) if series else 0

    try:
        data = load_pickle(file_path)

        if not (isinstance(data, dict) and 'cycle_data' in data):
            return pd.DataFrame()

        cell_id = data.get('cell_id', base_name.split('.')[0])

        rows = []
        for cycle in data['cycle_data']:
            cycle_num = cycle.get('cycle_number', None)
            if cycle_num is not None:
                rows.append({
                    'cell_id': cell_id,
                    'temperature': temp,
                    'cycle_number': cycle_num,
                    'charge_capacity_Ah': _peak(cycle.get('charge_capacity_in_Ah')),
                    'discharge_capacity_Ah': _peak(cycle.get('discharge_capacity_in_Ah'))
                })

        return pd.DataFrame(rows)

    except Exception as e:
        # Best effort: report the problem and let the caller skip this file
        print(f"Error processing {file_path}: {e}")
        return pd.DataFrame()

# Sample a few files from each temperature group (at most two per group)
sample_files = [
    os.path.join(data_dir, file_name)
    for group_files in temps.values()
    for file_name in group_files[:2]
]

# Extract capacity metrics for each sample file, skipping files that yielded nothing
all_metrics = []
for sample_path in sample_files:
    cell_frame = extract_capacity_metrics(sample_path)
    if cell_frame.empty:
        continue
    all_metrics.append(cell_frame)

if not all_metrics:
    print("No metrics data available for comparison.")
else:
    # Stack the per-cell tables into one long table
    combined_metrics = pd.concat(all_metrics, ignore_index=True)

    # Display the first few rows
    print("Combined metrics for different cells:")
    display(combined_metrics.head())

    # One discharge-capacity curve per (temperature, cell) pair
    plt.figure(figsize=(14, 8))
    grouped_cells = combined_metrics.groupby(['temperature', 'cell_id'])
    for (temp, cell), group in grouped_cells:
        curve_label = f'{cell} ({temp}°C)'
        plt.plot(group['cycle_number'], group['discharge_capacity_Ah'],
                 marker='o', linestyle='-', label=curve_label)

    plt.xlabel('Cycle Number')
    plt.ylabel('Discharge Capacity (Ah)')
    plt.title('Discharge Capacity Comparison Across Different Temperatures')
    plt.legend()
    plt.grid(True)
    plt.show()

## 9. Battery Degradation Analysis

Let's analyze the degradation rate by calculating capacity retention percentage over cycles.

In [None]:
# Calculate capacity retention for cells
if 'combined_metrics' in locals() and not combined_metrics.empty:
    retention_columns = ['cell_id', 'temperature', 'cycle_number', 'discharge_capacity_Ah']
    retention_frames = []

    for (temp, cell), group in combined_metrics.groupby(['temperature', 'cell_id']):
        # Sort by cycle number so the first row is the earliest cycle
        group = group.sort_values('cycle_number')

        # Get initial capacity (first cycle); retention is relative to it
        initial_capacity = group['discharge_capacity_Ah'].iloc[0]
        if not initial_capacity > 0:
            # Retention is undefined without a positive reference capacity.
            # Say so instead of dropping the cell silently.
            print(f"Skipping {cell} ({temp}°C): initial discharge capacity is not positive.")
            continue

        # Vectorized retention for all cycles of this cell
        retention = group[retention_columns].copy()
        retention['capacity_retention_%'] = (
            retention['discharge_capacity_Ah'] / initial_capacity
        ) * 100
        retention_frames.append(retention)

    # Keep the expected columns even when every cell was skipped, so code
    # working on retention_df never hits a column-less frame.
    if retention_frames:
        retention_df = pd.concat(retention_frames, ignore_index=True)
    else:
        retention_df = pd.DataFrame(columns=retention_columns + ['capacity_retention_%'])

    if retention_df.empty:
        print("No capacity retention data available to plot.")
    else:
        # Plot capacity retention
        plt.figure(figsize=(14, 8))

        for (temp, cell), group in retention_df.groupby(['temperature', 'cell_id']):
            plt.plot(group['cycle_number'], group['capacity_retention_%'],
                     marker='o', linestyle='-', label=f'{cell} ({temp}°C)')

        plt.xlabel('Cycle Number')
        plt.ylabel('Capacity Retention (%)')
        plt.title('Capacity Retention Comparison Across Different Temperatures')
        plt.grid(True)
        plt.axhline(y=80, color='r', linestyle='--', label='80% Retention (End of Life)')
        # Build the legend once, after the reference line exists, so it
        # includes the 80% entry.
        plt.legend()
        plt.show()

## 10. Advanced Analysis: Differential Voltage Analysis

Differential voltage analysis (dV/dQ) can reveal more subtle changes in battery behavior that indicate specific degradation mechanisms.

In [None]:
# Function to calculate differential voltage
def calculate_dVdQ(voltage, capacity):
    """Compute the differential voltage dV/dQ between consecutive samples.

    Args:
        voltage: 1-D sequence of voltage samples.
        capacity: 1-D sequence of capacity samples, same length as `voltage`.

    Returns:
        A tuple ``(dVdQ, capacity_mid)`` of float arrays with one element
        fewer than the inputs. ``dVdQ[i]`` is the voltage difference divided
        by the capacity difference between samples ``i`` and ``i + 1``, or 0
        where the capacity did not change. ``capacity_mid[i]`` is the midpoint
        of those two capacity samples.

    Raises:
        ValueError: if `voltage` and `capacity` differ in shape.
    """
    # Work in floating point: with integer inputs the quotient would
    # otherwise be stored in an integer array and silently truncated.
    voltage = np.asarray(voltage, dtype=float)
    capacity = np.asarray(capacity, dtype=float)
    if voltage.shape != capacity.shape:
        raise ValueError(
            f"voltage and capacity must have the same shape, "
            f"got {voltage.shape} and {capacity.shape}"
        )

    # Calculate difference
    dV = np.diff(voltage)
    dQ = np.diff(capacity)

    # Avoid division by zero: entries where dQ == 0 keep the 0 from `out`
    dVdQ = np.divide(dV, dQ, out=np.zeros_like(dV), where=dQ != 0)

    # Capacity values at the midpoints matching each dV/dQ entry
    capacity_mid = (capacity[:-1] + capacity[1:]) / 2

    return dVdQ, capacity_mid

# Perform differential voltage analysis on a sample cycle
if isinstance(data, dict) and 'cycle_data' in data and data['cycle_data']:
    # Choose cycles to analyze (first, middle, last). For files with one or
    # two cycles these positions coincide; dict.fromkeys removes the
    # duplicates while keeping the order, so no cycle is plotted twice.
    cycles = data['cycle_data']
    cycle_indices = list(dict.fromkeys([0, len(cycles) // 2, len(cycles) - 1]))

    plt.figure(figsize=(14, 8))

    for idx in cycle_indices:
        cycle = cycles[idx]
        # Fall back to the list position when the cycle carries no number
        cycle_num = cycle.get('cycle_number', idx)

        # Check if we have voltage and capacity data
        if 'voltage_in_V' in cycle and 'charge_capacity_in_Ah' in cycle:
            # Get charge data
            voltage = cycle['voltage_in_V']
            capacity = cycle['charge_capacity_in_Ah']

            # A difference quotient needs at least two samples. With fewer,
            # dV/dQ is empty and the percentile below has nothing to work on.
            if len(voltage) < 2 or len(capacity) < 2:
                continue

            # Calculate dV/dQ
            dVdQ, capacity_mid = calculate_dVdQ(voltage, capacity)

            # Filter out extreme values
            max_limit = np.percentile(np.abs(dVdQ), 95)  # 95th percentile as max limit
            mask = np.abs(dVdQ) <= max_limit

            # Plot dV/dQ vs capacity
            plt.plot(capacity_mid[mask], dVdQ[mask], label=f'Cycle {cycle_num}')

    plt.xlabel('Capacity (Ah)')
    plt.ylabel('dV/dQ (V/Ah)')
    plt.title('Differential Voltage Analysis')
    plt.legend()
    plt.grid(True)
    plt.show()

    print("\nDifferential Voltage Analysis (dV/dQ):")
    print("- Peaks represent phase transitions in electrode materials")
    print("- Changes in peak position/height indicate degradation mechanisms")
    print("- Shift to left: loss of active material")
    print("- Decrease in peak height: impedance increase")

## 11. Conclusion and Next Steps

This notebook has demonstrated various techniques for analyzing CALB battery data, including:

1. Loading and exploring the data structure
2. Extracting key battery performance metrics
3. Visualizing capacity fade and coulombic efficiency
4. Comparing voltage curves across different cycles
5. Analyzing battery degradation across different temperatures
6. Performing differential voltage analysis

### Potential Next Steps:

1. **Machine Learning Models**: Develop predictive models for remaining useful life estimation
2. **Feature Engineering**: Extract more advanced features from voltage and current curves
3. **Statistical Analysis**: Perform detailed statistical comparisons across different temperatures
4. **Degradation Mechanism Identification**: Use advanced techniques to identify specific degradation mechanisms
5. **State of Health Estimation**: Develop algorithms for real-time state of health monitoring

In [None]:
# Build an inventory of every cell file whose name carries a temperature token
cell_info = []
for pickle_path in pickle_files:
    file_name = os.path.basename(pickle_path)
    name_tokens = file_name.split('_')
    if len(name_tokens) < 3:
        # File name does not follow the expected pattern; leave it out
        continue
    cell_info.append({
        'cell_id': file_name.split('.')[0],
        'temperature': name_tokens[1],
        'file_path': pickle_path
    })

cell_info_df = pd.DataFrame(cell_info)
print("Battery Cell Information:")
display(cell_info_df)

# Persist the inventory as CSV (written to the current working directory)
inventory_csv = 'battery_cell_inventory.csv'
cell_info_df.to_csv(inventory_csv, index=False)
print("\nSaved battery cell inventory to 'battery_cell_inventory.csv'")