In [15]:
import pandas as pd
import os
#Fix the columns
COLUMN_MAPPING = {
    "Cycle_Index": ["Cycle_Index", "cycle_index", "Cycle"],
    "Discharge_Capacity (Ah)": ["Discharge_Capacity (Ah)", "Discharge_Capacity_Ah_", "Capacity"],
    "Test_Time (s)": ["Test_Time (s)", "test_time_s", "Time"],
    "Current (A)": ["Current (A)", "current_a", "I"],
    "Voltage (V)": ["Voltage (V)", "voltage_v", "V"],
    "Cell_Temperature (C)": ["Cell_Temperature (C)", "cell_temperature_c", "Temp"],
}


def standardize_columns(df):
    new_columns = {}
    
    for standard_col, variations in COLUMN_MAPPING.items():
        for var in variations:
            if var in df.columns:
                new_columns[var] = standard_col  

    df = df.rename(columns=new_columns)  
    return df

# Folder where CSVs are stored
folder_path = "battery/SNL_LFP"

# List all CSV files in the folder
csv_files = [f for f in os.listdir(folder_path) if f.endswith(".csv")]

for file in csv_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)
    
    df = standardize_columns(df)  # Apply standardization
    
    # Save the cleaned file
    cleaned_path = file_path.replace(".csv", "_cleaned.csv")
    df.to_csv(cleaned_path, index=False)
    print(f"Standardized and saved: {cleaned_path}")


Standardized and saved: battery/SNL_LFP\SNL_18650_LFP_15C_0-100_0.5-1C_a_cycle_data_cleaned.csv


KeyboardInterrupt: 

In [10]:
import pandas as pd
import scipy.io
import os
import glob

def csv_to_mat(csv_filename, mat_filename):
    df = pd.read_csv(csv_filename)  # Read CSV
    data_dict = {'data': df.to_numpy()}  # Convert to dictionary
    scipy.io.savemat(mat_filename, data_dict)  # Save as .mat
    print(f"Converted {csv_filename} -> {mat_filename}")

input_folder = "battery/SNL_LFP"
output_folder = "battery_mat/updated_dataset"

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Convert all CSV files in the input folder
csv_files = glob.glob(os.path.join(input_folder, "*.csv"))

for csv_file in csv_files:
    mat_file = os.path.join(output_folder, os.path.basename(csv_file).replace(".csv", ".mat"))
    csv_to_mat(csv_file, mat_file)


Converted battery/SNL_LFP\SNL_18650_LFP_15C_0-100_0.5-1C_a_cycle_data.csv -> battery_mat/updated_dataset\SNL_18650_LFP_15C_0-100_0.5-1C_a_cycle_data.mat


KeyboardInterrupt: 

In [11]:
import pandas as pd
import scipy.io
import os
import glob

def csv_to_mat(csv_filename, mat_filename, columns=None):
    """Convert a CSV file to .mat format while selecting specific columns."""
    df = pd.read_csv(csv_filename)

    if columns:
        df = df[columns]  # Select only required columns

    data_dict = {'data': df.to_numpy()}  # Convert to dictionary
    scipy.io.savemat(mat_filename, data_dict)  # Save as .mat
    print(f"Converted {csv_filename} -> {mat_filename}")

input_folder = "battery/SNL_LFP"
output_folder = "battery_mat/updated_dataset"

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# List of CSV files (both cycle and timeseries data)
csv_files = glob.glob(os.path.join(input_folder, "*.csv"))

# Define relevant columns for timeseries data
timeseries_columns = ["cycle", "time", "current", "voltage", "cell temperature"]

for csv_file in csv_files:
    filename = os.path.basename(csv_file)

    # If it's a timeseries file, select only the relevant columns
    if "timeseries" in filename.lower():
        mat_file = os.path.join(output_folder, filename.replace(".csv", "_timeseries.mat"))
        csv_to_mat(csv_file, mat_file, columns=timeseries_columns)

    # If it's a cycle file, store the entire dataset
    elif "cycle" in filename.lower():
        mat_file = os.path.join(output_folder, filename.replace(".csv", "_cycle.mat"))
        csv_to_mat(csv_file, mat_file)

print("Processing complete for all battery data!")


Converted battery/SNL_LFP\SNL_18650_LFP_15C_0-100_0.5-1C_a_cycle_data.csv -> battery_mat/updated_dataset\SNL_18650_LFP_15C_0-100_0.5-1C_a_cycle_data_cycle.mat


KeyError: "None of [Index(['cycle', 'time', 'current', 'voltage', 'cell temperature'], dtype='object')] are in the [columns]"

In [12]:
import pandas as pd
import os
import glob

input_folder = "battery/SNL_LFP"

# Define required columns for timeseries data
required_columns = ["cycle", "time", "current", "voltage", "cell temperature"]

# List all CSV files
csv_files = glob.glob(os.path.join(input_folder, "*.csv"))

# Track files with missing columns
files_with_issues = {}

for csv_file in csv_files:
    filename = os.path.basename(csv_file)
    df = pd.read_csv(csv_file, nrows=5)  # Read only first 5 rows to check structure
    
    missing_columns = [col for col in required_columns if col not in df.columns]
    
    if missing_columns:
        files_with_issues[filename] = missing_columns

# Print the files that have issues
if files_with_issues:
    print("Files with missing columns:")
    for file, missing in files_with_issues.items():
        print(f"{file}: Missing columns -> {missing}")
else:
    print("All files have the required columns!")



Files with missing columns:
SNL_18650_LFP_15C_0-100_0.5-1C_a_cycle_data.csv: Missing columns -> ['cycle', 'time', 'current', 'voltage', 'cell temperature']
SNL_18650_LFP_15C_0-100_0.5-1C_a_timeseries.csv: Missing columns -> ['cycle', 'time', 'current', 'voltage', 'cell temperature']
SNL_18650_LFP_15C_0-100_0.5-1C_b_cycle_data.csv: Missing columns -> ['cycle', 'time', 'current', 'voltage', 'cell temperature']
SNL_18650_LFP_15C_0-100_0.5-1C_b_timeseries.csv: Missing columns -> ['cycle', 'time', 'current', 'voltage', 'cell temperature']
SNL_18650_LFP_15C_0-100_0.5-2C_a_cycle_data.csv: Missing columns -> ['cycle', 'time', 'current', 'voltage', 'cell temperature']
SNL_18650_LFP_15C_0-100_0.5-2C_a_timeseries.csv: Missing columns -> ['cycle', 'time', 'current', 'voltage', 'cell temperature']
SNL_18650_LFP_15C_0-100_0.5-2C_b_cycle_data.csv: Missing columns -> ['cycle', 'time', 'current', 'voltage', 'cell temperature']
SNL_18650_LFP_15C_0-100_0.5-2C_b_timeseries.csv: Missing columns -> ['cycle'

In [4]:
pip install scipy

Collecting scipy
  Using cached scipy-1.15.2-cp312-cp312-win_amd64.whl.metadata (60 kB)
Using cached scipy-1.15.2-cp312-cp312-win_amd64.whl (40.9 MB)
Installing collected packages: scipy
Successfully installed scipy-1.15.2
Note: you may need to restart the kernel to use updated packages.
