In [2]:
import os
import re
import pandas as pd

# Raw-data folders to analyse. Only the first entry (path[0]) is consumed by
# the cells below. The folder name embeds the nominal capacity ("1689mAh"),
# which name_capacity() extracts.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not find the data on another machine without editing it.
path = [
    r'C:\Users\Ryu\Python_project\data\battery251027\Rawdata\250207_250307_3_김동진_1689mAh_ATL Q7M Inner 2C 상온수명 1-100cyc'
]

In [3]:
# Identify the cycler type (by the presence of a "Pattern" sub-folder)
def check_cycler(raw_file_path):
    """Tell PNE and Toyo cycler data folders apart.

    Parameters
    ----------
    raw_file_path : str
        Path of the cycler data folder to inspect.

    Returns
    -------
    bool
        True when ``raw_file_path`` contains a ``Pattern`` sub-directory
        (PNE), False otherwise (Toyo).
    """
    # os.path.join uses the platform's separator; the previous hard-coded
    # "\\Pattern" suffix only formed a valid path on Windows.
    return os.path.isdir(os.path.join(raw_file_path, "Pattern"))

# Extract the capacity from the data folder/file path name
def name_capacity(data_file_path):
    """Parse the capacity value written as ``<number>mAh`` in a path name.

    A fractional capacity may be written with ``-`` as the decimal
    separator (e.g. ``4-5mAh`` is read as 4.5).

    Parameters
    ----------
    data_file_path : str
        Path (or name) that is expected to contain a ``<number>mAh`` token.

    Returns
    -------
    float or None
        The capacity as a float, or None when no ``<number>mAh`` token is
        present.
    """
    # Replace separator-like special characters with spaces before searching.
    # The original class listed "$" several times; duplicates inside a
    # character class are redundant, so this class matches the same set.
    # NOTE(review): "." is blanked here as well, so a dot-decimal such as
    # "4.5mAh" is read as 5.0 -- only the "-" form keeps the fraction.
    # Confirm whether that is intended.
    raw_file_path = re.sub(r'[._@$()]', ' ', data_file_path)
    # Find the number that immediately precedes "mAh".
    match = re.search(r'(\d+([\-.]\d+)?)mAh', raw_file_path)
    # re.search returns None when nothing matches; the previous code called
    # match.group() unconditionally and raised AttributeError in that case.
    if match is None:
        return None
    # Convert the "-" decimal separator to "." so float() can parse it.
    return float(match.group(1).replace('-', '.'))

In [None]:
def load_and_merge_data(base_path):
    """
    Load every raw data file found under ``base_path`` into one DataFrame.

    Sub-directories of ``base_path`` whose names are all digits are visited
    in ascending numeric order. Inside each one, entries whose names match
    ``^\\d{6}$`` are read in sorted name order as comma-separated text
    (first three rows skipped, cp949 encoding, malformed rows dropped) and
    stacked. The per-folder frames are then stacked into a single frame with
    a fresh 0..n-1 index.

    Progress is reported through ``print``. A file that cannot be read is
    reported and skipped rather than aborting the load.

    Returns the combined DataFrame, or an empty DataFrame when ``base_path``
    does not exist or no file could be loaded.
    """
    try:
        # Keep only digit-named sub-directories, ordered by numeric value.
        numeric_dirs = [
            entry
            for entry in os.listdir(base_path)
            if os.path.isdir(os.path.join(base_path, entry)) and entry.isdigit()
        ]
        numeric_dirs.sort(key=int)
        print(f"Numeric folders found: {numeric_dirs}")
    except FileNotFoundError:
        print(f"Error: Directory not found at {base_path}")
        return pd.DataFrame()

    per_folder_frames = []
    for dir_name in numeric_dirs:
        dir_path = os.path.join(base_path, dir_name)
        data_files = [name for name in os.listdir(dir_path) if re.match(r"^\d{6}$", name)]
        data_files.sort()

        print(f"Processing Folder: {dir_name}, Files: {data_files}")

        # Frames read from this folder only.
        frames = []
        for file_name in data_files:
            full_path = os.path.join(dir_path, file_name)
            try:
                frame = pd.read_csv(full_path, sep=",", skiprows=3, engine="c", encoding="cp949", on_bad_lines='skip')
                print(f"Read data from {full_path} with shape {frame.shape}")
                frames.append(frame)
            except Exception as err:
                # Best effort: report the unreadable file and carry on.
                print(f"Could not read file {full_path}: {err}")

        # Nothing readable in this folder -> nothing to contribute.
        if not frames:
            continue

        folder_frame = pd.concat(frames, ignore_index=True)
        print(f"  -> Merged data for folder '{dir_name}' has shape {folder_frame.shape}")
        per_folder_frames.append(folder_frame)

    if not per_folder_frames:
        print("\nNo data was loaded to merge.")
        return pd.DataFrame()

    # Single concat over all folders.
    combined = pd.concat(per_folder_frames, ignore_index=True)
    print("\nAll data has been merged successfully.")
    print("Shape of final merged data:", combined.shape)
    return combined

# --- Main script execution ---
# True when the data folder contains a "Pattern" sub-folder (PNE cycler),
# False otherwise (Toyo cycler).
cycler = check_cycler(path[0])
# Capacity parsed from the "<number>mAh" token in the folder name.
capacity = name_capacity(path[0])

# Load and merge every raw data file under the data folder.
merged_data = load_and_merge_data(path[0])

# Preview the first rows. A notebook only renders a bare expression when it is
# the last top-level statement of the cell; the previous version placed it
# inside an `if` block, so nothing was ever shown. load_and_merge_data always
# returns a DataFrame, so head() is safe even when nothing was loaded.
merged_data.head()

Numeric folders found: ['30', '31']
Processing Folder: 30, Files: ['000001', '000002', '000003', '000004', '000005', '000006', '000007', '000008', '000009', '000010', '000011', '000012', '000013', '000014', '000015', '000016', '000017', '000018', '000019', '000020', '000021', '000022', '000023', '000024', '000025', '000026', '000027', '000028', '000029', '000030', '000031', '000032', '000033', '000034', '000035', '000036', '000037', '000038', '000039', '000040', '000041', '000042', '000043', '000044', '000045', '000046', '000047', '000048', '000049', '000050', '000051', '000052', '000053', '000054', '000055', '000056', '000057', '000058', '000059', '000060', '000061', '000062', '000063', '000064', '000065', '000066', '000067', '000068', '000069', '000070', '000071', '000072', '000073', '000074', '000075', '000076', '000077', '000078', '000079', '000080', '000081', '000082', '000083', '000084', '000085', '000086', '000087', '000088', '000089', '000090', '000091', '000092', '000093', '00

In [None]:
# merged data is now available for further processing
# --- Main script execution ---

In [8]:
import matplotlib.pyplot as plt
import seaborn as sns

# Set plot style
sns.set_style("whitegrid")

# Only proceed when an earlier cell has produced a non-empty merged_data.
if 'merged_data' in locals() and not merged_data.empty:
    print("--- Data Verification ---")
    print(f"Shape of the merged data: {merged_data.shape}")

    print("\nData Info:")
    merged_data.info()

    print("\nDescriptive Statistics:")
    print(merged_data.describe())

    print("\nChecking for missing values:")
    print(merged_data.isnull().sum())

    # --- Data Visualization ---
    print("\n--- Plotting Voltage and Current ---")

    # Column names as they appear in the loaded data (see the info() listing:
    # 'Voltage[V]' and 'Current[mA]'). The previous names 'Voltage(V)' and
    # 'Current(A)' never matched, so the plot branch was unreachable.
    voltage_col = 'Voltage[V]'
    current_col = 'Current[mA]'

    if voltage_col in merged_data.columns and current_col in merged_data.columns:
        # Two stacked panels sharing the same figure: voltage on top, current below.
        fig, (voltage_ax, current_ax) = plt.subplots(2, 1, figsize=(15, 10))

        # Plot Voltage against the row index of the merged frame.
        voltage_ax.plot(merged_data.index, merged_data[voltage_col], label=voltage_col, color='blue')
        voltage_ax.set_title('Voltage over Time')
        voltage_ax.set_xlabel('Time Step')
        voltage_ax.set_ylabel('Voltage (V)')
        voltage_ax.legend()

        # Plot Current; the column is recorded in mA, so the axis label says so.
        current_ax.plot(merged_data.index, merged_data[current_col], label=current_col, color='green')
        current_ax.set_title('Current over Time')
        current_ax.set_xlabel('Time Step')
        current_ax.set_ylabel('Current (mA)')
        current_ax.legend()

        fig.tight_layout()
        plt.show()
    else:
        # Show what is available so the column names above can be adjusted.
        print(f"\nCould not find '{voltage_col}' or '{current_col}' columns for plotting.")
        print(f"Available columns are: {merged_data.columns.tolist()}")

else:
    print("merged_data DataFrame is not available or is empty. Cannot perform verification and visualization.")

--- Data Verification ---
Shape of the merged data: (38815, 16)

Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38815 entries, 0 to 38814
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           38815 non-null  object 
 1   Time           38815 non-null  object 
 2   PassTime[Sec]  38815 non-null  int64  
 3   Voltage[V]     38815 non-null  float64
 4   Current[mA]    38815 non-null  float64
 5   Unnamed: 5     0 non-null      float64
 6   Unnamed: 6     0 non-null      float64
 7   Temp1[Deg]     38815 non-null  float64
 8   Unnamed: 8     0 non-null      float64
 9   Unnamed: 9     0 non-null      float64
 10  Unnamed: 10    0 non-null      float64
 11  Condition      38815 non-null  int64  
 12  Mode           38815 non-null  int64  
 13  Cycle          38815 non-null  int64  
 14  TotlCycle      38815 non-null  int64  
 15  Temp1[Deg].1   38815 non-null  float64
dtypes: float64(9), int