In [None]:
# =========================
# 0. Imports & Settings
# =========================
import os
import json
import warnings
from collections import Counter

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    recall_score
)

from catboost import CatBoostClassifier, Pool

from src.data import FTRC_Data

# Notebook-wide setting: suppress every warning raised from here on.
warnings.filterwarnings("ignore")

# =========================
# 1. Load and Filter Data
# =========================
print("Loading FTRC battery failure dataset...")
data_train = FTRC_Data()

# Keep only fully charged cells (SOC = 100%).
# `.copy()` detaches the filtered rows from the unfiltered frame, so the
# feature columns added to `data_train.df` later are ordinary assignments
# rather than writes to a slice (which pandas reports as SettingWithCopyWarning).
data_train.df = data_train.df[
    data_train.df["Pre-Test-State-of-Charge-%"] == 100
].copy()

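## Use 'Pre-Test-Cell-Open-Circuit-Voltage-V' and 'Pre-Test-Cell-Mass-g' directly.
## Compute the cell volume,
## then divide mass by volume to get the mass density.
## Add Cell-Energy-Wh (cell energy) and Bottom-Vent-Yes-No (whether the cell has a pressure-relief vent).
## Divide Cell-Energy-Wh by the mass, Pre-Test-Cell-Mass-g, to get the gravimetric energy density, Gravimetric-Energy-Density-Wh/kg.

## Compute the volume from the cell geometry,
## then divide the energy by that volume to get the volumetric energy density, Volumetric-Energy-Density-Wh/L.


## Finally, the model inputs are: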
# FEATURES_METADATA = [
#     "Cell-Description",
#     "Manufacturer",
#     "Geometry",
#     "Cell-Capacity-Ah",
#     "Trigger-Mechanism", 
#     
#     "Pre-Test-Cell-Open-Circuit-Voltage-V","Bottom-Vent-Yes-No","Gravimetric-Energy-Density-Wh/kg","Volumetric-Energy-Density-Wh/L"
# ]

Loading FTRC battery failure dataset...


In [None]:
import numpy as np

def cylinder_volume_L(code):
    """Return the volume, in litres, of a cylindrical cell given its size code.

    The code is read as ``DDHHH``: the digits above the last three are the
    diameter in millimetres and the last three digits are the height in
    tenths of a millimetre, e.g. ``18650`` -> 18 mm diameter, 65.0 mm height.
    The digit layout is not validated, so a code written in another layout
    (for example a 4-digit code) yields a meaningless volume.

    Parameters
    ----------
    code : int, float, str or None
        Size code, or any value accepted by ``int()``. ``None`` and float NaN
        are treated as missing.

    Returns
    -------
    float
        Cylinder volume in litres; NaN when ``code`` is missing.

    Raises
    ------
    ValueError, TypeError
        Whatever ``int()`` raises for a non-missing value it cannot convert.
    """
    # A missing geometry propagates as NaN instead of making int() raise,
    # which would abort an entire Series.apply() over the column.
    if code is None or (isinstance(code, float) and np.isnan(code)):
        return np.nan

    code = int(code)

    diameter_mm = code // 1000       # leading digits: diameter in mm
    height_mm = (code % 1000) / 10   # last three digits: height in 0.1 mm

    radius_mm = diameter_mm / 2

    volume_mm3 = np.pi * (radius_mm**2) * height_mm

    return volume_mm3 / 1e6  # convert mm^3 to litres (1 L = 1e6 mm^3)

# Per-cell volume in litres, computed row by row from the geometry code.
data_train.df["Cell_Volume_L"] = data_train.df["Geometry"].apply(cylinder_volume_L)

# Volumetric energy density (Wh/L): cell energy divided by cell volume.
data_train.df["Volumetric-Energy-Density-Wh/L"] = data_train.df["Cell-Energy-Wh"].div(
    data_train.df["Cell_Volume_L"]
)

# Gravimetric energy density (Wh/kg): energy per gram of cell mass, scaled by 1000 g/kg.
data_train.df["Gravimetric-Energy-Density-Wh/kg"] = (
    data_train.df["Cell-Energy-Wh"]
    .div(data_train.df["Pre-Test-Cell-Mass-g"])
    .mul(1000)
)

279.8756108396268


Index(['Cell-Description', 'Test-ID', 'Test-Series', 'Trigger-Mechanism',
       'S-FTRC-Generation', 'Data-Processing-Utility', 'Test-Date',
       'Cell-ID-Number', 'Pre-Test-State-of-Charge-%',
       'Pre-Test-Cell-Open-Circuit-Voltage-V', 'Pre-Test-Cell-Mass-g',
       'Pre-Test-Positive-Copper-Mesh-Mass-g',
       'Pre-Test-Negative-Copper-Mesh-Mass-g',
       'Pressure-Assisted-Seal-Configuration-Positive',
       'Pressure-Assisted-Seal-Configuration-Negative',
       'Cell-Failure-Mechanism', 'Baseline-Total-Energy-Yield-kJ',
       'Conductive-Heat-Loss-Rate-kJs-1',
       'Baseline-Plus-Heat-Loss-Total-Energy-Yield-kJ',
       'Corrected-Total-Energy-Yield-kJ', 'Energy-Fraction-Cell-Body-kJ',
       'Energy-Fraction-Positive-Ejecta-kJ',
       'Energy-Fraction-Negative-Ejecta-kJ', 'Energy-Percent-Cell-Body-%',
       'Energy-Percent-Positive-Ejecta-%', 'Energy-Percent-Negative-Ejecta-%',
       'Theoretical-Energy-Positive-Unrecovered-Mass-kJ',
       'Theoretical-Energy-Neg