In [None]:
# 1_数据描述分析.ipynb
import numpy as np
import matplotlib.pyplot as plt
from utils import BatteryDataLoader, StatisticalAnalyzer

# 1. Load data
# The loader is unpacked into two results that the rest of the notebook treats
# as battery lifetimes (in cycles) and initial capacities (in Ah).
folder_path = "data/Battery Data Set/1. BatteryAgingARC-FY08Q4"
lifetimes, initial_capacities = BatteryDataLoader.extract_lifetimes_from_folder(folder_path)

# Coerce to ndarrays so vectorised comparisons (e.g. `lifetimes < threshold`)
# behave the same whether the loader hands back lists or arrays.
lifetimes = np.asarray(lifetimes)
initial_capacities = np.asarray(initial_capacities)

# Fail fast with an actionable message instead of printing `nan` and then
# crashing inside np.min with "zero-size array to reduction operation".
if lifetimes.size == 0:
    raise ValueError(f"No battery lifetimes were loaded from {folder_path!r}")

print("=== 电池寿命描述性统计 ===")
print(f"样本数量: {len(lifetimes)}")
print(f"平均寿命: {np.mean(lifetimes):.1f} 次循环")
# np.std defaults to ddof=0, i.e. the population standard deviation.
print(f"寿命标准差: {np.std(lifetimes):.1f}")
print(f"最小寿命: {np.min(lifetimes)} 次循环")
print(f"最大寿命: {np.max(lifetimes)} 次循环")
print(f"中位数: {np.median(lifetimes)} 次循环")


In [None]:

# 2-3. Lifetime distribution: histogram (left panel) and box plot (right panel)
# Both panels are drawn in a single cell on an explicitly created figure.
# The Jupyter inline backend shows and closes the current figure when a cell
# finishes, so adding the second subplot from a later cell would land on a
# new, default-sized figure and the saved PNG would lack the histogram.
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: histogram of lifetimes
ax_hist.hist(lifetimes, bins=15, edgecolor='black', alpha=0.7)
ax_hist.set_xlabel('电池寿命（循环次数）', fontsize=12)
ax_hist.set_ylabel('频数', fontsize=12)
ax_hist.set_title('电池寿命分布直方图', fontsize=14)
ax_hist.grid(True, alpha=0.3)

# Right panel: horizontal box plot of the same data
ax_box.boxplot(lifetimes, vert=False)
ax_box.set_xlabel('电池寿命（循环次数）', fontsize=12)
ax_box.set_title('电池寿命箱线图', fontsize=14)
ax_box.grid(True, alpha=0.3)

# Save before show(): the figure may be closed once it has been displayed.
fig.tight_layout()
fig.savefig('寿命分布.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:

# 4. Initial capacity analysis
print("\n=== 初始容量分析 ===")
# Assumes a nominal capacity of 1.0 Ah.
# NOTE(review): confirm this against the cell specification of the data set.
capacity_threshold = 1.0

# Simulated initial defects: units whose initial capacity is below the threshold.
# np.asarray keeps the comparison element-wise even if the capacities arrive
# as a plain Python list (list < float would raise TypeError).
defective_mask = np.asarray(initial_capacities) < capacity_threshold
# The mean of a boolean mask is the fraction of True entries.
defective_rate = np.mean(defective_mask)

print(f"初始容量平均值: {np.mean(initial_capacities):.3f} Ah")
print(f"初始容量标准差: {np.std(initial_capacities):.3f} Ah")
print(f"模拟初始不良品率: {defective_rate:.3%}")
print(f"不良品数量: {np.sum(defective_mask)} / {len(defective_mask)}")


In [None]:

# 5. Define the random events
print("\n=== 随机事件定义 ===")
# Lifetime threshold (in cycles) that defines event B.
# NOTE(review): 500 is a chosen cut-off; confirm it is meaningful for this data.
threshold_life = 500
# np.asarray keeps the comparison element-wise even if the lifetimes arrive
# as a plain Python list (list < int would raise TypeError).
event_B = np.asarray(lifetimes) < threshold_life
# Empirical P(B): fraction of batteries whose lifetime is below the threshold.
event_B_prob = np.mean(event_B)
print(f"事件A（初始不良品）: P(A) = {defective_rate:.3%}")
print(f"事件B（寿命<{threshold_life}次）: P(B) = {event_B_prob:.3%}")

# Collect the results of this part in a dict (in memory only; nothing is
# written to disk here).
results_part1 = {
    'lifetimes': lifetimes,
    'initial_capacities': initial_capacities,
    'defective_rate': defective_rate,
    'event_B_prob': event_B_prob
}