In [1]:
import math
import os
import pickle
import random

import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset

In [2]:
# https://github.com/rdbraatz/data-driven-prediction-of-battery-cycle-life-before-capacity-degradation/blob/master/Load%20Data.ipynb

# Cells dropped from the merged dataset, grouped by the reason for dropping them.
remove_keys = (
  # FOLLOWS THE ORIGINAL REPO
  # batteries that do not reach 80% capacity
  ['b1c8', 'b1c10', 'b1c12', 'b1c13', 'b1c22']
  # batch 2 batteries whose data is moved onto batch 1 cells
  + ['b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16']
  # noisy channels from batch 3
  + ['b3c37', 'b3c2', 'b3c23', 'b3c32', 'b3c42', 'b3c43']
  # NEWLY DELETED HERE
  # outliers with abnormal capacity
  + ['b1c0', 'b1c18', 'b2c12', 'b2c44']
)

# Five cells from batch 1 carried on into batch 2. Each row pairs the batch 1 key
# with the batch 2 key holding its continuation and the number of cycles that
# continuation adds to the batch 1 cycle life.
batch1_keys, batch2_keys, add_len = (
  list(column) for column in zip(
    ('b1c0', 'b2c7', 662),
    ('b1c1', 'b2c8', 981),
    ('b1c2', 'b2c9', 1060),
    ('b1c3', 'b2c15', 208),
    ('b1c4', 'b2c16', 482),
  )
)

# Step between the lower edges of consecutive temperature / current feature bins.
temperature_feature_interval = current_feature_interval = 1

# A capacity sample is treated as an outlier when its absolute change from the
# previous sample is at or above this threshold.
cap_outlier_diff_threshold = 0.015

# Settings for the randomly drawn curve ratios: overall range, how many ratios
# to draw, and the number of decimal digits they are rounded to.
curve_ratio_min, curve_ratio_max = 0.5, 0.99
curve_ratio_steps, curve_ratio_digits = 9, 2
curve_ratio_step_size = (curve_ratio_max - curve_ratio_min) / curve_ratio_steps

In [3]:
data_dir_path = './data/MIT Stanford battery dataset/'


def _load_batch(file_name):
  """Unpickle one batch file from `data_dir_path` and close the file afterwards."""
  # NOTE: unpickling can execute arbitrary code; only load dataset files from a trusted source.
  with open(os.path.join(data_dir_path, file_name), 'rb') as batch_file:
    return pickle.load(batch_file)


batch1 = _load_batch('batch1.pkl')
batch2 = _load_batch('batch2.pkl')
batch3 = _load_batch('batch3.pkl')

# Append the batch 2 continuation of each carried-over cell to its batch 1 entry.
for bk, b2k, extra_cycles in zip(batch1_keys, batch2_keys, add_len):
  target = batch1[bk]
  continuation = batch2[b2k]

  target['cycle_life'] = target['cycle_life'] + extra_cycles

  for j in target['summary'].keys():
    head = target['summary'][j]
    tail = continuation['summary'][j]
    if j == 'cycle':
      # shift the continuation's cycle numbers so they follow on from the batch 1 part
      tail = tail + len(head)
    target['summary'][j] = np.hstack((head, tail))

  # Per-cycle records are keyed by stringified 0-based position, so the
  # continuation's cycles are re-keyed to start right after the last existing one.
  last_cycle = len(target['cycles'].keys())
  for j, jk in enumerate(continuation['cycles'].keys()):
    target['cycles'][str(last_cycle + j)] = continuation['cycles'][jk]

# Merge all batches into one dict, then drop the excluded cells.
batch = {**batch1, **batch2, **batch3}
for key in remove_keys:
  del batch[key]

In [4]:
# Temperature extrema over all kept cells, taken from the summary arrays.
max_temps = [np.max(cell['summary']['Tmax']) for cell in batch.values()]
# Zero entries of Tmin are left out before taking each cell's minimum.
min_temps = [
  np.min(tmin[np.nonzero(tmin)])
  for tmin in (cell['summary']['Tmin'] for cell in batch.values())
]
max_temp = np.max(max_temps)
min_temp = np.min(min_temps)

# Current extrema over every cycle of every kept cell.
max_currents, min_currents = [], []
for cell in batch.values():
  for cycle_data in cell['cycles'].values():
    cycle_current = cycle_data['I']
    max_currents.append(np.max(cycle_current))
    min_currents.append(np.min(cycle_current))
max_current = np.max(max_currents)
min_current = np.min(min_currents)

In [5]:
pre_processed_data = {}

# Dedicated seeded generator: the sampled curve ratios (and therefore the pickled
# output) are identical every time this cell is run.
curve_ratio_seed = 0
curve_ratio_rng = random.Random(curve_ratio_seed)

# Lower edges of the temperature / charging-current bins; identical for every
# cycle, so they are computed once up front.
temp_bin_starts = range(math.floor(min_temp), math.ceil(max_temp), temperature_feature_interval)
# only check charging current (I>0), as the discharge cycles are constant
current_bin_starts = range(0, math.ceil(max_current), current_feature_interval)


def _time_in(mask, time):
  """Integrate a boolean mask over `time` (trapezoidal rule), i.e. the time spent with `mask` true."""
  return np.trapz(mask.astype(np.float32), time)


for key in batch.keys():
  # skip the first cycle as it is initial cycle with 0 values
  caps = batch[key]['summary']['QC'][1:]

  # remove outliers: keep samples whose jump from the previous sample is below the threshold
  cap_diff = np.abs(np.concatenate(([0], np.diff(caps))))
  valid_idx = np.where(cap_diff < cap_outlier_diff_threshold)[0]
  caps = caps[valid_idx]

  #TODO: use different normalization??
  caps = caps/1.1

  cycles = batch[key]['cycles']
  feats = []
  for cap_idx in valid_idx:
    # `caps` was sliced with [1:], so position i in `caps` is summary entry i + 1.
    # Cycle records are keyed by 0-based position, hence the +1 to read the cycle
    # that actually produced this capacity (and to really skip cycle '0').
    # Assumes summary arrays and cycle records have the same length - TODO confirm.
    cycle = cycles[str(cap_idx + 1)]
    time = cycle['t']
    current = cycle['I']
    temp = cycle['T']

    feat = [
      _time_in(current == 0, time),   # time at rest
      _time_in(current > 0, time),    # time with positive current
      _time_in(current < 0, time),    # time with negative current
      np.max(current),
      abs(np.min(current)),
    ]
    # time spent in each (t, t + interval] temperature bin
    feat.extend(
      _time_in((temp > t) & (temp <= t + temperature_feature_interval), time)
      for t in temp_bin_starts
    )
    # time spent in each (c, c + interval] positive-current bin
    feat.extend(
      _time_in((current > c) & (current <= c + current_feature_interval), time)
      for c in current_bin_starts
    )
    feats.append(np.asarray(feat, dtype=np.float64))

  # Draw increasing ratios: each one is sampled between the previous ratio and an
  # upper bound that grows by one step per draw (capped at curve_ratio_max).
  curve_ratios = []
  random_min = curve_ratio_min
  random_max = random_min + curve_ratio_step_size
  for _ in range(curve_ratio_steps):
    new_ratio = round(curve_ratio_rng.uniform(random_min, min(random_max, curve_ratio_max)), curve_ratio_digits)
    curve_ratios.append(new_ratio)
    random_min = new_ratio
    random_max += curve_ratio_step_size
  curve_ratios.append(1.0)

  print('Preprocessed', key, 'with', len(feats), 'cycles')

  pre_processed_data[key] = {
    'capacities': torch.tensor(caps, dtype=torch.float32),
    # stack into one ndarray first: building a tensor from a list of ndarrays is very slow
    'features': torch.tensor(np.array(feats), dtype=torch.float32),
    'curve_ratios': curve_ratios,
  }

Preprocessed b1c1 with 2154 cycles
Preprocessed b1c2 with 2233 cycles
Preprocessed b1c3 with 1430 cycles
Preprocessed b1c4 with 1704 cycles
Preprocessed b1c5 with 1072 cycles
Preprocessed b1c6 with 634 cycles
Preprocessed b1c7 with 868 cycles
Preprocessed b1c9 with 1052 cycles
Preprocessed b1c11 with 786 cycles
Preprocessed b1c14 with 878 cycles
Preprocessed b1c15 with 717 cycles
Preprocessed b1c16 with 860 cycles
Preprocessed b1c17 with 855 cycles
Preprocessed b1c19 with 786 cycles
Preprocessed b1c20 with 532 cycles
Preprocessed b1c21 with 557 cycles
Preprocessed b1c23 with 1012 cycles
Preprocessed b1c24 with 1013 cycles
Preprocessed b1c25 with 852 cycles
Preprocessed b1c26 with 868 cycles
Preprocessed b1c27 with 840 cycles
Preprocessed b1c28 with 858 cycles
Preprocessed b1c29 with 915 cycles
Preprocessed b1c30 with 707 cycles
Preprocessed b1c31 with 874 cycles
Preprocessed b1c32 with 729 cycles
Preprocessed b1c33 with 755 cycles
Preprocessed b1c34 with 740 cycles
Preprocessed b1c35 w

In [6]:
# Write the preprocessed data next to the raw batches. The context manager makes
# sure the file is flushed and closed even if pickling raises.
with open(os.path.join(data_dir_path, 'preprocessed_data.pkl'), 'wb') as out_file:
  pickle.dump(pre_processed_data, out_file)