# 1 Data split

If you are interested in the LFP data split, please use this code.

In [1]:
import numpy as np
import random
import pandas as pd
import copy
import os
import json
# Seed both Python's `random` module and NumPy's global RNG so that the random
# selection of validation policies (done with `random.sample` further down)
# is reproducible from a fresh kernel.
seed = 2023
random.seed(seed)
np.random.seed(seed)

For the Nature Energy dataset, we use the conditions that cause relatively short cycle life (an average cycle life below 470 in the code below) as the testing conditions. The remaining conditions are randomly split into training conditions and validation conditions.

In [2]:
def check_policy_avg_life(policy_of_interest, name_policy, return_cell_number=False,
                          data_dir='./dataset/NatureEnergy_cycle_data/'):
    """Return the mean cycle life of all cells that use a given policy.

    The cycle life of a cell is taken to be the number of rows in its CSV file
    ``<data_dir>/<cell name>.csv`` (presumably one row per cycle - verify
    against the data files).

    Parameters
    ----------
    policy_of_interest
        Policy to look up; compared with ``==`` against the values of
        ``name_policy``.
    name_policy : dict
        Mapping of cell name (CSV file name without the ``.csv`` suffix) to
        the policy of that cell.
    return_cell_number : bool, default False
        When True, also return how many cells matched the policy.
    data_dir : str, default './dataset/NatureEnergy_cycle_data/'
        Folder that holds the per-cell CSV files.

    Returns
    -------
    float or (float, int)
        The mean cycle life, or ``(mean cycle life, number of cells)`` when
        ``return_cell_number`` is True. The mean is NaN when no cell uses the
        policy.
    """
    cycle_life = [
        len(pd.read_csv(os.path.join(data_dir, f'{name}.csv')))
        for name, policy in name_policy.items()
        if policy == policy_of_interest
    ]
    # np.mean([]) yields NaN but also emits a RuntimeWarning; return the same
    # NaN explicitly so a policy without cells is handled quietly.
    avg_life = np.mean(cycle_life) if cycle_life else np.nan
    if return_cell_number:
        return avg_life, len(cycle_life)
    return avg_life

In [3]:
# Cell identifiers: each file name in the cycle-data folder with everything
# from the first '.' onwards dropped (e.g. 'b2c1.csv' -> 'b2c1').
total_cell_names = os.listdir('./dataset/NatureEnergy_cycle_data/')
total_cell_names = [i.split('.')[0] for i in total_cell_names]
val_ratio = 0.2
# Mapping of cell name -> policy. Use a context manager so the file handle is
# closed right after parsing instead of being left open.
with open('./dataset/name_policy.json') as policy_file:
    name_policy = json.load(policy_file)
# Unique policies in order of first appearance (dicts preserve insertion order).
policies = list(dict.fromkeys(name_policy.values()))
# Filled in by the split that follows.
train_policies = []
test_policies = []
val_policies = []

# Route every policy to the test pool when its mean cycle life is below 470,
# otherwise to the training pool.
for candidate in policies:
    is_short_lived = check_policy_avg_life(candidate, name_policy) < 470
    destination = test_policies if is_short_lived else train_policies
    destination.append(candidate)
print(f'Train-{len(train_policies)}:{train_policies}')
print(f'Test-{len(test_policies)}:{test_policies}')

# Draw the validation policies at random from the training pool, then remove
# them from it so the three groups are disjoint.
val_num = 12
val_policies = random.sample(train_policies, val_num)
drawn_for_val = set(val_policies)
train_policies = [kept for kept in train_policies if kept not in drawn_for_val]
print('\n')
print(f'Train-{len(train_policies)}:{train_policies}')
print(f'Val-{len(val_policies)}:{val_policies}')
print(f'Test-{len(test_policies)}:{test_policies}')

# Print one numbered row per policy (running row index, number of cells,
# mean cycle life, policy), grouped under a header for each split. The row
# index keeps counting across the three groups. One data-driven loop replaces
# three copy-pasted loops; the printed text is unchanged.
row_index = 0
for header, group in (
    ('Trainig conditions', train_policies),
    ('Validation conditions', val_policies),
    ('testing conditions', test_policies),
):
    print(header)
    for p in group:
        row_index += 1
        avg_cycle_life, cell_number = check_policy_avg_life(p, name_policy, True)
        print(row_index, cell_number, avg_cycle_life, p)
# Map every cell to the split of its policy. Cells whose policy is in neither
# the training nor the validation group end up in the test group.
train_cells, val_cells, test_cells = [], [], []
train_policy_set = set(train_policies)
val_policy_set = set(val_policies)
for cell_id, cell_policy in name_policy.items():
    if cell_policy in train_policy_set:
        bucket = train_cells
    elif cell_policy in val_policy_set:
        bucket = val_cells
    else:
        bucket = test_cells
    bucket.append(f'{cell_id}.csv')
print('The cell number\t cells')
for bucket in (train_cells, val_cells, test_cells):
    print(len(bucket), bucket)

Train-52:['8C(35%)-3.6C', '8C(25%)-3.6C', '8C(15%)-3.6C', '7C(40%)-3C', '7C(40%)-3.6C', '7C(30%)-3.6C', '6C(60%)-3C', '6C(50%)-3C', '6C(50%)-3.6C', '6C(40%)-3C', '6C(40%)-3.6C', '6C(4%)-4.75C', '6C(30%)-3.6C', '5C(67%)-4C', '5.9C(60%)-3.1C', '5.9C(15%)-4.6C', '5.6C(5%)-4.75C', '5.6C(38%)-4.25C', '5.6C(36%)-4.3C', '5.6C(25%)-4.5C', '5.6C(19%)-4.6C', '5.4C(80%)-5.4C', '5.4C(70%)-3C', '5.4C(60%)-3C', '5.4C(60%)-3.6C', '5.4C(50%)-3C', '5.4C(40%)-3.6C', '5.3C(54%)-4C', '5.2C(66%)-3.5C', '5.2C(58%)-4C', '5.2C(50%)-4.25C', '5.2C(37%)-4.5C', '5.2C(10%)-4.75C', '4C(80%)-4C', '4C(40%)-6C', '4C(4%)-4.85C', '4C(13%)-5C', '4.9C(69%)-4.25C', '4.9C(61%)-4.5C', '4.8C(80%)-4.8C', '4.65C(69%)-6C', '4.65C(44%)-5C', '4.65C(19%)-4.85C', '4.4C(80%)-4.4C', '4.4C(8%)-4.85C', '4.4C(47%)-5.5C', '4.4C(24%)-5C', '3.7C(31%)-5.9C', '3.6C(9%)-5C', '3.6C(80%)-3.6C', '3.6C(30%)-6C', '3.6C(2%)-4.85C']
Test-16:['6C(52%)-3.5C', '6C(40%)-4C', '6C(31%)-4.25C', '6C(20%)-4.5C', '5.6C(65%)-3C', '5.6C(58%)-3.5C', '5.6C(47%)-4C

In [4]:
# Build {policy: [number of CSV rows, csv file name]} for the test cells.
# NOTE: the dict is keyed by policy, so when several test cells share a policy
# only the last one visited is kept.
life_by_policy = {}
for csv_name in test_cells:
    n_rows = len(pd.read_csv(f'./dataset/NatureEnergy_cycle_data/{csv_name}'))
    policy_label = name_policy[csv_name.split('.')[0]]
    life_by_policy[policy_label] = [n_rows, csv_name]
# Rebind the name to a list of (policy, [rows, file]) pairs, ascending by rows.
test_cellPolicy_cycleLife = sorted(life_by_policy.items(), key=lambda item: item[1][0])
print(test_cellPolicy_cycleLife)
for key, value in test_cellPolicy_cycleLife:
    print(key, value)

[('2C(10%)-6C', [146, 'b2c1.csv']), ('1C(4%)-6C', [297, 'b2c0.csv']), ('2C(7%)-5.5C', [332, 'b2c3.csv']), ('5.6C(65%)-3C', [426, 'b2c41.csv']), ('6C(52%)-3.5C', [427, 'b2c46.csv']), ('2C(2%)-5C', [435, 'b2c2.csv']), ('3.6C(22%)-5.5C', [441, 'b2c4.csv']), ('6C(40%)-4C', [454, 'b2c44.csv']), ('4C(31%)-5C', [455, 'b2c12.csv']), ('5.6C(58%)-3.5C', [456, 'b2c39.csv']), ('4.4C(55%)-6C', [458, 'b2c19.csv']), ('6C(31%)-4.25C', [459, 'b2c43.csv']), ('5.2C(71%)-3C', [461, 'b2c35.csv']), ('5.6C(47%)-4C', [462, 'b2c38.csv']), ('6C(20%)-4.5C', [463, 'b2c42.csv']), ('4.9C(27%)-4.75C', [464, 'b2c27.csv'])]
2C(10%)-6C [146, 'b2c1.csv']
1C(4%)-6C [297, 'b2c0.csv']
2C(7%)-5.5C [332, 'b2c3.csv']
5.6C(65%)-3C [426, 'b2c41.csv']
6C(52%)-3.5C [427, 'b2c46.csv']
2C(2%)-5C [435, 'b2c2.csv']
3.6C(22%)-5.5C [441, 'b2c4.csv']
6C(40%)-4C [454, 'b2c44.csv']
4C(31%)-5C [455, 'b2c12.csv']
5.6C(58%)-3.5C [456, 'b2c39.csv']
4.4C(55%)-6C [458, 'b2c19.csv']
6C(31%)-4.25C [459, 'b2c43.csv']
5.2C(71%)-3C [461, 'b2c35.csv'