In [23]:
import pickle as pkl
import torch
import numpy as np
import os
import pandas as pd
import pickle as pkl
import warnings
# Suppress all warnings
warnings.filterwarnings('ignore')

os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [24]:
# Load the pickled battery table from a path relative to the working directory.
# NOTE(review): unpickling can execute arbitrary code - only load this file
# from a trusted source.
# The result is used below like a pandas DataFrame (column access, .apply);
# presumably that is what the pickle holds - confirm against the producer.
with open("all_vcm_nodiff_403.pkl","rb") as f:
    battery_data = pkl.load(f)

def extract_tensor_value(cell):
    """Return the plain Python scalar stored at position ``[0][0]`` of ``cell``.

    ``cell`` is indexed twice and ``.item()`` is called on the result, so the
    innermost element must expose ``.item()`` (for example a torch tensor).
    """
    first_row = cell[0]
    first_entry = first_row[0]
    return first_entry.item()
# Replace every nested label by the scalar found at its [0][0] position.
battery_data['label'] = battery_data['label'].apply(extract_tensor_value)

# Collapse the raw dataset ids into three groups: an id containing 'HUST' or
# 'MATR' (checked in that order) takes that name, every other id becomes 'MIX'.
dataset_id_map = {}
for dataset_id in battery_data["dataset_id"].unique():
    dataset_id_map[dataset_id] = next(
        (group for group in ('HUST', 'MATR') if group in dataset_id),
        'MIX',
    )
print(dataset_id_map)
battery_data["dataset_id_new"] = battery_data["dataset_id"].map(dataset_id_map)

{'MIX': 'MIX', 'HUST': 'HUST', 'MATR': 'MATR'}


In [25]:
def _count_rows(group, split):
    """Count the rows of ``battery_data`` that belong to ``group`` and ``split``."""
    in_group = battery_data["dataset_id_new"] == group
    in_split = battery_data["split"] == split
    return int((in_group & in_split).sum())


# MATR is queried for the "valid", "test1" and "test2" splits.
len_MATR_train = _count_rows("MATR", "train")
len_MATR_val = _count_rows("MATR", "valid")
len_MATR_test1 = _count_rows("MATR", "test1")
len_MATR_test2 = _count_rows("MATR", "test2")

# HUST and MIX are queried for a single "test" split.
len_HUST_train = _count_rows("HUST", "train")
len_HUST_val = _count_rows("HUST", "valid")
len_HUST_test = _count_rows("HUST", "test")

len_MIX_train = _count_rows("MIX", "train")
len_MIX_val = _count_rows("MIX", "valid")
len_MIX_test = _count_rows("MIX", "test")

print("MATR: #train={}, #val={}, #test1={}, #test2={}".format(
    len_MATR_train, len_MATR_val, len_MATR_test1, len_MATR_test2))
print("HUST: #train={}, #val={}, #test={}".format(
    len_HUST_train, len_HUST_val, len_HUST_test))
print("MIX: #train={}, #val={}, #test={}".format(
    len_MIX_train, len_MIX_val, len_MIX_test))

MATR: #train=41, #val=45, #test1=43, #test2=40
HUST: #train=55, #val=0, #test=22
MIX: #train=116, #val=0, #test=30


In [26]:
# Preview the first three rows, including the derived `dataset_id_new` column.
battery_data.head(3)

Unnamed: 0,dataset_id,chemistry,cell_id,cycle_numbers,label,feature,split,dataset_id_new
0,MIX,LCO,CALCE_CS2_33,"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1...",474.0,"[[tensor(0., dtype=torch.float64), tensor(0., ...",train,MIX
1,MIX,LCO,CALCE_CS2_34,"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 1...",449.0,"[[tensor(0., dtype=torch.float64), tensor(0., ...",train,MIX
2,MIX,LCO,CALCE_CS2_35,"[7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ...",519.0,"[[tensor(0., dtype=torch.float64), tensor(0., ...",test,MIX


In [27]:
# Names of the three dataset groups stored in the `dataset_id_new` column.
# The commas matter: Python concatenates adjacent string literals, so without
# them the list holds the single element 'HUSTMATRMIX'.
task_names = ['HUST', 'MATR', 'MIX']

def get_x_data(df, col = "feature", cycle_pairs=((98, 8),)):
    """Build one flat difference-feature vector per row of ``df``.

    Each entry of ``df[col]`` is indexed as ``x[i, :]``. For every
    ``(start, end)`` pair, row ``end`` is subtracted from row ``start``; the
    differences are flattened and concatenated in pair order.

    Args:
        df: table whose ``col`` column holds one 2-D indexable object per row
            (a torch tensor or a numpy array both work).
        col: name of the column holding the per-row feature matrices.
        cycle_pairs: iterable of ``(start, end)`` row-index pairs. The default
            is the single pair ``(98, 8)``. The previous default was nested one
            level too deep and could not be unpacked into ``start, end``.

    Returns:
        A tensor with one row per row of ``df``; its width is the summed
        length of all flattened differences.

    Raises:
        RuntimeError: raised by torch when ``df`` has no rows (nothing to
            stack) or ``cycle_pairs`` is empty (nothing to concatenate).
    """
    pairs = list(cycle_pairs)
    rows = []
    for x_i in df[col]:
        # as_tensor accepts both tensors and numpy arrays without re-wrapping
        # (and without the copy-construct warning of torch.tensor(tensor)).
        diffs = [
            torch.as_tensor(x_i[start, :] - x_i[end, :]).reshape(-1)
            for start, end in pairs
        ]
        rows.append(torch.cat(diffs))
    return torch.stack(rows)

def create_y(df, col = "label"):
    """Return the ``col`` column of ``df`` as a float32 tensor of shape (N, 1)."""
    # Drop the original index labels before handing the Series to torch.
    labels = df[col].reset_index(drop=True)
    return torch.tensor(labels).float().view(-1, 1)

In [28]:
def get_dataset_for_task(task_name, battery_data = battery_data,  phase = "train", if_scale = False, dataset_id = "dataset_id_new", cycle_pairs=[(98, 8)]):
    """Select the rows of one dataset group and convert them to tensors.

    Args:
        task_name: value matched against the ``dataset_id`` column. The
            "fine_tune" phase only knows "MATR", "HUST" and "MIX".
        battery_data: table to read from. The default is the notebook-level
            ``battery_data`` object as it existed when this cell was run
            (default arguments are bound at definition time).
        phase: "train" or "fine_tune".
        if_scale: accepted for compatibility; not used by this function.
        dataset_id: name of the column that holds the group label.
        cycle_pairs: forwarded unchanged to ``get_x_data``.

    Returns:
        ``(x, y)`` for phase "train";
        ``(train_x, train_y, test_x, test_y)`` for phase "fine_tune", where
        the test rows come from split "test1" for MATR and split "test" for
        HUST and MIX.

    Raises:
        ValueError: for an unknown ``phase``, or for a ``task_name`` the
            "fine_tune" phase has no split definition for. Previously these
            cases returned ``None`` or failed with ``UnboundLocalError``.
    """
    dataset = battery_data[battery_data[dataset_id] == task_name]

    if phase == "train":
        # NOTE(review): only MATR is restricted to its "train" split here; the
        # other groups keep every split - confirm this is intended.
        if task_name == "MATR":
            dataset = dataset[dataset["split"] == "train"]
        x = get_x_data(dataset, cycle_pairs=cycle_pairs)
        y = create_y(dataset)
        return x, y

    if phase == "fine_tune":
        # Name of the held-out split per dataset group.
        test_split_by_task = {"MATR": "test1", "HUST": "test", "MIX": "test"}
        if task_name not in test_split_by_task:
            raise ValueError(
                f"unknown task_name {task_name!r} for phase 'fine_tune'; "
                f"expected one of {sorted(test_split_by_task)}"
            )
        dataset_train = dataset[dataset["split"] == "train"]
        dataset_test = dataset[dataset["split"] == test_split_by_task[task_name]]

        train_x = get_x_data(dataset_train, cycle_pairs=cycle_pairs)
        train_y = create_y(dataset_train)
        test_x = get_x_data(dataset_test, cycle_pairs=cycle_pairs)
        test_y = create_y(dataset_test)
        return train_x, train_y, test_x, test_y

    raise ValueError(f"unknown phase {phase!r}; expected 'train' or 'fine_tune'")
    
def split_train_test(initial_cycles, dataset_id):
    """Build fine-tune train/test feature and label frames for one dataset group.

    For every ``i`` in ``initial_cycles`` the cycle pair ``(98, i)`` is passed
    to ``get_dataset_for_task(dataset_id, phase="fine_tune", ...)``. The
    resulting feature blocks are placed side by side (column-wise), so
    ``[8, 18]`` yields the 98-8 features followed by the 98-18 features.

    Args:
        initial_cycles: iterable of second elements for the ``(98, i)`` pairs.
        dataset_id: dataset group name handed to ``get_dataset_for_task``.

    Returns:
        ``(df_train_x, df_train_y, df_test_x, df_test_y)``. Feature columns are
        named ``F_99_{i}_{j}``; the label frames come from the last processed
        cycle and have their column ``0`` renamed to ``EoL``. With empty
        ``initial_cycles`` all four frames are empty.
    """
    train_x_frames = []
    test_x_frames = []
    df_train_y = pd.DataFrame()
    df_test_y = pd.DataFrame()
    train_cols = 0
    test_cols = 0

    for i in initial_cycles:
        cycle_pairs = [(98, i)]
        train_x, train_y, test_x, test_y = get_dataset_for_task(dataset_id, phase="fine_tune", cycle_pairs=cycle_pairs)
        # Take the feature width from the data instead of assuming 1000 columns.
        column_names = [f'F_99_{i}_{j}' for j in range(train_x.shape[1])]

        # Convert the tensors to DataFrames; the labels do not depend on the
        # cycle pair, so only the most recent label frames are kept.
        train_x_frames.append(pd.DataFrame(train_x.numpy(), columns=column_names))
        test_x_frames.append(pd.DataFrame(test_x.numpy(), columns=column_names))
        df_train_y = pd.DataFrame(train_y.numpy())
        df_test_y = pd.DataFrame(test_y.numpy())

        # Report the shapes accumulated so far.
        train_cols += train_x.shape[1]
        test_cols += test_x.shape[1]
        print("dataset:", dataset_id)
        print("\t train X shape:", (train_x.shape[0], train_cols), "train Y shape:", df_train_y.shape)
        print("\t test X shape:", (test_x.shape[0], test_cols), "test Y shape:", df_test_y.shape)

    # Join the per-cycle feature blocks side by side in one step.
    df_train_x = pd.concat(train_x_frames, axis=1) if train_x_frames else pd.DataFrame()
    df_test_x = pd.concat(test_x_frames, axis=1) if test_x_frames else pd.DataFrame()

    df_train_y = df_train_y.rename(columns={0: 'EoL'})
    df_test_y = df_test_y.rename(columns={0: 'EoL'})
    return df_train_x, df_train_y, df_test_x, df_test_y

In [29]:
# Build the fine-tune train/test frames of every dataset group from the
# single (98, 8) cycle pair; the groups are processed as MATR, MIX, HUST.
initial_cycles = [8]
_splits_by_dataset = {}
for dataset_id in ("MATR", "MIX", "HUST"):
    _splits_by_dataset[dataset_id] = split_train_test(initial_cycles, dataset_id)

X_train_MATR, y_train_MATR, X_test_MATR, y_test_MATR = _splits_by_dataset["MATR"]
X_train_MIX, y_train_MIX, X_test_MIX, y_test_MIX = _splits_by_dataset["MIX"]
X_train_HUST, y_train_HUST, X_test_HUST, y_test_HUST = _splits_by_dataset["HUST"]

dataset: MATR
	 train X shape: (41, 1000) train Y shape: (41, 1)
	 test X shape: (43, 1000) test Y shape: (43, 1)
dataset: MIX
	 train X shape: (116, 1000) train Y shape: (116, 1)
	 test X shape: (30, 1000) test Y shape: (30, 1)
dataset: HUST
	 train X shape: (55, 1000) train Y shape: (55, 1)
	 test X shape: (22, 1000) test Y shape: (22, 1)


In [40]:
# Preview the first rows of the MATR training features (columns F_99_8_*).
X_train_MATR.head()

Unnamed: 0,F_99_8_0,F_99_8_1,F_99_8_2,F_99_8_3,F_99_8_4,F_99_8_5,F_99_8_6,F_99_8_7,F_99_8_8,F_99_8_9,...,F_99_8_990,F_99_8_991,F_99_8_992,F_99_8_993,F_99_8_994,F_99_8_995,F_99_8_996,F_99_8_997,F_99_8_998,F_99_8_999
0,-1.264699e-06,-1.264699e-06,-1.264699e-06,-1.5e-05,-2.8e-05,-6.540445e-05,-0.000103,-0.00014,-0.000178,-0.000215,...,-0.001796,-0.001787,-0.001733,-0.001805,-0.001861,-0.001917,-0.001975,-0.002032,-0.000314,0.001404
1,3.883124e-07,3.883124e-07,3.883124e-07,7e-06,1.3e-05,1.23971e-05,1.1e-05,1e-05,1e-05,9e-06,...,0.001256,0.001256,0.000318,-0.000619,-0.00089,-0.001161,-0.003497,-0.005833,-0.006285,-0.006737
2,-0.0001083377,-0.0001083377,-0.0001083377,-0.000108,-0.000108,-0.0001083377,-0.000108,-0.000108,-0.000108,-0.000108,...,-0.004073,-0.002529,-0.002064,-0.001599,2.3e-05,0.001644,0.001708,0.001771,0.001839,0.002419
3,-1.805152e-05,-4.090029e-05,-6.374906e-05,-6.4e-05,-6.4e-05,-6.454846e-05,-6.5e-05,-6.5e-05,-6.5e-05,-6.6e-05,...,-0.003629,-0.003632,-0.004635,-0.005639,-0.006663,-0.007688,-0.008262,-0.008837,-0.009877,-0.010917
4,-1.431201e-05,-1.431201e-05,-1.431201e-05,-1.4e-05,-1.4e-05,1.669296e-07,1.5e-05,5.6e-05,9.7e-05,0.000138,...,-0.006731,-0.006635,-0.006613,-0.006591,-0.006591,-0.006591,-0.006591,-0.006591,-0.006591,-0.006591


Few-shot (5-shot and 10-shot) train and validation subsets for MATR

In [30]:
# 5 shot
five_shot_train_index_MATR = [9, 3, 6, 18, 36] 
five_shot_train_X_MATR = X_train_MATR.iloc[five_shot_train_index_MATR]
five_shot_train_Y_MATR = y_train_MATR.iloc[five_shot_train_index_MATR]

five_shot_val_index_MATR = [0, 12, 15, 21, 24] 
five_shot_val_X_MATR = X_train_MATR.iloc[five_shot_val_index_MATR]
five_shot_val_Y_MATR = y_train_MATR.iloc[five_shot_val_index_MATR]

# 10 shot
ten_shot_train_index_MATR =  [2, 3, 6, 14, 36, 9, 12, 15, 21, 24] 
ten_shot_train_X_MATR = X_train_MATR.iloc[ten_shot_train_index_MATR]
ten_shot_train_Y_MATR = y_train_MATR.iloc[ten_shot_train_index_MATR]

ten_shot_val_index_MATR =  [0, 1, 4, 5, 18, 19, 23, 29, 35, 40] 
ten_shot_val_X_MATR = X_train_MATR.iloc[ten_shot_val_index_MATR]
ten_shot_val_Y_MATR = y_train_MATR.iloc[ten_shot_val_index_MATR]

Few-shot (5-shot and 10-shot) train and validation subsets for MIX

In [31]:
# 5 shot
five_shot_train_index_MIX = [0, 10, 60, 63, 65] 
five_shot_train_X_MIX = X_train_MIX.iloc[five_shot_train_index_MIX]
five_shot_train_Y_MIX = y_train_MIX.iloc[five_shot_train_index_MIX]

five_shot_val_index_MIX = [70, 80, 81, 90, 95] 
five_shot_val_X_MIX = X_train_MIX.iloc[five_shot_val_index_MIX]
five_shot_val_Y_MIX = y_train_MIX.iloc[five_shot_val_index_MIX]

# 10 shot
ten_shot_train_index_MIX =  [0,  10, 60, 63, 65, 70, 80, 81, 90, 95] 
ten_shot_train_X_MIX = X_train_MIX.iloc[ten_shot_train_index_MIX]
ten_shot_train_Y_MIX = y_train_MIX.iloc[ten_shot_train_index_MIX]

ten_shot_val_index_MIX =  [5, 15, 29, 59, 66, 72, 82, 83, 91, 96] 
ten_shot_val_X_MIX = X_train_MIX.iloc[ten_shot_val_index_MIX]
ten_shot_val_Y_MIX = y_train_MIX.iloc[ten_shot_val_index_MIX]

Few-shot (5-shot and 10-shot) train and validation subsets for HUST

In [32]:
# 5 shot
five_shot_train_index_HUST = [0, 10, 25, 30, 35] 
five_shot_train__X_HUST = X_train_HUST.iloc[five_shot_train_index_HUST]
five_shot_train_Y_HUST = y_train_HUST.iloc[five_shot_train_index_HUST]

five_shot_val_index_HUST = [15, 20, 40, 12, 54] 
five_shot_val__X_HUST = X_train_HUST.iloc[five_shot_val_index_HUST]
five_shot_val_Y_HUST = y_train_HUST.iloc[five_shot_val_index_HUST]

# 10 shot
ten_shot_train_index_HUST =  [0, 8, 25, 30, 35, 15, 20, 40, 12, 54] 
ten_shot_train_X_HUST = X_train_HUST.iloc[ten_shot_train_index_HUST]
ten_shot_train_Y_HUST = y_train_HUST.iloc[ten_shot_train_index_HUST]

ten_shot_val_index_HUST =  [5, 10, 24, 31, 34, 16, 21, 41, 13, 53]
ten_shot_val_X_HUST = X_train_HUST.iloc[ten_shot_val_index_HUST]
ten_shot_val_Y_HUST = y_train_HUST.iloc[ten_shot_val_index_HUST]

In [33]:
def get_dataset_for_task(task_name, battery_data = battery_data,  phase = "train", if_scale = False, dataset_id = "dataset_id_new", cycle_pairs=[(98, 8)]):
    """Select the rows of one dataset group and convert them to tensors.

    NOTE(review): this definition has the same name as an earlier one in this
    notebook and replaces it once this cell runs. It differs in the
    "fine_tune" phase: the training rows here also include the test splits
    ("test1"/"test2" for MATR, "test" for HUST and MIX), so the returned test
    rows are a subset of the returned training rows - confirm this is
    intended before using the result for evaluation.

    Args:
        task_name: value matched against the ``dataset_id`` column. The
            "fine_tune" phase only knows "MATR", "HUST" and "MIX".
        battery_data: table to read from. The default is the notebook-level
            ``battery_data`` object as it existed when this cell was run
            (default arguments are bound at definition time).
        phase: "train" or "fine_tune".
        if_scale: accepted for compatibility; not used by this function.
        dataset_id: name of the column that holds the group label.
        cycle_pairs: forwarded unchanged to ``get_x_data``.

    Returns:
        ``(x, y)`` for phase "train";
        ``(train_x, train_y, test_x, test_y)`` for phase "fine_tune".

    Raises:
        ValueError: for an unknown ``phase``, or for a ``task_name`` the
            "fine_tune" phase has no split definition for. Previously these
            cases returned ``None`` or failed with ``UnboundLocalError``.
    """
    dataset = battery_data[battery_data[dataset_id] == task_name]

    if phase == "train":
        # NOTE(review): only MATR is restricted to its "train" split here; the
        # other groups keep every split - confirm this is intended.
        if task_name == "MATR":
            dataset = dataset[dataset["split"] == "train"]
        x = get_x_data(dataset, cycle_pairs=cycle_pairs)
        y = create_y(dataset)
        return x, y

    if phase == "fine_tune":
        # Per dataset group: (splits used for training, split used for testing).
        splits_by_task = {
            "MATR": (["train", "test1", "test2"], "test1"),
            "HUST": (["train", "test"], "test"),
            "MIX": (["train", "test"], "test"),
        }
        if task_name not in splits_by_task:
            raise ValueError(
                f"unknown task_name {task_name!r} for phase 'fine_tune'; "
                f"expected one of {sorted(splits_by_task)}"
            )
        train_splits, test_split = splits_by_task[task_name]
        dataset_train = dataset[dataset["split"].isin(train_splits)]
        dataset_test = dataset[dataset["split"] == test_split]

        train_x = get_x_data(dataset_train, cycle_pairs=cycle_pairs)
        train_y = create_y(dataset_train)
        test_x = get_x_data(dataset_test, cycle_pairs=cycle_pairs)
        test_y = create_y(dataset_test)
        return train_x, train_y, test_x, test_y

    raise ValueError(f"unknown phase {phase!r}; expected 'train' or 'fine_tune'")

HUST + MIX -> MATR

In [34]:
# 10 shot
X_train = np.vstack((X_train_HUST, X_train_MIX, ten_shot_train_X_MATR))
y_train = np.vstack((y_train_HUST, y_train_MIX, ten_shot_train_Y_MATR))

shot_X, shot_Y = ten_shot_val_X_MATR, ten_shot_val_Y_MATR
test_X, test_Y = X_test_MATR, y_test_MATR

In [35]:
# 5 shot
X_train = np.vstack((X_train_HUST, X_train_MIX, five_shot_train_X_MATR))
y_train = np.vstack((y_train_HUST, y_train_MIX, five_shot_train_Y_MATR))

shot_X, shot_Y = five_shot_val_X_MATR, five_shot_val_Y_MATR
test_X, test_Y = X_test_MATR, y_test_MATR

MATR + MIX -> HUST

In [36]:
# 10 shot
X_train = np.vstack((X_train_MATR, X_train_MIX, ten_shot_train_X_HUST))
y_train = np.vstack((y_train_MATR, y_train_MIX, ten_shot_train_Y_HUST))

shot_X, shot_Y = ten_shot_val_X_HUST, ten_shot_val_Y_HUST
test_X, test_Y = X_test_HUST, y_test_HUST

In [37]:
# 5 shot
X_train = np.vstack((X_train_MATR, X_train_MIX, five_shot_train__X_HUST))
y_train = np.vstack((y_train_MATR, y_train_MIX, five_shot_train_Y_HUST))

shot_X, shot_Y = five_shot_val__X_HUST, five_shot_val_Y_HUST
test_X, test_Y = X_test_HUST, y_test_HUST

MATR + HUST -> MIX

In [38]:
# 10 shot
X_train = np.vstack((X_train_HUST, X_train_MATR, ten_shot_train_X_MIX))
y_train = np.vstack((y_train_HUST, y_train_MATR, ten_shot_train_Y_MIX))

shot_X, shot_Y = ten_shot_val_X_MIX, ten_shot_val_Y_MIX
test_X, test_Y = X_test_MIX, y_test_MIX

In [39]:
# 5 shot
X_train = np.vstack((X_train_HUST, X_train_MATR, five_shot_train_X_MIX))
y_train = np.vstack((y_train_HUST, y_train_MATR, five_shot_train_Y_MIX))

shot_X, shot_Y = five_shot_val_X_MIX, five_shot_val_Y_MIX
test_X, test_Y = X_test_MIX, y_test_MIX