In [1]:
import context
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import os
from numba import cuda 
import json
import traceback
import time
import random

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager
import concurrent.futures
from tqdm import tqdm


import src.data.data_preprocessing as util
import src.models.lstm_model as lstm_vanilla # data baseline, pretrain, hybrid
import src.models.lstm_loss_model as lstm_loss # loss
import src.models.lstm_architecture_model as lstm_arch # architecture
import src.models.lstm_residual_model as lstm_residual # residual

2024-05-08 16:49:56.817881: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-08 16:49:56.938040: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-08 16:49:57.338933: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:$LD_LIBRARY_PATH:/home/i40/bielskip/miniconda3/envs/battery-system/lib/
2024-05-08 16:49:57.338984: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic lib

In [2]:
def run_experiment(hyper_params):
    """Train and evaluate one TGDS LSTM variant described by ``hyper_params``.

    The function performs, in order:

    1. seeds NumPy's global RNG with ``hyper_params['seed']``;
    2. creates a fresh, timestamped experiment directory (with ``model`` and
       ``results`` sub-directories) below
       ``../../../models/experiments/<experiment_name>/``;
    3. writes the effective hyperparameters to ``hyperparameters.json``;
    4. loads the training / test profile sets and turns them into feature
       tensors via ``util.prepare_all_features_input``;
    5. trains the model selected by ``hyper_params['method']`` and evaluates
       it on the three test use cases.

    Args:
        hyper_params (dict): Experiment configuration. The keys read here are
            ``'seed'``, ``'experiment_name'``, ``'method'``,
            ``'input_features'``, ``'feature_indices'``, ``'stack'`` and
            ``'cell'``. The whole dict is serialised with ``json.dump``.
            The dict is modified in place: ``'model_save_path'``,
            ``'results_path'`` and ``'n_features'`` are set.

    Returns:
        None. An unrecognised method prints ``"Unknown method."`` (after the
        experiment directory and the data have already been prepared).
    """
    # NOTE(review): only NumPy's global RNG is seeded here. Whether the model
    # classes seed TensorFlow themselves is not visible from this notebook.
    np.random.seed(hyper_params['seed'])

    print("run_exp")

    # Claim a unique experiment directory. The timestamp only has second
    # resolution, so parallel workers can collide on the same name.
    while True:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        experiment_path = f"../../../models/experiments/{hyper_params['experiment_name']}/{timestamp}"

        model_save_path = f'{experiment_path}/model'
        results_path = f'{experiment_path}/results'

        try:
            os.makedirs(model_save_path)
            os.makedirs(results_path)
            break
        except FileExistsError:
            # Another run owns this timestamp: back off for a random interval
            # and retry with a new one.
            time.sleep(random.randint(2, 30))

    method = hyper_params['method']

    # Column positions inside the full feature tensor, as used by the
    # method-specific branches below.
    theory_col = 4        # theory-model output
    init_voltage_col = 2  # initial voltage (conditional input)
    residual_col = 5      # residual target
    # NOTE(review): the pretrain target below reads time step 99 while every
    # other branch reads step 98 (slice 98:99) -- confirm this is intended.
    cond_step = slice(98, 99)

    hyper_params['model_save_path'] = model_save_path
    hyper_params['results_path'] = results_path
    # The hybrid method feeds the theory output as one extra input feature.
    # Decide this *before* writing the JSON so the persisted configuration
    # matches the model that is actually built.
    n_selected = len(hyper_params['input_features'])
    hyper_params['n_features'] = n_selected + 1 if method == 'hybrid' else n_selected
    with open(f'{experiment_path}/hyperparameters.json', 'w') as json_file:
        json.dump(hyper_params, json_file, indent=4)

    # Prepare Training Data
    # NOTE: allow_pickle=True unpickles arbitrary objects -- only load trusted files.
    TRAINING_SETS = np.load('../../../models/training_setup/training_sets.npy', allow_pickle=True)
    TRAINING_SETS = TRAINING_SETS.item()
    training_sets = TRAINING_SETS['10A_all']
    X_list, y_list, scalers_train = util.prepare_all_features_input(hyper_params, training_sets, hyper_params['stack'], hyper_params['cell'])

    # Prepare Test Data
    TEST_SETS = np.load('../../../models/training_setup/test_sets.npy', allow_pickle=True)
    TEST_SETS = TEST_SETS.item()
    test_profiles_usecase_1 = TEST_SETS['Reproduction']
    test_profiles_usecase_2 = TEST_SETS['Abstraction']
    test_profiles_usecase_3 = TEST_SETS['Generalization']
    X_case_1, y_case_1, _ = util.prepare_all_features_input(hyper_params, test_profiles_usecase_1, hyper_params['stack'], hyper_params['cell'])
    X_case_2, y_case_2, _ = util.prepare_all_features_input(hyper_params, test_profiles_usecase_2, hyper_params['stack'], hyper_params['cell'])
    X_case_3, y_case_3, _ = util.prepare_all_features_input(hyper_params, test_profiles_usecase_3, hyper_params['stack'], hyper_params['cell'])

    def usecase_args(build):
        """Flatten ``build(X, y)`` for the first profile of each test use case.

        The result is the positional argument list expected by the models'
        ``test_usecases`` (use case 1, then 2, then 3).
        """
        args = []
        for X_cases, y_cases in ((X_case_1, y_case_1), (X_case_2, y_case_2), (X_case_3, y_case_3)):
            args.extend(build(X_cases[0], y_cases[0]))
        return args

    # select the train input features
    feature_idx = [hyper_params['feature_indices'][feature] for feature in hyper_params['input_features']]
    X_list_features = [array[:, :, feature_idx] for array in X_list]

    # train TGDS methods
    if method == 'data_baseline':
        lstm = lstm_vanilla.Model()
        lstm.initialize(hyper_params)
        lstm.train_f(X_list_features, y_list, scalers_train)
        lstm.test_usecases(*usecase_args(lambda X, y: (X[:, :, feature_idx], y)), scalers_train)

    elif method == 'pretrain':
        lstm = lstm_vanilla.Model()
        lstm.initialize(hyper_params)
        # train first half on the theory-output
        y_list_pretrain = [array[:, 99, theory_col] for array in X_list]
        lstm.train_f(X_list_features, y_list_pretrain, scalers_train, half_train=True)
        # train the second half with standard output
        lstm.train_f(X_list_features, y_list, scalers_train, half_train=True)
        lstm.test_usecases(*usecase_args(lambda X, y: (X[:, :, feature_idx], y)), scalers_train)

    elif method == 'hybrid':
        # add theory output to the input features ('n_features' was already
        # increased accordingly before the hyperparameters were saved)
        feature_idx = feature_idx + [theory_col]
        X_list_features = [array[:, :, feature_idx] for array in X_list]

        lstm = lstm_vanilla.Model()
        lstm.initialize(hyper_params)
        lstm.train_f(X_list_features, y_list, scalers_train)
        lstm.test_usecases(*usecase_args(lambda X, y: (X[:, :, feature_idx], y)), scalers_train)

    elif method == 'loss':
        lstm = lstm_loss.Model()
        lstm.initialize(hyper_params, scalers_train)
        lstm.train_f(X_list_features, y_list, scalers_train)
        lstm.test_usecases(*usecase_args(lambda X, y: (X[:, :, feature_idx], y)), scalers_train)

    elif method == 'architecture':
        # prepare conditional input as initial voltage
        cond_train = [array[:, cond_step, init_voltage_col] for array in X_list]
        X_list_features_cond_train = [[features, cond] for features, cond in zip(X_list_features, cond_train)]

        lstm = lstm_arch.Model()
        lstm.initialize(hyper_params)
        lstm.train_f(X_list_features_cond_train, y_list, scalers_train)
        lstm.test_usecases(
            *usecase_args(lambda X, y: ([X[:, :, feature_idx], X[:, cond_step, init_voltage_col]], y)),
            scalers_train,
        )

    elif method == 'residual':
        # prepare residual output
        residual_y_list = [array[:, cond_step, residual_col] for array in X_list]

        lstm = lstm_residual.Model()
        lstm.initialize(hyper_params)
        lstm.train_f(X_list_features, residual_y_list, scalers_train)
        # the residual model additionally receives the theory output per use case
        lstm.test_usecases(
            *usecase_args(lambda X, y: (X[:, :, feature_idx], y, X[:, cond_step, theory_col])),
            scalers_train,
        )
    else:
        print("Unknown method.")
    
        

In [3]:
def worker(config, gpu_queue):
    """Run one experiment on a GPU borrowed from ``gpu_queue``.

    A GPU id is taken from the queue (blocking until one is available), the
    process is restricted to that GPU with memory growth enabled, and
    ``run_experiment(config)`` is executed. The GPU id is always handed back
    to the queue afterwards, even if the experiment aborts.

    Args:
        config (dict): Hyperparameter configuration passed to ``run_experiment``.
        gpu_queue: Queue of GPU ids (anything ``int()`` accepts). The id is
            put back as an ``int``.

    Returns:
        None. Exceptions derived from ``Exception`` are printed together with
        their traceback and swallowed so that one failing configuration does
        not stop the remaining ones.
    """
    gpu_id = int(gpu_queue.get())

    try:
        # NOTE(review): TensorFlow refuses to change device visibility once
        # its runtime is initialised. If a reused pool process receives a
        # different GPU id on a later task, this presumably raises and the
        # task is skipped by the handler below -- confirm.
        device = tf.config.list_physical_devices("GPU")[gpu_id]
        tf.config.experimental.set_memory_growth(device, True)
        tf.config.set_visible_devices(device, "GPU")

        run_experiment(config)

    except Exception as e:
        print(str(e))
        traceback.print_exc()

    finally:
        # Return the GPU on every exit path (including KeyboardInterrupt /
        # SystemExit); otherwise the slot is lost and queued tasks can block
        # forever on gpu_queue.get().
        gpu_queue.put(gpu_id)

In [4]:
# from tensorflow.python.client import device_lib

# def get_available_gpus():
#     local_device_protos = device_lib.list_local_devices()
#     return [x.name for x in local_device_protos if x.device_type == 'GPU']

# get_available_gpus()

In [5]:
def ex_test_tgds(hyper_params):
    """Build the smoke-test grid: the residual method over four feature sets.

    Sets ``hyper_params['experiment_name']`` to ``'test_tgds_methods'`` on the
    passed dict (in place) and returns one shallow copy per combination of
    method and input-feature set, each with ``'input_features'`` and
    ``'method'`` filled in.
    """
    hyper_params['experiment_name'] = 'test_tgds_methods'

    method_choices = ("residual",)
    feature_choices = (
        ['current'],
        ['current', 'charge'],
        ['current', 'charge', 'init_vol'],
        ['current', 'charge', 'init_vol', 'ocv'],
    )

    def make_variant(method_name, features):
        variant = hyper_params.copy()
        variant['input_features'] = features
        variant['method'] = method_name
        return variant

    # list(...) gives every configuration its own feature list
    return [
        make_variant(method_name, list(features))
        for method_name in method_choices
        for features in feature_choices
    ]

In [None]:
def ex_all_but_loss_params_config(hyper_params):
    """Build the parameter grid for every TGDS method except ``'loss'``.

    Sets ``hyper_params['experiment_name']`` to
    ``'all_but_loss_params_config'`` on the passed dict (in place) and returns
    one shallow copy per combination of

    * seed (``0``..``4``),
    * method (data_baseline, architecture, pretrain, hybrid, residual),
    * number of LSTM layers (1, 2),
    * number of epochs (10, 20, 50),
    * input-feature set (four nested sets),

    i.e. 5 * 5 * 2 * 3 * 4 = 600 configurations.

    Args:
        hyper_params (dict): Base configuration shared by all variants.

    Returns:
        list[dict]: The configurations, ordered with the seed varying slowest
        and the feature set varying fastest.
    """
    hyper_params['experiment_name'] = 'all_but_loss_params_config'
    configs = []
    for run in range(5):
        # the original line lacked the trailing ':' and did not parse
        for method in ["data_baseline", "architecture", "pretrain", "hybrid", "residual"]:
            for num_layers in [1, 2]:
                for epochs in [10, 20, 50]:
                    for input_features in [['current'], ['current', 'charge'], ['current', 'charge', 'init_vol'], ['current', 'charge', 'init_vol', 'ocv']]:
                        config = hyper_params.copy()
                        config['seed'] = run
                        config['method'] = method
                        config['n_lstm_layers'] = num_layers
                        config['n_epochs'] = epochs
                        config['input_features'] = input_features
                        configs.append(config)
    return configs

In [None]:
def ex_loss_parsams_config(hyper_params):
    """Build the parameter grid for the ``'loss'`` method.

    Sets ``hyper_params['experiment_name']`` to ``'loss_params_config'`` and
    ``hyper_params['method']`` to ``'loss'`` on the passed dict (in place) and
    returns one shallow copy per combination of

    * seed (``0``..``4``),
    * ``lambda_soc`` (0.001 .. 1000 in powers of ten),
    * number of LSTM layers (1, 2),
    * number of epochs (10, 20, 50),
    * input-feature set (four nested sets),

    i.e. 5 * 7 * 2 * 3 * 4 = 840 configurations.

    Args:
        hyper_params (dict): Base configuration shared by all variants.

    Returns:
        list[dict]: The configurations, ordered with the seed varying slowest
        and the feature set varying fastest.
    """
    hyper_params['experiment_name'] = 'loss_params_config'
    hyper_params['method'] = 'loss'
    configs = []
    for run in range(5):
        # the original line lacked the trailing ':' and did not parse
        for lambda_soc in [0.001, 0.01, 0.1, 1, 10, 100, 1000]:
            for num_layers in [1, 2]:
                for epochs in [10, 20, 50]:
                    for input_features in [['current'], ['current', 'charge'], ['current', 'charge', 'init_vol'], ['current', 'charge', 'init_vol', 'ocv']]:
                        config = hyper_params.copy()
                        config['seed'] = run
                        config['lambda_soc'] = lambda_soc
                        config['n_lstm_layers'] = num_layers
                        config['n_epochs'] = epochs
                        config['input_features'] = input_features
                        configs.append(config)
    return configs

In [6]:
def ex_baseline_parsams_config(hyper_params):
    """Build the baseline parameter grid.

    Returns one shallow copy of ``hyper_params`` per combination of seed
    (``0``..``2``), number of LSTM layers (1, 2), number of epochs
    (10, 20, 50) and input-feature set (four nested sets): 72 configurations,
    with the seed varying slowest and the feature set varying fastest.
    ``hyper_params`` itself is not modified.
    """
    layer_choices = (1, 2)
    epoch_choices = (10, 20, 50)
    feature_choices = (
        ['current'],
        ['current', 'charge'],
        ['current', 'charge', 'init_vol'],
        ['current', 'charge', 'init_vol', 'ocv'],
    )

    def make_variant(seed, n_layers, n_epochs, features):
        variant = hyper_params.copy()
        variant['seed'] = seed
        variant['n_lstm_layers'] = n_layers
        variant['n_epochs'] = n_epochs
        variant['input_features'] = features
        return variant

    # list(...) gives every configuration its own feature list
    return [
        make_variant(seed, n_layers, n_epochs, list(features))
        for seed in range(3)
        for n_layers in layer_choices
        for n_epochs in epoch_choices
        for features in feature_choices
    ]

In [7]:

# Define available GPUs (ids are converted with int() inside worker())
gpus = ["0", "1"]
methods = ["data_baseline", "loss", "architecture", "pretrain", "hybrid", "residual"]


# Base hyperparameters shared by every configuration.
# NOTE: allow_pickle=True unpickles arbitrary objects -- only load trusted files.
hyper_params = np.load('../../../models/training_setup/hyperparameters.npy', allow_pickle=True).item()
#hyper_params['method'] = 'data_baseline'
hyper_params['n_epochs'] = 2
hyper_params['n_steps'] = 100
hyper_params['seed'] = 0
#hyper_params['experiment_name'] = 'explore_baseline_params'
#hyper_params['input_features'] = ['current', 'charge', 'init_vol', 'ocv']

# # Create configurations
# configs = []
# for run in range(2):
#     config = hyper_params.copy()
#     config['seed'] = run  
#     configs.append(config)

#configs = ex_baseline_parsams_config(hyper_params)

configs = ex_test_tgds(hyper_params)

with Manager() as manager:
    # The queue hands out GPU ids: a task takes one, runs, and puts it back,
    # so at most one experiment runs per GPU at any time.
    gpu_queue = manager.Queue()
    for gpu_id in gpus:
        gpu_queue.put(gpu_id)

    with ProcessPoolExecutor(max_workers=len(gpus)) as executor:
        futures = [executor.submit(worker, config, gpu_queue) for config in configs]

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(configs)):
            # Surface failures that are not handled inside the task itself
            # (e.g. a crashed pool process or an unpicklable argument);
            # without this check they would be dropped silently.
            error = future.exception()
            if error is not None:
                print(f"Experiment task failed: {error!r}")


  0%|                                                     | 0/4 [00:00<?, ?it/s]

run_exprun_exp



2024-05-08 16:49:58.717805: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-08 16:49:58.717993: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-08 16:49:58.718013: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-08 16:49:58.718269: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-08 16:49:58.741748: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from S

Model: "Residual_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 20)           1760      
                                                                 
 lstm_1 (LSTM)               (None, 20)                3280      
                                                                 
 dense (Dense)               (None, 1)                 21        
                                                                 
Total params: 5,061
Trainable params: 5,061
Non-trainable params: 0
_________________________________________________________________


2024-05-08 16:50:17.666981: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2024-05-08 16:50:18.105313: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
 25%|███████████▎                                 | 1/4 [00:35<01:46, 35.41s/it]

run_exp


2024-05-08 16:50:45.465088: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-08 16:50:45.466958: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-08 16:50:45.467177: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-08 16:50:45.467319: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

Model: "Residual_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 20)           1840      
                                                                 
 lstm_1 (LSTM)               (None, 20)                3280      
                                                                 
 dense (Dense)               (None, 1)                 21        
                                                                 
Total params: 5,141
Trainable params: 5,141
Non-trainable params: 0
_________________________________________________________________


2024-05-08 16:50:46.673106: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2024-05-08 16:50:47.105917: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Model: "Residual_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 100, 20)           1920      
                                                                 
 lstm_3 (LSTM)               (None, 20)                3280      
                                                                 
 dense_1 (Dense)             (None, 1)                 21        
                                                                 
Total params: 5,221
Trainable params: 5,221
Non-trainable params: 0
_________________________________________________________________


 50%|██████████████████████▌                      | 2/4 [01:04<01:03, 31.75s/it]

run_exp


 75%|█████████████████████████████████▊           | 3/4 [01:09<00:19, 19.39s/it]

Model: "Residual_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 100, 20)           2000      
                                                                 
 lstm_3 (LSTM)               (None, 20)                3280      
                                                                 
 dense_1 (Dense)             (None, 1)                 21        
                                                                 
Total params: 5,301
Trainable params: 5,301
Non-trainable params: 0
_________________________________________________________________


100%|█████████████████████████████████████████████| 4/4 [01:38<00:00, 24.62s/it]


In [8]:
# run_experiment(config)