In [1]:
import context
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import os
from numba import cuda 
import json
import traceback
import time
import random

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager
import concurrent.futures
from tqdm import tqdm


import src.data.data_preprocessing as util
import src.models.lstm_model as lstm_vanilla # data baseline, pretrain, hybrid
import src.models.loss_lstm_model as lstm_loss # loss
import src.models.architecture_lstm_model as lstm_arch # architecture
import src.models.lstm_residual_model as lstm_residual # residual

2024-05-07 19:33:35.326907: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-07 19:33:35.446081: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-07 19:33:35.857364: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:$LD_LIBRARY_PATH:/home/i40/bielskip/miniconda3/envs/battery-system/lib/
2024-05-07 19:33:35.857417: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic lib

In [1]:
def run_experiment(hyper_params):
    np.random.seed(hyper_params['seed'])
 
    
    print("run_exp")

    while True:
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        experiment_path = f"../../../models/experiments/{hyper_params['experiment_name']}/{timestamp}"

        model_save_path = f'{experiment_path}/model'
        results_path = f'{experiment_path}/results'

        try:
            os.makedirs(model_save_path)
            os.makedirs(results_path)
            break
        except FileExistsError:
            time.sleep(random.randint(2, 30))
            continue

    hyper_params['model_save_path'] = model_save_path
    hyper_params['results_path'] = results_path
    hyper_params['n_features'] = len(hyper_params['input_features'])
    with open(f'{experiment_path}/hyperparameters.json', 'w') as json_file:
        json.dump(hyper_params, json_file, indent=4)

        
    
    # Prepare Training Data
    TRAINING_SETS =  np.load('../../../models/training_setup/training_sets.npy', allow_pickle=True)
    TRAINING_SETS = TRAINING_SETS.item()
    training_sets = TRAINING_SETS['10A_all']  
    X_list, y_list, scalers_train,  = util.prepare_all_features_input(hyper_params, training_sets, hyper_params['stack'], hyper_params['cell'])
    
    # Prepare Test Data
    TEST_SETS =  np.load('../../../models/training_setup/test_sets.npy', allow_pickle=True)
    TEST_SETS = TEST_SETS.item()
    test_profiles_usecase_1 = TEST_SETS['Reproduction']
    test_profiles_usecase_2 = TEST_SETS['Abstraction']
    test_profiles_usecase_3 = TEST_SETS['Generalization']
    X_case_1, y_case_1, _  = util.prepare_all_features_input(hyper_params, test_profiles_usecase_1, hyper_params['stack'], hyper_params['cell'])
    X_case_2, y_case_2, _  = util.prepare_all_features_input(hyper_params, test_profiles_usecase_2, hyper_params['stack'], hyper_params['cell'])
    X_case_3, y_case_3, _  = util.prepare_all_features_input(hyper_params, test_profiles_usecase_3, hyper_params['stack'], hyper_params['cell'])
    
    
    # select the train input features
    X_list_features = []
    feature_idx = [hyper_params['feature_indices'][feature] for feature in hyper_params['input_features']]
    for array in X_list:
        X_list_features.append(array[:, :, feature_idx])    

    # train TGDS methods
    if hyper_params['method'] == 'data_baseline':
        lstm = lstm_vanilla.Model()
        lstm.initialize(hyper_params)
        lstm.train_f(X_list_features, y_list, scalers_train)
        lstm.test_usecases(X_case_1[0][:, :, feature_idx], y_case_1[0], X_case_2[0][:, :, feature_idx], y_case_2[0], X_case_3[0][:, :, feature_idx], y_case_3[0], scalers_train)
    
    elif hyper_params['method'] == 'pretrain':
        lstm = lstm_vanilla.Model()
        lstm.initialize(hyper_params)
        # train first half on the theory-output
        y_list_pretrain = []
        for array in X_list:
            y_list_pretrain.append(array[:, 99, 4]) # Theory model
        lstm.train_f(X_list_features, y_list_pretrain, scalers_train, half_train=True) 
        # train the second half with standard output
        lstm.train_f(X_list_features, y_list, scalers_train, half_train=True)
        lstm.test_usecases(X_case_1[0][:, :, feature_idx], y_case_1[0], X_case_2[0][:, :, feature_idx], y_case_2[0], X_case_3[0][:, :, feature_idx], y_case_3[0], scalers_train)
    
    elif hyper_params['method'] == 'hybrid':
        # add theory output to the input features
        feature_idx.append(4)
        X_list_features = []
        for array in X_list:
            X_list_features.append(array[:, :, feature_idx]) 
        hyper_params['n_features'] = len(hyper_params['input_features']) + 1
        
        lstm = lstm_vanilla.Model()
        lstm.initialize(hyper_params)
        lstm.train_f(X_list_features, y_list, scalers_train)
        lstm.test_usecases(X_case_1[0][:, :, feature_idx], y_case_1[0], X_case_2[0][:, :, feature_idx], y_case_2[0], X_case_3[0][:, :, feature_idx], y_case_3[0], scalers_train)

    elif hyper_params['method'] == 'loss':
        lstm = lstm_loss.Model()
        lstm.initialize(hyper_params, scalers_train)
        lstm.train_f(X_list_features, y_list, scalers_train)
        lstm.test_usecases(X_case_1[0][:, :, feature_idx], y_case_1[0], X_case_2[0][:, :, feature_idx], y_case_2[0], X_case_3[0][:, :, feature_idx], y_case_3[0], scalers_train)

    elif hyper_params['method'] == 'architecture':
        

In [3]:
def worker(config, gpu_queue):
    """Run one experiment on a GPU borrowed from a shared queue.

    Takes a GPU index from ``gpu_queue``, enables memory growth on that
    physical GPU, makes it the only GPU visible to TensorFlow and then calls
    ``run_experiment(config)``. The index is always handed back to the queue
    afterwards so another task can use the device.

    Args:
        config: Hyper-parameter mapping forwarded unchanged to
            ``run_experiment``.
        gpu_queue: Queue-like object (``get``/``put``) holding GPU indices in
            any form accepted by ``int()``.

    Returns:
        None. Exceptions raised during device setup or the experiment are
        printed together with their traceback and are not propagated.

    NOTE(review): TensorFlow documents that visible devices cannot be changed
    once its runtime has been initialized. If a pooled process is reused and
    draws a different GPU index than on its first task, the resulting
    RuntimeError would be printed here and the experiment skipped. Confirm,
    and consider pinning one GPU per worker process.
    """
    gpu_id = int(gpu_queue.get())

    try:
        # Resolve the physical device once and reuse the handle for both calls.
        gpu = tf.config.list_physical_devices("GPU")[gpu_id]
        tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.set_visible_devices(gpu, "GPU")

        run_experiment(config)

    except Exception as e:
        # Deliberate best effort: one failed configuration must not abort the sweep.
        print(str(e))
        traceback.print_exc()

    finally:
        # Return the GPU even when a non-Exception error (e.g. KeyboardInterrupt)
        # unwinds through here, so the slot is never lost.
        gpu_queue.put(gpu_id)

In [4]:
# from tensorflow.python.client import device_lib

# def get_available_gpus():
#     local_device_protos = device_lib.list_local_devices()
#     return [x.name for x in local_device_protos if x.device_type == 'GPU']

# get_available_gpus()

In [5]:
def ex_baseline_parsams_config(hyper_params):
    """Expand the base hyper-parameters into the baseline exploration grid.

    The grid is the Cartesian product of 3 seeds (0-2), 1 or 2 LSTM layers,
    10/20/50 epochs and four growing input-feature sets, iterated in that
    nesting order (feature set varies fastest).

    Args:
        hyper_params: Base configuration; it is shallow-copied for every grid
            point and never modified.

    Returns:
        List of 72 configurations, each a copy of ``hyper_params`` with
        'seed', 'n_lstm_layers', 'n_epochs' and 'input_features' set.
    """
    feature_order = ('current', 'charge', 'init_vol', 'ocv')

    def _variant(seed, depth, n_epochs, width):
        # Shallow copy plus the four grid-specific overrides, assigned in a fixed order.
        variant = hyper_params.copy()
        variant.update({
            'seed': seed,
            'n_lstm_layers': depth,
            'n_epochs': n_epochs,
            # First `width` features, as a fresh list for every configuration.
            'input_features': list(feature_order[:width]),
        })
        return variant

    return [
        _variant(seed, depth, n_epochs, width)
        for seed in range(3)
        for depth in (1, 2)
        for n_epochs in (10, 20, 50)
        for width in range(1, len(feature_order) + 1)
    ]

In [None]:

# Define available GPUs (indices into TensorFlow's physical GPU list; one pool process per entry)
gpus = ["0", "1"]
methods = ["data_baseline", "loss", "architecture", "pretrain", "hybrid", "residual"]


# Base hyper-parameters shared by every configuration of this sweep.
# NOTE: allow_pickle=True unpickles the file contents; only load files produced by this project.
hyper_params = np.load('../../../models/training_setup/hyperparameters.npy', allow_pickle=True).item()
hyper_params['method'] = 'data_baseline'
hyper_params['n_epochs'] = 2  # replaced per configuration by ex_baseline_parsams_config
hyper_params['n_steps'] = 100
hyper_params['experiment_name'] = 'explore_baseline_params'

# Expand the base parameters into the full exploration grid.
configs = ex_baseline_parsams_config(hyper_params)

with Manager() as manager:
    # The managed queue hands out GPU indices so that each GPU runs one experiment at a time.
    gpu_queue = manager.Queue()
    for gpu_id in gpus:
        gpu_queue.put(gpu_id)

    with ProcessPoolExecutor(max_workers=len(gpus)) as executor:
        futures = [executor.submit(worker, config, gpu_queue) for config in configs]

        for future in tqdm(concurrent.futures.as_completed(futures), total=len(configs)):
            # worker() reports experiment errors itself; retrieving the result
            # surfaces pool-level failures (crashed process, unpicklable task)
            # that would otherwise go unnoticed.
            try:
                future.result()
            except Exception:
                traceback.print_exc()


  0%|                                                    | 0/72 [00:00<?, ?it/s]

run_exp
run_exp


2024-05-07 19:33:37.212877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-07 19:33:37.213134: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-07 19:33:37.215040: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-07 19:33:37.215233: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-07 19:33:37.238416: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from S

Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 20)                1840      
                                                                 
 dense (Dense)               (None, 1)                 21        
                                                                 
Total params: 1,861
Trainable params: 1,861
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)
2024-05-07 19:33:56.033522: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2024-05-07 19:33:56.465948: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-05-07 19:34:02.947803: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-07 19:34:02.949745: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2024-05-07 19:34:02.950004: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS ha

Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 20)                1760      
                                                                 
 dense (Dense)               (None, 1)                 21        
                                                                 
Total params: 1,781
Trainable params: 1,781
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)
2024-05-07 19:34:04.012066: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8100
2024-05-07 19:34:04.433564: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
  1%|▌                                         | 1/72 [01:07<1:19:54, 67.53s/it]

run_exp


  3%|█▏                                          | 2/72 [01:14<37:21, 32.02s/it]

run_exp
Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 20)                1920      
                                                                 
 dense_1 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1,941
Trainable params: 1,941
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)


Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 20)                2000      
                                                                 
 dense_1 (Dense)             (None, 1)                 21        
                                                                 
Total params: 2,021
Trainable params: 2,021
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)
  4%|█▊                                          | 3/72 [02:13<50:56, 44.30s/it]

run_exp


  6%|██▍                                         | 4/72 [02:20<33:38, 29.69s/it]

run_exp
Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 20)                1760      
                                                                 
 dense_2 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1,781
Trainable params: 1,781
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)


Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 20)                1840      
                                                                 
 dense_2 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1,861
Trainable params: 1,861
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)
  7%|██▉                                       | 5/72 [04:05<1:03:23, 56.77s/it]

run_exp


  8%|███▋                                        | 6/72 [04:13<44:14, 40.22s/it]

run_exp
Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 20)                1920      
                                                                 
 dense_3 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1,941
Trainable params: 1,941
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)


Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 20)                2000      
                                                                 
 dense_3 (Dense)             (None, 1)                 21        
                                                                 
Total params: 2,021
Trainable params: 2,021
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)
 10%|████                                      | 7/72 [05:58<1:06:23, 61.28s/it]

run_exp


 11%|████▉                                       | 8/72 [06:06<47:27, 44.50s/it]

run_exp
Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 20)                1760      
                                                                 
 dense_4 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1,781
Trainable params: 1,781
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)


Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 20)                1840      
                                                                 
 dense_4 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1,861
Trainable params: 1,861
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)
 12%|█████▏                                   | 9/72 [10:10<1:51:54, 106.58s/it]

run_exp


 14%|█████▋                                   | 10/72 [10:20<1:19:36, 77.03s/it]

run_exp
Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_5 (LSTM)               (None, 20)                1920      
                                                                 
 dense_5 (Dense)             (None, 1)                 21        
                                                                 
Total params: 1,941
Trainable params: 1,941
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)


Model: "LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_5 (LSTM)               (None, 20)                2000      
                                                                 
 dense_5 (Dense)             (None, 1)                 21        
                                                                 
Total params: 2,021
Trainable params: 2,021
Non-trainable params: 0
_________________________________________________________________


  super().__init__(name, **kwargs)
 15%|██████                                  | 11/72 [14:24<2:10:06, 127.98s/it]

run_exp


 17%|██████▊                                  | 12/72 [14:34<1:32:08, 92.14s/it]

run_exp


In [None]:
# run_experiment(config)