In [25]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Masking, LSTM, Dense, concatenate, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import numpy as np
import pandas as pd
import os
import csv
import random
import json
import pickle
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from itertools import product
from tensorflow.keras.layers import LSTM, Dense, Masking, Input, concatenate, Dropout
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
from tensorflow.keras import backend as K
from sklearn.base import BaseEstimator, TransformerMixin

#### Load data

In [26]:
# Sentinel handed to create_model as `special_value`, where it becomes the
# mask_value of the Masking layers on both sequence inputs.
SPECIAL_VALUE = -1.0

In [27]:
# Target table; the columns 'OKT', 'alpha_PE', 'alpha_NE', 'beta_PE' and
# 'beta_NE' are read from it further down in the notebook.
output = pd.read_csv('../../data/data_output.csv')

# Per-sample feature matrix that is fed to the models' "input_general" input.
# NOTE: pickle.load can execute arbitrary code -- only open pickle files that
# come from a trusted source.
with open('../../data/X_general.pkl', 'rb') as f:
    X_general = pickle.load(f)

#### Split data into train and test set

#### Create an ensemble for the estimation of one parameter

In [28]:
def aleatoric_loss(y_true, y_pred):
    """Loss for a model whose single output is a predicted variance.

    Computes ``mean(0.5 * (log(var) + target / var))`` where ``var`` is the
    first output column of the model and ``target`` is the value the model is
    fitted against (in this notebook: the clipped squared residuals).

    Args:
        y_true: Targets, either shape ``(batch,)`` or ``(batch, 1)``.
        y_pred: Model output; column 0 is taken as the variance estimate.

    Returns:
        Scalar tensor with the mean loss over the batch.
    """
    var_est = y_pred[:, 0]

    # The targets fitted in this notebook are column vectors of shape (n, 1).
    # Dividing a (batch, 1) tensor by a (batch,) tensor broadcasts to
    # (batch, batch) and pairs every residual with every variance, so flatten
    # the targets first to keep the division strictly per sample.
    target = tf.reshape(tf.cast(y_true, var_est.dtype), [-1])

    # An 'exponential' output can underflow to exactly 0, which would yield
    # log(0) = -inf and a division by zero; clamp to a tiny positive floor.
    var_est = tf.maximum(var_est, tf.keras.backend.epsilon())

    loss = 0.5 * (tf.math.log(var_est) + target / var_est)
    return tf.reduce_mean(loss)

def create_model(max_seq_len_vector, dimension_vector, max_seq_len_scalar, dimension_scalar, dimension_general,
                 special_value, lstm_units=128, dense_units=128, num_lstm_layers=1, num_dense_hidden_layers=2,
                 transfer_learning_rate=0.001, activation='relu', transfer_learning=False, aleatoric_uncertainty=False):
    """Build and compile the three-input regression network.

    Two masked sequence inputs ("input_vector", "input_scalar") each run
    through their own LSTM stack; the final LSTM states are concatenated with
    the flat "input_general" features and passed through a stack of dense
    layers to a single-unit output layer.

    Args:
        max_seq_len_vector, dimension_vector: Shape of the vector sequence input.
        max_seq_len_scalar, dimension_scalar: Shape of the scalar sequence input.
        dimension_general: Width of the flat feature input.
        special_value: Value used as ``mask_value`` by both Masking layers.
        lstm_units: Units per LSTM layer.
        dense_units: Units per hidden dense layer.
        num_lstm_layers: Number of stacked LSTM layers per sequence branch.
        num_dense_hidden_layers: Number of hidden dense layers.
        transfer_learning_rate: Adam learning rate, used only when
            ``transfer_learning`` is true (otherwise 0.001 is used).
        activation: Activation of the hidden dense layers.
        transfer_learning: Selects ``transfer_learning_rate`` for the optimizer.
        aleatoric_uncertainty: When true the output uses an 'exponential'
            activation and the model is compiled with ``aleatoric_loss``;
            otherwise a 'linear' output with 'mse' loss.

    Returns:
        The compiled Keras ``Model``.
    """
    vector_in = Input(name="input_vector", shape=(max_seq_len_vector, dimension_vector))
    scalar_in = Input(name="input_scalar", shape=(max_seq_len_scalar, dimension_scalar))
    general_in = Input(name="input_general", shape=(dimension_general,))

    vector_branch = Masking(name="masking_input_vector", mask_value=special_value)(vector_in)
    scalar_branch = Masking(name="masking_input_scalar", mask_value=special_value)(scalar_in)

    # Every LSTM except the last one of each stack must emit the full sequence
    # so that the next LSTM has something to consume.
    last_lstm_index = num_lstm_layers - 1
    for depth in range(num_lstm_layers):
        emit_sequence = depth < last_lstm_index
        vector_branch = LSTM(lstm_units, name=f"lstm_vector_{depth}", return_sequences=emit_sequence)(vector_branch)
        scalar_branch = LSTM(lstm_units, name=f"lstm_scalar_{depth}", return_sequences=emit_sequence)(scalar_branch)

    hidden = concatenate([vector_branch, scalar_branch, general_in])
    for depth in range(num_dense_hidden_layers):
        hidden = Dense(dense_units, activation=activation, name=f"dense_{depth}")(hidden)

    # Variance models need a strictly positive output and the matching loss.
    if aleatoric_uncertainty:
        head_activation, loss_fn = 'exponential', aleatoric_loss
    else:
        head_activation, loss_fn = 'linear', 'mse'
    prediction = Dense(1, name='output', activation=head_activation)(hidden)

    model = Model(inputs=[vector_in, scalar_in, general_in], outputs=prediction)

    learning_rate = transfer_learning_rate if transfer_learning else 0.001
    model.compile(optimizer=Adam(learning_rate), loss=loss_fn)

    return model

#### Create an ensemble for transfer learning

In [29]:
def create_model_transfer_learning(base_model, max_seq_len_vector, dimension_vector, max_seq_len_scalar, dimension_scalar, dimension_general,
                 special_value, lstm_units=128, dense_units=128, num_lstm_layers=1, num_dense_hidden_layers=2,
                 transfer_learning_rate=0.001, activation='relu'):
    """Create a fresh model and initialise it with weights from ``base_model``.

    The new model is built by ``create_model`` with ``transfer_learning=True``
    (so the optimizer uses ``transfer_learning_rate``). Weights of every
    ``lstm_vector_{i}``, ``lstm_scalar_{i}`` and ``dense_{i}`` layer are copied
    over by layer name. The 'output' layer is not copied, and no layer is
    frozen, so all weights remain trainable.

    Args:
        base_model: Already trained model with identically named and shaped
            LSTM / dense layers.
        (remaining arguments): Forwarded unchanged to ``create_model``.

    Returns:
        The compiled model carrying the copied weights.
    """
    new_model = create_model(max_seq_len_vector, dimension_vector, max_seq_len_scalar, dimension_scalar, dimension_general,
                             special_value, lstm_units, dense_units, num_lstm_layers, num_dense_hidden_layers,
                             transfer_learning_rate, activation, transfer_learning=True)

    # Names of all layers whose pretrained weights are carried over, in the
    # order: per LSTM depth (vector, scalar), then the hidden dense layers.
    shared_layer_names = [
        f"lstm_{branch}_{depth}"
        for depth in range(num_lstm_layers)
        for branch in ("vector", "scalar")
    ]
    shared_layer_names.extend(f"dense_{depth}" for depth in range(num_dense_hidden_layers))

    for layer_name in shared_layer_names:
        new_model.get_layer(name=layer_name).set_weights(base_model.get_layer(name=layer_name).get_weights())

    return new_model

#### Define hyperparameter settings

#### Train a single model to predict SOH and concatenate it to input

#### Train Ensemble for all parameters with transfer learning

In [30]:
def bootstrap_sample(X_vector, X_scalar, X_general, y):
    """Draw one bootstrap resample (with replacement) of the data set.

    The same row indices are applied to all four arrays so that inputs and
    targets stay aligned. The resample has as many rows as ``X_vector``.

    Returns:
        Tuple ``(X_vector, X_scalar, X_general, y)`` of resampled arrays.
    """
    row_count = X_vector.shape[0]
    picked_rows = np.random.choice(row_count, size=row_count, replace=True)
    return tuple(array[picked_rows] for array in (X_vector, X_scalar, X_general, y))

In [31]:
def fit_ensemble(n_members, X_vector_train, X_scalar_train, X_general_train, y_train,
                 max_seq_len_vector, dimension_vector, max_seq_len_scalar, dimension_scalar, special_value,
                 model_hyperparameter, train_hyperparameter, parameter, pretrained_model=None):
    """Train ``n_members`` models, each on its own bootstrap resample.

    Args:
        n_members: Number of ensemble members to train.
        X_vector_train, X_scalar_train, X_general_train, y_train: Training data.
        max_seq_len_vector, dimension_vector, max_seq_len_scalar,
        dimension_scalar, special_value: Forwarded to the model factory.
        model_hyperparameter: Keyword arguments for the model factory.
        train_hyperparameter: Dict with 'normal_epochs', 'transfer_epochs'
            and 'batch_size'.
        parameter: Accepted for the caller's convenience; not used here.
        pretrained_model: When given, every member is initialised from it via
            ``create_model_transfer_learning`` and trained for
            'transfer_epochs'; otherwise members are built with
            ``create_model`` and trained for 'normal_epochs'.

    Returns:
        List of the fitted models.
    """
    use_transfer = pretrained_model is not None
    epochs_key = 'transfer_epochs' if use_transfer else 'normal_epochs'

    members = []
    for _ in range(n_members):
        # Each member sees a different resample of the training rows.
        vector_boot, scalar_boot, general_boot, target_boot = bootstrap_sample(
            X_vector_train, X_scalar_train, X_general_train, y_train)

        factory_args = (max_seq_len_vector, dimension_vector, max_seq_len_scalar,
                        dimension_scalar, X_general_train.shape[1], special_value)
        if use_transfer:
            member = create_model_transfer_learning(pretrained_model, *factory_args, **model_hyperparameter)
        else:
            member = create_model(*factory_args, **model_hyperparameter)

        member.fit(
            [vector_boot, scalar_boot, general_boot],
            target_boot,
            epochs=train_hyperparameter[epochs_key],
            batch_size=train_hyperparameter['batch_size'],
            verbose=0,
        )
        members.append(member)

    return members

#### Train Models to estimate aleatoric uncertainty

In [32]:
def ensemble_mean_std(ensemble, X_vector, X_scalar, X_general):
    """Return the per-sample mean and standard deviation across ensemble members.

    Every model in ``ensemble`` predicts on the same inputs; the statistics are
    taken over the member axis (axis 0) of the stacked predictions.

    Returns:
        Tuple ``(means, stds)``, each with the shape of a single prediction.
    """
    member_outputs = np.asarray([member.predict([X_vector, X_scalar, X_general]) for member in ensemble])
    return member_outputs.mean(axis=0), member_outputs.std(axis=0)

def estimate_aleatoric_uncertainty(ensemble,
                                   max_seq_len_vector, dimension_vector, max_seq_len_scalar, dimension_scalar, dimension_general, special_value,
                                   X_vector_test, X_scalar_test, X_general_test, y_test,
                                   model_hyperparameter, train_hyperparameter,
                                   X_vector_train=None, X_scalar_train=None, X_general_train=None, y_train=None):
    """Fit a variance model on the training residuals and predict test-set std.

    Steps:
      1. Predict the training set with ``ensemble`` to obtain the ensemble mean
         and the ensemble (epistemic) variance per sample.
      2. Form the target ``max((y_train - mean)**2 - variance, 0)``.
      3. Train a model built with ``aleatoric_uncertainty=True`` on that target.
      4. Predict the variance for the test inputs and return its square root.

    Args:
        ensemble: Fitted models for the parameter under consideration.
        max_seq_len_vector, dimension_vector, max_seq_len_scalar,
        dimension_scalar, dimension_general, special_value: Forwarded to
            ``create_model``.
        X_vector_test, X_scalar_test, X_general_test: Inputs to predict on.
        y_test: Accepted for signature symmetry; not used.
        model_hyperparameter: Keyword arguments for ``create_model``.
        train_hyperparameter: Dict with 'normal_epochs' and 'batch_size'.
        X_vector_train, X_scalar_train, X_general_train, y_train: Training
            data. Each argument left as ``None`` is looked up under the same
            name in the notebook's global namespace, which is what the
            function relied on before these parameters existed. Prefer
            passing them explicitly: a stale global ``y_train`` silently
            produces residuals against the wrong target.

    Returns:
        Array of predicted aleatoric standard deviations for the test inputs.

    Raises:
        KeyError: If a training array is neither passed nor defined globally.
    """
    # Backward-compatible fallback to the notebook globals.
    notebook_scope = globals()
    if X_vector_train is None:
        X_vector_train = notebook_scope['X_vector_train']
    if X_scalar_train is None:
        X_scalar_train = notebook_scope['X_scalar_train']
    if X_general_train is None:
        X_general_train = notebook_scope['X_general_train']
    if y_train is None:
        y_train = notebook_scope['y_train']

    means, stds = ensemble_mean_std(ensemble, X_vector_train, X_scalar_train, X_general_train)
    var = np.power(stds, 2)
    residuals_squared = np.power(y_train - means, 2)
    # Remove the part of the squared error already explained by ensemble
    # disagreement; clip at zero because a variance target cannot be negative.
    r_i_squared = np.maximum(residuals_squared - var, 0)

    model = create_model(max_seq_len_vector, dimension_vector, max_seq_len_scalar, dimension_scalar, dimension_general, special_value, aleatoric_uncertainty=True, **model_hyperparameter)
    model.fit([X_vector_train, X_scalar_train, X_general_train], r_i_squared, epochs=train_hyperparameter['normal_epochs'], batch_size=train_hyperparameter['batch_size'], verbose=0)

    aleatoric_var = model.predict([X_vector_test, X_scalar_test, X_general_test])
    return np.sqrt(aleatoric_var)

#### Evaluate Bootstrap Ensemble for all parameters

In [33]:
def adjust_bound(lower_bound, upper_bound, min_val, max_val, z_score, std):
    """Confine prediction intervals to the admissible range ``[min_val, max_val]``.

    For every index ``i`` the interval ``[lower_bound[i], upper_bound[i]]`` is
    modified in place:

    * Interval entirely above ``max_val``: it is rebuilt as
      ``[max(min_val, max_val - z_score * std[i]), max_val]``.
    * Interval entirely below ``min_val``: it is rebuilt as
      ``[min_val, min(max_val, min_val + z_score * std[i])]``.
    * Otherwise each bound is clipped to the admissible range.

    The two "entirely outside" cases are tested first. If the overlap cases
    were tested first they would also match such intervals, clip only one
    side and leave ``lower > upper``.

    Args:
        lower_bound: Mutable sequence of lower interval bounds.
        upper_bound: Mutable sequence of upper interval bounds.
        min_val: Smallest admissible value.
        max_val: Largest admissible value.
        z_score: Multiplier applied to ``std`` when an interval is rebuilt.
        std: Per-element standard deviations.

    Returns:
        The (mutated) ``lower_bound`` and ``upper_bound`` objects.
    """
    for i in range(len(lower_bound)):
        if lower_bound[i] > max_val:
            # Whole interval lies above the range: anchor it at max_val.
            upper_bound[i] = max_val
            lower_bound[i] = max(min_val, max_val - z_score * std[i])
        elif upper_bound[i] < min_val:
            # Whole interval lies below the range: anchor it at min_val.
            lower_bound[i] = min_val
            upper_bound[i] = min(max_val, min_val + z_score * std[i])
        else:
            # Interval overlaps the range (or sits inside it): clip each side.
            if lower_bound[i] < min_val:
                lower_bound[i] = min_val
            if upper_bound[i] > max_val:
                upper_bound[i] = max_val

    return lower_bound, upper_bound

In [34]:
def evaluate_ensemble(ensemble, X_vector_test, X_scalar_test, X_general_test, y_test, stds_al, mu=0.95, eta=50):
    """Score an ensemble's point predictions and prediction intervals.

    The ensemble mean is the point prediction. Three symmetric intervals
    ``mean +/- mu * spread`` are evaluated, where ``spread`` is

    * 'epistemic': the standard deviation across ensemble members,
    * 'aleatoric': ``stds_al``,
    * 'total':     the sum of both standard deviations.

    For each interval the coverage (PICP), the mean width (MPIW) and the mean
    width divided by the range of the test targets (NMPIW) are reported,
    together with MAE and MAPE of the ensemble mean. Only column 0 of
    ``y_test`` and of the predictions is evaluated.

    Args:
        ensemble: Fitted models exposing ``predict``.
        X_vector_test, X_scalar_test, X_general_test: Test inputs.
        y_test: 2-D array of test targets.
        stds_al: Aleatoric standard deviations, broadcastable to the means.
        mu: Multiplier applied directly to the spread.
            NOTE(review): the default 0.95 looks like a coverage level rather
            than a z-score (about 1.96 for 95 %) -- confirm the intent.
        eta: Accepted but not used.

    Returns:
        Dict with keys 'picp', 'mpiw', 'nmpiw' (each a dict with 'epistemic',
        'aleatoric', 'total'), 'mae' and 'maep'.
    """
    member_outputs = np.array([member.predict([X_vector_test, X_scalar_test, X_general_test]) for member in ensemble])
    means = np.mean(member_outputs, axis=0)
    stds_ep = np.std(member_outputs, axis=0)

    targets = y_test[:, 0]
    n_targets = len(y_test)
    target_range = np.max(targets) - np.min(targets)

    # Spread used for each interval flavour, in reporting order.
    spreads = {
        'epistemic': stds_ep,
        'aleatoric': stds_al,
        'total': stds_ep + stds_al,
    }

    picp, mpiw, nmpiw = {}, {}, {}
    for flavour, spread in spreads.items():
        lower = means - mu * spread
        upper = means + mu * spread

        inside = (targets >= lower[:, 0]) & (targets <= upper[:, 0])
        picp[flavour] = np.sum(inside) / n_targets

        mpiw[flavour] = np.mean(upper[:, 0] - lower[:, 0])
        nmpiw[flavour] = mpiw[flavour] / target_range

    point_predictions = means[:, 0]
    return {
        'picp': picp,
        'mpiw': mpiw,
        'nmpiw': nmpiw,
        'mae': mean_absolute_error(targets, point_predictions),
        'maep': mean_absolute_percentage_error(targets, point_predictions),
    }

#### Hyperparameter tuning

In [35]:
# Candidate values for the random search; one value per list is drawn per trial.

# -- training schedule --------------------------------------------------------
normal_epochs = [10, 20, 30]                        # epochs for models trained from scratch
transfer_epochs = [5, 10, 25]                       # epochs for models initialised from a pretrained one
transfer_learning_rate = [0.0005, 0.001, 0.00025]   # Adam learning rate for transfer-learning models

# -- architecture -------------------------------------------------------------
num_lstm_layers = [1, 2]
num_dense_layers = [2, 3, 4]

# -- ensemble / data ----------------------------------------------------------
n_members = [5, 10, 15]                             # ensemble size
transfer_beta = [True, False]                       # initialise beta_PE from the alpha_NE model?
resampling_frequency = ['monthly', 'weekly']        # selects the '<frequency>_resampling' data folder

In [36]:
N_TRIALS = 1000  # number of random-search trials to run
RESULTS_PATH = 'bootstrap_ensemble_results.json'


def convert_numpy_types(obj):
    """``json.dump`` default hook: turn NumPy scalars/arrays into built-in types.

    Raises:
        TypeError: For any object that is not a NumPy scalar or array.
    """
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")


for _ in range(N_TRIALS):

    print("==================================")
    print("New Iteration Start")
    print("==================================")

    # Every trial builds many models; drop the Keras state of the previous
    # trial so memory use does not grow over the whole search.
    K.clear_session()

    # Draw ONE resampling frequency per trial so both sequence inputs come from
    # the same data set and the choice can be recorded with the results.
    trial_resampling_frequency = random.choice(resampling_frequency)
    resampling_dir = f'../../data/{trial_resampling_frequency}_resampling'

    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(f'{resampling_dir}/X_scalar.pkl', 'rb') as f:
        X_scalar = pickle.load(f)

    with open(f'{resampling_dir}/X_vector.pkl', 'rb') as f:
        X_vector = pickle.load(f)

    max_seq_len_vector = X_vector.shape[1]
    dimension_vector = X_vector.shape[2]

    max_seq_len_scalar = X_scalar.shape[1]
    dimension_scalar = X_scalar.shape[2]

    # Ordered 80/20 split (no shuffling).
    train_size = int(0.8 * len(X_general))

    X_vector_train, X_vector_test = X_vector[:train_size], X_vector[train_size:]
    X_scalar_train, X_scalar_test = X_scalar[:train_size], X_scalar[train_size:]
    X_general_train, X_general_test = X_general[:train_size], X_general[train_size:]

    model_hyperparameter = {
        "lstm_units": 128,
        "dense_units": 128,
        "num_lstm_layers": random.choice(num_lstm_layers),
        "num_dense_hidden_layers": random.choice(num_dense_layers),
        "transfer_learning_rate": random.choice(transfer_learning_rate),
        "activation": 'relu',
    }

    train_hyperparameter = {
        "normal_epochs": random.choice(normal_epochs),
        "transfer_epochs": random.choice(transfer_epochs),
        "batch_size": 32
    }

    # Keep the candidate lists intact: assigning the drawn value back to
    # `n_members` / `transfer_beta` would make random.choice fail on the next
    # iteration because it would receive an int / bool instead of a list.
    trial_n_members = random.choice(n_members)
    trial_transfer_beta = random.choice(transfer_beta)

    # Train model to predict SOH and append as feature
    y = output[['OKT']].values
    y_train, y_test = y[:train_size], y[train_size:]

    soh_model = create_model(max_seq_len_vector, dimension_vector, max_seq_len_scalar, dimension_scalar, X_general_train.shape[1], SPECIAL_VALUE, **model_hyperparameter)

    soh_model.fit([X_vector_train, X_scalar_train, X_general_train], y_train, epochs=train_hyperparameter['normal_epochs'], batch_size=train_hyperparameter['batch_size'], verbose=0)

    soh_pred_train = soh_model.predict([X_vector_train, X_scalar_train, X_general_train])
    soh_pred_test = soh_model.predict([X_vector_test, X_scalar_test, X_general_test])
    mae_soh = mean_absolute_error(y_test, soh_pred_test)
    maep_soh = mean_absolute_percentage_error(y_test, soh_pred_test)

    results = {}
    results['SOH'] = {
        'mae': mae_soh,
        'maep': maep_soh
    }

    # Scale the SOH prediction with statistics from the training split only.
    scaler = MinMaxScaler()
    SOH_pred_train_norm = scaler.fit_transform(soh_pred_train)
    SOH_pred_test_norm = scaler.transform(soh_pred_test)

    # The *_train names are kept on purpose: estimate_aleatoric_uncertainty can
    # read X_vector_train / X_scalar_train / X_general_train / y_train from the
    # notebook's global namespace.
    X_general_train = np.hstack((X_general_train, SOH_pred_train_norm))
    X_general_test = np.hstack((X_general_test, SOH_pred_test_norm))

    # Train ensemble for all parameters with transfer learning
    ensemble = {}

    parameters = ['alpha_PE', 'alpha_NE', 'beta_PE', 'beta_NE']

    for parameter in parameters:
        y = output[[parameter]].values
        y_train = y[:train_size]

        # Each parameter (optionally) starts from the first member of the
        # previously trained parameter's ensemble.
        pretrained_model = None
        if parameter == 'alpha_NE':
            pretrained_model = ensemble['alpha_PE'][0]
        elif parameter == 'beta_PE' and trial_transfer_beta:
            pretrained_model = ensemble['alpha_NE'][0]
        elif parameter == 'beta_NE':
            pretrained_model = ensemble['beta_PE'][0]

        ensemble[parameter] = fit_ensemble(trial_n_members,
                                            X_vector_train,
                                            X_scalar_train,
                                            X_general_train,
                                            y_train,
                                            max_seq_len_vector,
                                            dimension_vector,
                                            max_seq_len_scalar,
                                            dimension_scalar,
                                            SPECIAL_VALUE,
                                            model_hyperparameter,
                                            train_hyperparameter,
                                            parameter,
                                            pretrained_model)

    # Train models to predict aleatoric uncertainty for each parameter
    aleatoric_std = {}
    for parameter in parameters:
        y = output[[parameter]].values
        # y_train must be refreshed here as well: the residuals inside
        # estimate_aleatoric_uncertainty are computed against it, and after the
        # loop above it would still hold the targets of the last parameter.
        y_train, y_test = y[:train_size], y[train_size:]
        aleatoric_std[parameter] = estimate_aleatoric_uncertainty(
            ensemble[parameter],
            max_seq_len_vector,
            dimension_vector,
            max_seq_len_scalar,
            dimension_scalar,
            X_general_train.shape[1],
            SPECIAL_VALUE,
            X_vector_test,
            X_scalar_test,
            X_general_test,
            y_test,
            model_hyperparameter,
            train_hyperparameter)

    # Evaluate ensemble for each parameter
    for parameter in parameters:
        y = output[[parameter]].values
        y_test = y[train_size:]
        results[parameter] = evaluate_ensemble(ensemble[parameter], X_vector_test, X_scalar_test, X_general_test, y_test, aleatoric_std[parameter])

    # Save results
    trial = {
        'model_hyperparameter': model_hyperparameter,
        'train_hyperparameter': train_hyperparameter,
        'additional_hyperparameter': {
            'n_members': trial_n_members,
            'transfer_beta': trial_transfer_beta,
            # Record the value actually used, not the list of candidates.
            'resampling_frequency': trial_resampling_frequency
        },
        'results': results
    }

    # Load the existing JSON data from file
    try:
        with open(RESULTS_PATH, 'r') as json_file:
            data = json.load(json_file)
    except FileNotFoundError:
        data = []

    # Append the new trial
    data.append(trial)

    # Save the updated data back to the JSON file
    with open(RESULTS_PATH, 'w') as json_file:
        json.dump(data, json_file, indent=4, default=convert_numpy_types)


New Iteration Start
