In [102]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [103]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
if device.type == 'cuda':
    # Report the name of GPU 0 only when a GPU was actually selected.
    print(f'GPU: {torch.cuda.get_device_name(0)}')

Using device: cuda
GPU: NVIDIA GeForce RTX 4050 Laptop GPU


In [104]:
# Load the augmented BMED database and keep only experiments 1 and 29,
# renumbering the rows from zero after the filter.
df = (
    pd.read_csv("BMED_DB_augmented.csv")
    .loc[lambda frame: frame['exp'].isin([1, 29])]
    .reset_index(drop=True)
)
df

Unnamed: 0,exp,V,E,t,VF,VA,VB,CF_LA,CA_LA,CF_K,CB_K,I
0,1,20,0.25,0.0,1.0,1.0,1.0,0.5,0.0,1.0,0.0,0.1
1,1,20,0.25,0.25,1.008596,0.997544,0.99386,0.49645,0.000506,0.99946,-0.008859,0.184844
2,1,20,0.25,0.5,1.016095,0.995401,0.988503,0.492272,0.001876,0.997881,-0.013408,0.26375
3,1,20,0.25,0.75,1.021399,0.993886,0.984715,0.487561,0.003892,0.995323,-0.015084,0.335781
4,1,20,0.25,1.0,1.023411,0.993311,0.983278,0.482414,0.006335,0.991847,-0.015324,0.4
5,1,20,0.25,1.25,1.023411,0.993311,0.983278,0.476815,0.010311,0.968848,0.00615,0.455625
6,1,20,0.25,1.5,1.023411,0.993311,0.983278,0.470559,0.016686,0.917945,0.059099,0.505
7,1,20,0.25,1.75,1.023411,0.993311,0.983278,0.463449,0.024872,0.854534,0.126314,0.551875
8,1,20,0.25,2.0,1.023411,0.993311,0.983278,0.45529,0.034281,0.79401,0.190589,0.6
9,1,20,0.25,2.25,1.019035,0.994249,0.986841,0.445796,0.046281,0.737668,0.251391,0.653125


In [105]:
# Robust min-max scaling including safety margin.
# Each entry is (column, lower bound, upper bound); the bounds are fixed by hand
# rather than computed from the data.
ranges = {
    column: {'min': lower, 'max': upper}
    for column, lower, upper in [
        ('V', 0, 50),
        ('E', 0, 1),
        ('VF', 0, 2),
        ('VA', 0, 2),
        ('VB', 0, 8),
        ('CF_LA', -1, 4),
        ('CA_LA', -1, 4),
        ('CF_K', -1, 7),
        ('CB_K', -1, 2),
        ('I', 0, 5),
    ]
}

In [106]:
# Data normalization: copy the identifier columns unchanged, then min-max scale
# every feature column using the fixed bounds in `ranges`.
ndf = df[['exp', 't']].copy()

for col in ['V', 'E', 'VF', 'VA', 'VB', 'CF_LA', 'CA_LA', 'CF_K', 'CB_K', 'I']:
    if col not in ranges:
        # No bounds configured for this column: pass it through untouched.
        ndf[col] = df[col]
        continue
    ndf[col] = df[col].sub(ranges[col]['min']).div(ranges[col]['max'] - ranges[col]['min'])

ndf

Unnamed: 0,exp,t,V,E,VF,VA,VB,CF_LA,CA_LA,CF_K,CB_K,I
0,1,0.0,0.4,0.25,0.5,0.5,0.125,0.3,0.2,0.25,0.333333,0.02
1,1,0.25,0.4,0.25,0.504298,0.498772,0.124232,0.29929,0.200101,0.249933,0.33038,0.036969
2,1,0.5,0.4,0.25,0.508048,0.497701,0.123563,0.298454,0.200375,0.249735,0.328864,0.05275
3,1,0.75,0.4,0.25,0.5107,0.496943,0.123089,0.297512,0.200778,0.249415,0.328305,0.067156
4,1,1.0,0.4,0.25,0.511706,0.496656,0.12291,0.296483,0.201267,0.248981,0.328225,0.08
5,1,1.25,0.4,0.25,0.511706,0.496656,0.12291,0.295363,0.202062,0.246106,0.335383,0.091125
6,1,1.5,0.4,0.25,0.511706,0.496656,0.12291,0.294112,0.203337,0.239743,0.353033,0.101
7,1,1.75,0.4,0.25,0.511706,0.496656,0.12291,0.29269,0.204974,0.231817,0.375438,0.110375
8,1,2.0,0.4,0.25,0.511706,0.496656,0.12291,0.291058,0.206856,0.224251,0.396863,0.12
9,1,2.25,0.4,0.25,0.509518,0.497125,0.123355,0.289159,0.209256,0.217208,0.41713,0.130625


In [107]:
# prepare data
def prepare_data(ndf):
    '''
    Split the normalized dataframe into per-experiment arrays, one list per variable.

    Experiments are taken in the order their ids first appear in ndf['exp'].

    Args:
        ndf: normalized dataframe with an 'exp' column and the columns
             V, E, CF_LA, CA_LA, CF_K, CB_K, VF, VA, VB, I

    Returns:
        A 10-tuple of lists; each list holds one numpy array per experiment:
        Vt_list: applied voltage (column 'V')
        E_list: external electrolyte concentration (column 'E')
        CFLA_list: feed LA concentration (column 'CF_LA')
        CALA_list: acid LA concentration (column 'CA_LA')
        CFK_list: feed K concentration (column 'CF_K')
        CBK_list: base K concentration (column 'CB_K')
        VF_list: feed volume (column 'VF')
        VA_list: acid volume (column 'VA')
        VB_list: base volume (column 'VB')
        I_list: current (column 'I')
    '''
    # Column names in the exact order of the returned tuple.
    column_order = ('V', 'E', 'CF_LA', 'CA_LA', 'CF_K', 'CB_K', 'VF', 'VA', 'VB', 'I')
    collected = {name: [] for name in column_order}

    for exp_num in ndf['exp'].unique():
        rows = ndf[ndf['exp'] == exp_num]
        for name in column_order:
            collected[name].append(rows[name].values)

    return tuple(collected[name] for name in column_order)

# Unpack the per-experiment lists in the order prepare_data returns them.
(
    Vt_list, E_list,
    CFLA_list, CALA_list, CFK_list, CBK_list,
    VF_list, VA_list, VB_list,
    I_list,
) = prepare_data(ndf)

In [108]:
# Pad sequences
def pad_sequences(data_list, max_length=None, pad_value=-100.0):
    '''
    Pad variable-length sequences to a common length.

    Args:
        data_list: non-empty list of array-likes (numpy arrays, tensors, ...)
            whose first axis is the sequence axis; trailing dimensions are
            taken from the first entry
        max_length: length to pad to (default: longest sequence in data_list)
        pad_value: value written into the padded positions

    Returns:
        padded_tensor: [batch_size, max_length, ...] - float32 padded sequences
        seq_lengths: [batch_size] - original sequence lengths
        max_length: the padded length that was actually used

    Raises:
        ValueError: if data_list is empty, or if max_length is shorter than
            the longest sequence (padding never truncates data)
    '''
    if len(data_list) == 0:
        raise ValueError("pad_sequences requires at least one sequence")

    # as_tensor accepts numpy arrays and tensors alike without the copy-construct
    # warning torch.tensor() emits for tensor inputs.
    sequences = [torch.as_tensor(data, dtype=torch.float32) for data in data_list]
    lengths = [seq.shape[0] for seq in sequences]
    longest = max(lengths)

    if max_length is None:
        max_length = longest  # Auto-calculate the max length
    elif max_length < longest:
        raise ValueError(
            f"max_length={max_length} is shorter than the longest sequence ({longest})"
        )

    trailing_dims = tuple(sequences[0].shape[1:])  # shape of one time step
    padded_tensor = torch.full(
        (len(sequences), max_length) + trailing_dims, pad_value, dtype=torch.float32
    )

    # Copy each sequence into the front of its row; the tail keeps pad_value.
    for i, seq in enumerate(sequences):
        padded_tensor[i, :lengths[i]] = seq

    seq_lengths = torch.tensor(lengths)  # actual length of each experiment
    return padded_tensor, seq_lengths, max_length

# The voltage series fixes the common padded length and the per-experiment lengths.
Vt, seq_lengths, max_length = pad_sequences(Vt_list)

# Pad every other variable to that same length, keeping only the padded tensor.
# Indexing [0] instead of unpacking into `_` avoids rebinding `_`, which IPython
# uses for the last cell output.
E, CFLA, CALA, CFK, CBK, VF, VA, VB, I = (
    pad_sequences(series, max_length=max_length)[0]
    for series in (
        E_list, CFLA_list, CALA_list, CFK_list, CBK_list,
        VF_list, VA_list, VB_list, I_list,
    )
)

In [109]:
# Prepare input tensor
def prepare_input(Vt, E, CFLA, CALA, CFK, CBK, VF, VA, VB, seq_lengths):
    '''
    Assemble the model input, the first-time-step state and the padding mask.

    Args (each variable is a [batch_size, seq_len] padded tensor):
        Vt: applied voltage
        E: external electrolyte concentration
        CFLA: feed LA concentration
        CALA: acid LA concentration
        CFK: feed K concentration
        CBK: base K concentration
        VF: feed volume
        VA: acid volume
        VB: base volume
        seq_lengths: actual (unpadded) length of each sequence

    Returns:
        input_tensor: [batch_size, seq_len, 9] - channels ordered
            [Vt, E, CFLA, CALA, CFK, CBK, VF, VA, VB]
        initial_state: [batch_size, 9] - the same channels at time index 0
        mask: [batch_size, seq_len] - 1.0 on real steps, 0.0 on padding
        seq_lengths: the seq_lengths argument, returned unchanged
    '''
    # Channel order along the last axis of the input tensor.
    channels = (Vt, E, CFLA, CALA, CFK, CBK, VF, VA, VB)
    n_exp, n_steps = Vt.shape

    features = torch.zeros(n_exp, n_steps, 9)
    first_step = torch.zeros(n_exp, 9)
    for k, channel in enumerate(channels):
        features[:, :, k] = channel
        first_step[:, k] = channel[:, 0]

    # Padding mask: ones up to each sequence's true length, zeros afterwards.
    valid = torch.zeros(n_exp, n_steps)
    for row, n_valid in enumerate(seq_lengths):
        valid[row, :n_valid] = 1.0

    return features, first_step, valid, seq_lengths

# Build the model inputs from the padded per-variable tensors.
input_tensor, init, mask, seq_lengths = prepare_input(
    Vt, E,
    CFLA, CALA, CFK, CBK,
    VF, VA, VB,
    seq_lengths,
)

In [110]:
# Generate Dataset by experiments
class BMEDDataset(Dataset):
    '''
    Dataset in which each item is one experiment.

    Every constructor argument is stored as-is and indexed along its first axis
    by __getitem__, so all of them are expected to share the same first-axis length.
    '''

    # (key in the returned sample dict, attribute it is read from), in output order.
    _SAMPLE_FIELDS = (
        ('input', 'inputs'),
        ('init', 'init'),
        ('mask', 'masks'),
        ('seq_length', 'seq_lengths'),
        ('I_exp', 'I_exp'),
        ('CFLA_exp', 'CFLA_exp'),
        ('CALA_exp', 'CALA_exp'),
        ('CFK_exp', 'CFK_exp'),
        ('CBK_exp', 'CBK_exp'),
        ('VF_exp', 'VF_exp'),
        ('VA_exp', 'VA_exp'),
        ('VB_exp', 'VB_exp'),
    )

    def __init__(self, inputs, init, masks, seq_lengths, I_exp, CFLA_exp, CALA_exp, CFK_exp, CBK_exp, VF_exp, VA_exp, VB_exp):
        # Model inputs and bookkeeping.
        self.inputs, self.init = inputs, init
        self.masks, self.seq_lengths = masks, seq_lengths
        # Experimental series.
        self.I_exp = I_exp
        self.CFLA_exp, self.CALA_exp = CFLA_exp, CALA_exp
        self.CFK_exp, self.CBK_exp = CFK_exp, CBK_exp
        self.VF_exp, self.VA_exp, self.VB_exp = VF_exp, VA_exp, VB_exp

    def __len__(self):
        # Number of items equals the first-axis length of the inputs.
        return len(self.inputs)

    def __getitem__(self, idx):
        # Index every stored field with the same idx and return them under fixed keys.
        return {key: getattr(self, attr)[idx] for key, attr in self._SAMPLE_FIELDS}
# Wrap the prepared tensors; keyword arguments make the input/target pairing explicit.
train_dataset = BMEDDataset(
    inputs=input_tensor,
    init=init,
    masks=mask,
    seq_lengths=seq_lengths,
    I_exp=I,
    CFLA_exp=CFLA,
    CALA_exp=CALA,
    CFK_exp=CFK,
    CBK_exp=CBK,
    VF_exp=VF,
    VA_exp=VA,
    VB_exp=VB,
)

In [111]:
# Generate DataLoader: one experiment per batch, visited in shuffled order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=1,
    shuffle=True,
)

In [112]:
# Model Initialization
class BMEDModel(nn.Module):
    def __init__(self, hidden_nodes = 64, num_rnn_layers = 2, num_fnn_layers = 2,max_len = 37, dt = 0.25):
        super(BMEDModel, self).__init__()

        self.max_len = max_len
        self.input_features = 9
        self.flux_features = 4
        self.current_features = 1
        self.dt = dt # time step

        # Layer Normalization
        self.layer_norm = nn.LayerNorm(self.input_features)

        # RNN layers
        self.rnn_layers = nn.RNN(
            input_size = self.input_features,
            hidden_size = hidden_nodes,
            num_layers = num_rnn_layers,
            batch_first = True,
            dropout = 0.2 if num_rnn_layers > 1 else 0
        )

        # Flux Head
        flux_layers = []
        flux_sizes = [hidden_nodes]
        flux_step = (hidden_nodes - self.flux_features) / (num_fnn_layers)
        
        for i in range(num_fnn_layers):
            next_size = int(hidden_nodes - flux_step * (i + 1))
            if i == num_fnn_layers - 1:
                next_size = self.flux_features
            
            flux_layers.append(nn.Linear(flux_sizes[-1], next_size))
            flux_layers.append(nn.ReLU())
            flux_layers.append(nn.Dropout(0.2))
            flux_sizes.append(next_size)
            
        self.flux_NN = nn.Sequential(*flux_layers)
        

        # Current Head
        current_layers = []
        current_sizes = [hidden_nodes]
        current_step = (hidden_nodes - self.current_features) / (num_fnn_layers)
        
        for i in range(num_fnn_layers):
            next_size = int(hidden_nodes - current_step * (i + 1))
            if i == num_fnn_layers - 1:
                next_size = self.flux_features
            
            current_layers.append(nn.Linear(current_sizes[-1], next_size))
            current_layers.append(nn.ReLU())
            current_layers.append(nn.Dropout(0.2))
            current_sizes.append(next_size)
        
        self.current_NN = nn.Sequential(*current_layers)

    def physical_layer(self, fluxes, init, seq_len):
        '''
        Physical simulation using predicted fluxes
        fluxes: [batch_size, seq_len, 4] - [dLA, dK, dH2O_A, dH2O_B]
        init: [batch_size, 7] - [CFLA, CALA, CFK, CBK, VF, VA, VB]
        seq_len: [batch_size] - actual sequence lengths for each sample

        returns: [batch_size, seq_len, 7] - [CFLA, CALA, CFK, CBK, VF, VA, VB]
        '''
        batch_size, seq_len, _ = fluxes.shape

        device = fluxes.device

        # initialize output tensor
        outputs = torch.zeros(batch_size, seq_len, 7, device=device) # [batch, time, states]

        # set initial conditions
        cur_state = init.clone()

        for t in range(seq_len):
            # extract fluxes for current time step
            LA_flux = fluxes[:, t, 0] # LA migration (Feed -> Acid)
            K_flux = fluxes[:, t, 1] # K migration (Feed -> Base)
            VFA_flux = fluxes[:, t, 2] # H2O migration from Feed to Acid (Feed -> Acid)
            VFB_flux = fluxes[:, t, 3] # H2O migration from Feed to Base (Feed -> Base)

            # only update if within actual sequence lengths
            time_mask = (t < seq_lengths).float()
            LA_flux = LA_flux * time_mask
            K_flux = K_flux * time_mask
            VFA_flux = VFA_flux * time_mask
            VFB_flux = VFB_flux * time_mask

            # mass balance calculations
            cur_state = self.MB_step(cur_state, LA_flux, K_flux, VFA_flux, VFB_flux)

            # store results
            outputs[:, t, :] = cur_state

        return outputs

    def MB_step(self, state, LA_flux, K_flux, VFA_flux, VFB_flux):
        '''
        Perform one time step of mass balance
        state: [batch, 7] - [CFLA, CALA, CFK, CBK, VF, VA, VB]
        '''
        new_state = state.clone()

        # extract current values
        # channel 0: feed, channel 1: acid, channel 2: base
        # property 0: LA_conc, property 1: K_conc, property 2: volume

        CFLA = state[:, 0]
        CALA = state[:, 1]
        CFK = state[:, 2]
        CBK = state[:, 3]
        VF = state[:, 4]
        VA = state[:, 5]
        VB = state[:, 6]

        # volume changes due to water flux
        # Assuming positive flux means water moves from feed to acid or base
        nVF = VF - (VFA_flux + VFB_flux) * self.dt # Feed Volume
        nVA = VA + VFA_flux * self.dt # Acid Volume
        nVB = VB + VFB_flux * self.dt # Base Volume

        # LA mass balance
        nNFLA = CFLA*VF - LA_flux*self.dt
        nNALA = CALA*VA + LA_flux*self.dt
        nNFK = CFK*VF - K_flux*self.dt
        nNBK = CBK*VB + K_flux*self.dt

        # update states
        new_state[:, 0] = nNFLA / (nVF + 1e-8) # new Feed LA concentration
        new_state[:, 1] = nNALA / (nVA + 1e-8) # new Acid LA concentration
        new_state[:, 2] = nNFK / (nVF + 1e-8) # new Feed K concentration
        new_state[:, 3] = nNBK / (nVB + 1e-8) # new Base K concentration
        new_state[:, 4] = nVF # new Feed Volume
        new_state[:, 5] = nVA # new Acid Volume
        new_state[:, 6] = nVB # new Base Volume
    
        return new_state
    
    def forward(self, x, init, seq_len):
        '''
        x: [batch_size, seq_len, 9] - [Vt, E, CFLA, CALA, CFK, CBK, VF, VA, VB]
        init: [batch_size, 7] - [CFLA, CALA, CFK, CBK, VF, VA, VB]
        seq_len: [batch_size] - actual sequence lengths for each sample
        mask: [batch_size, seq_len] - mask for padded positions
        '''

        batch_size, seq_len, features = x.shape

        # ====================== Neural Netowrk Part ======================
        
        # Layer Normalization
        rnn_input = self.layer_norm(x)
        
        # Pack padded sequence for RNN
        rnn_input = nn.utils.rnn.pack_padded_sequence(
            rnn_input, seq_len.cpu(), batch_first = True, enforce_sorted=False
        )

        rnn_out, _ = self.rnn_layers(rnn_input)
        rnn_out, _ = nn.utils.rnn.pad_packed_sequence(rnn_out, batch_first=True, total_length=self.max_len) # pad 무시 후 다시 복원하여 layer 처리가 용이하도록 변경

        # Predict Fluxes for each time step
        fluxes = self.flux_NN(rnn_out) # [batch, seq_len, 4]

        # Predict Current for each time step
        current = self.current_NN(rnn_out) # [batch, seq_len, 1]
        