### Dataset

#### Import

In [2]:
import pandas as pd
import numpy as np 
from keras.preprocessing.sequence import pad_sequences
import mat4py as mpy

In [3]:
# Name of the .mat file that the next cell loads. The path is relative,
# so it is resolved against the kernel's current working directory.
filename_mat = "Degradation_Prediction_Dataset_ISEA.mat"

In [4]:
# Read the .mat file with mat4py's loadmat.
data_loader = mpy.loadmat(filename_mat)

# Build a DataFrame from the "TDS" entry of the loaded file.
# Later cells read the columns 'Cell', 'Sample', 'History', 'History_Cycle',
# 'Target' and 'Target_Cycle' from this frame.
data = pd.DataFrame.from_dict(data_loader["TDS"])

In [5]:
# Bring every sequence to a common length so the rows can later be stacked
# into rectangular arrays: histories are padded at the front ('pre'),
# targets at the back ('post').
history_padded = pad_sequences(data['History'], padding='pre', dtype='float32')
target_padded = pad_sequences(data['Target'], padding='post', dtype='float32')

# Store the padded matrices back on the frame as one Python list per row.
data['Padded_History'] = history_padded.tolist()
data['Padded_Target'] = target_padded.tolist()

In [6]:
# Columns carried over into the processed table.
# The order matters: generate_dataset() reads the last two columns by position,
# so 'Padded_History' and 'Padded_Target' must stay at the end.
PROCESSED_COLUMNS = [
    'Cell',            # Cell ID
    'Sample',          # Sample number
    'History_Cycle',   # Capacity history cycle numbers
    'Target_Cycle',    # Target cycle numbers
    'Padded_History',  # Padded capacity history
    'Padded_Target',   # Padded target curve
]

# Select the columns in a single step instead of rebuilding the table row by
# row with iterrows(): the result is the same six columns, but column dtypes
# are preserved and an empty input still yields a frame with the expected
# columns. reset_index(drop=True) gives the fresh 0..n-1 index that building a
# DataFrame from a list of dicts would produce.
processed_df = data[PROCESSED_COLUMNS].reset_index(drop=True)

# Keep the list-of-dicts view under its original name so any code that still
# reads `processed_data` continues to work.
processed_data = processed_df.to_dict('records')

In [7]:
def generate_dataset(data, shuffle=False, seed=None):
    """Turn the last two columns of ``data`` into float32 NumPy arrays.

    The second-to-last column is used as the model input (capacity history)
    and the last column as the target (capacity degradation curve). Columns
    are picked by position, so the caller must keep those two columns last.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose last two columns hold one numeric sequence per row. All
        sequences within a column must have the same length.
    shuffle : bool, default False
        If True, rows of both arrays are permuted with the same random order,
        so every input stays paired with its target.
    seed : int or None, default None
        Seed for a reproducible shuffle. When None, the legacy global NumPy
        random state is used (so a prior ``np.random.seed(...)`` still
        applies). Ignored when ``shuffle`` is False.

    Returns
    -------
    tuple of numpy.ndarray
        ``(input_array, target_array)``, both float32. When each row holds a
        flat sequence the arrays are 2-D: ``(n_rows, sequence_length)``.
        For an empty frame both arrays have shape ``(0,)``.

    Raises
    ------
    ValueError
        If the sequences in a column do not all have the same length, because
        NumPy cannot build a rectangular float32 array from them.
    """
    number_of_rows = len(data)  # number of rows in the DataFrame

    if number_of_rows == 0:
        # Nothing to stack; return empty arrays without touching any column,
        # so a frame with no columns at all is also accepted.
        input_array = np.array([], dtype=np.float32)
        target_array = np.array([], dtype=np.float32)
    else:
        # Convert each column in one go instead of reading cell by cell:
        # .tolist() yields one sequence per row and np.array stacks them.
        input_array = np.array(data.iloc[:, -2].tolist(), dtype=np.float32)
        target_array = np.array(data.iloc[:, -1].tolist(), dtype=np.float32)

    if shuffle:
        # Shuffle input and target arrays in tandem with one shared permutation
        if seed is None:
            indices = np.arange(number_of_rows)
            np.random.shuffle(indices)  # legacy global RNG
        else:
            indices = np.random.default_rng(seed).permutation(number_of_rows)
        input_array = input_array[indices]
        target_array = target_array[indices]
        print('Shuffled set', end=' - ')
    else:
        print('Non Shuffled set', end=' - ')

    print(input_array.shape, target_array.shape)  # Print the shape of the processed arrays
    return input_array, target_array

In [8]:
# Build the feature and label arrays from processed_df.
# shuffle is left at its default (False), so rows keep the DataFrame order.
features_file, labels_file = generate_dataset(processed_df)

Non Shuffled set - (10686, 288) (10686, 39)


In [28]:
# Display the feature array.
# NOTE(review): this cell's execution count (28) is out of sequence with the
# neighbouring cells (8 and 9), and the saved output shows zeros at the END of
# each row although 'Padded_History' is built with padding='pre'. The output
# may be stale; re-run from a fresh kernel to confirm.
features_file

array([[1.7973466, 1.7832394, 1.7703887, ..., 0.       , 0.       ,
        0.       ],
       [1.7957194, 1.7817528, 1.7690253, ..., 0.       , 0.       ,
        0.       ],
       [1.7941061, 1.7802836, 1.7676731, ..., 0.       , 0.       ,
        0.       ],
       ...,
       [1.3931186, 1.3476663, 1.2980216, ..., 0.       , 0.       ,
        0.       ],
       [1.3882458, 1.3424069, 1.2921164, ..., 0.       , 0.       ,
        0.       ],
       [1.3833266, 1.3371087, 1.2861387, ..., 0.       , 0.       ,
        0.       ]], dtype=float32)

In [9]:
# Display the label array.
# NOTE(review): the saved output of this cell is character-for-character
# identical to the output saved for `features_file`; verify it is current by
# re-running the notebook from a fresh kernel.
labels_file

array([[1.7973466, 1.7832394, 1.7703887, ..., 0.       , 0.       ,
        0.       ],
       [1.7957194, 1.7817528, 1.7690253, ..., 0.       , 0.       ,
        0.       ],
       [1.7941061, 1.7802836, 1.7676731, ..., 0.       , 0.       ,
        0.       ],
       ...,
       [1.3931186, 1.3476663, 1.2980216, ..., 0.       , 0.       ,
        0.       ],
       [1.3882458, 1.3424069, 1.2921164, ..., 0.       , 0.       ,
        0.       ],
       [1.3833266, 1.3371087, 1.2861387, ..., 0.       , 0.       ,
        0.       ]], dtype=float32)