In [1]:
import numpy as np
from keras.utils import Sequence
from scipy.io import loadmat
from keras.preprocessing.sequence import pad_sequences

class MatDataGenerator(Sequence):
    def __init__(self, mat_files, batch_size, shuffle=True):
        self.mat_files = mat_files
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indices = np.arange(len(mat_files))
        if self.shuffle:
            np.random.shuffle(self.indices)

        # Determine max_length once during initialization
        self.max_length = self.determine_max_length()

    def determine_max_length(self):
        max_length = 0
        for file in self.mat_files:
            mat_data = loadmat(file, squeeze_me=True, struct_as_record=False)
            length = len(mat_data['id'])
            if length > max_length:
                max_length = length
        return max_length

    def __len__(self):
        return int(np.ceil(len(self.mat_files) / float(self.batch_size)))

    def __getitem__(self, idx):
        # Get batch of .mat file names
        batch_indices = self.indices[idx*self.batch_size:(idx+1)*self.batch_size]
        batch_mat_files = [self.mat_files[i] for i in batch_indices]
        
        # Lists to store batch data
        X = [] # inputs
        y = [] # outputs

        # Loop over each .mat file in the batch
        for file in batch_mat_files:
            mat_data = loadmat(file, squeeze_me=True, struct_as_record=False)
            
            # Extract input and output variables and reshape them
            inputs = np.hstack([mat_data[var].reshape(-1, 1) for var in ['id', 'id_ref', 'IntErr_Id', 'IntErr_Iq', 'iq', 'iq_ref']])
            outputs = np.hstack([mat_data[var].reshape(-1, 1) for var in ['v_md', 'v_mq']])
            
            # Pad the sequences to self.max_length
            inputs = pad_sequences(inputs, maxlen=self.max_length, padding='post')
            outputs = pad_sequences(outputs, maxlen=self.max_length, padding='post')
            
            X.append(inputs)
            y.append(outputs)

        # Convert lists to numpy arrays
        X = np.array(X)
        y = np.array(y)

        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)


In [2]:
import os

data_dir = "D:/Data"  # Directory containing all .mat files

all_mat_files = [os.path.join(data_dir, file) for file in os.listdir(data_dir) if file.endswith('.mat')]

np.random.shuffle(all_mat_files)  # Shuffle the list of .mat files

# divide the data into train, test and validation sets
# 70% percent of the data is used for training, 15% for testing and 15% for validation

num_files = len(all_mat_files)
train_files = all_mat_files[:int(0.7 * num_files)] 
test_files = all_mat_files[int(0.7 * num_files):int(0.85 * num_files)]
val_files = all_mat_files[int(0.85 * num_files):]

In [3]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Initializing data generators
train_gen = MatDataGenerator(train_files, batch_size=32)
test_gen = MatDataGenerator(test_files, batch_size=32)
val_gen = MatDataGenerator(val_files, batch_size=32)

# the model is created 
model = Sequential()
model.add(LSTM(50, input_shape=(train_gen.max_length, 6)))
model.add(Dense(2))

model.compile(optimizer='adam', loss='mse')  

# Training the model
model.fit(train_gen, validation_data=val_gen, epochs=10)

MemoryError: Unable to allocate 58.6 TiB for an array with shape (3989800, 4038055) and data type int32