# Recurrent Neural Network with Long Short-Term Memory layers

In [2]:
# Define the file paths
base_dir = '/home/vincent/AAA_projects/MVCS/Neuroscience/'
eeg_df_path = base_dir + 'DataFrames/eeg_df.csv'
merged_stim_df_path = base_dir + 'DataFrames/merged_stim_df.csv'
hurst_exponents_path = base_dir + 'HurstExponents/hurst_exponents_df.csv'
rnn_mfdfa_X_path = base_dir + 'RNN_data/rnn_X_data_combined.npy'
kuramoto_phases = '/home/vincent/AAA_projects/MVCS/Neuroscience/kuramoto_phases.npy'

# load data
eeg_df = pd.read_csv(eeg_df_path)
merged_stim_df = pd.read_csv(merged_stim_df_path)
hurst_exponents_df = pd.read_csv(hurst_exponents_path)
rnn_mfdfa_X_df_combined = np.load(rnn_mfdfa_X_path)
kuramoto_phases = np.load

In [3]:

# Load the saved PSD data
psd_data_dict = np.load(save_path, allow_pickle=True).item()

# Convert the PSD values to a 2D array format
num_channels = len(eeg_channels)
psd_array = np.array([psd_data_dict[channel] for channel in eeg_channels]).T

# Normalize the PSD values (Min-Max normalization)
psd_array = (psd_array - np.min(psd_array, axis=0)) / (np.max(psd_array, axis=0) - np.min(psd_array, axis=0) + 1e-8)

# Convert the data to PyTorch tensors
psd_tensor = torch.tensor(psd_array, dtype=torch.float32)

# Add the feature dimension
psd_tensor = psd_tensor.unsqueeze(-1)

print(psd_tensor.shape)  # Should print [sequence_length, num_channels, 1]

# Check if 'StimType' column is present in the dataframe
if 'StimType' in merged_stim_df.columns:
    # Proceed with replacement of values and dropping the column
    merged_stim_df["Frequency"] = merged_stim_df["StimType"].replace(frequency_mapping)
    merged_stim_df["Location"] = merged_stim_df["StimType"].replace(location_mapping)
    merged_stim_df.drop('StimType', axis=1, inplace=True)
else:
    print("The 'StimType' column does not exist in the dataframe.")

# Replace "Stim Start" with 1 and "Stim Stop" with 2
merged_stim_df["EventDescription"] = merged_stim_df["EventDescription"].replace({
    "Stim Start": 1,
    "Stim Stop": 2
})

# Convert data to PyTorch tensors
eeg_tensor = torch.tensor(eeg_df.values, dtype=torch.float32)
merged_stim_tensor = torch.tensor(merged_stim_df.values, dtype=torch.float32)
hurst_exponents_tensor = torch.tensor(hurst_exponents_df.values, dtype=torch.float32)
rnn_mfdfa_X_tensor = torch.tensor(rnn_mfdfa_X_df_combined, dtype=torch.float32)
kuramoto_tensor = torch.FloatTensor(kuramoto_phases).transpose(0, 1)

# Print tensor shapes
print("Shape of eeg_tensor:", eeg_tensor.shape)
print("Shape of merged_stim_tensor:", merged_stim_tensor.shape)
print("Shape of hurst_exponents_tensor:", hurst_exponents_tensor.shape)
print("Shape of rnn_X_tensor:", rnn_mfdfa_X_tensor.shape)

Shape of eeg_tensor: torch.Size([4227788, 33])
Shape of merged_stim_tensor: torch.Size([18, 9])
Shape of hurst_exponents_tensor: torch.Size([32, 1])
Shape of rnn_X_tensor: torch.Size([860, 100, 51])


# Normalize the data

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Normalize eeg_tensor
eeg_scaler = MinMaxScaler()
eeg_tensor_np = eeg_scaler.fit_transform(eeg_tensor)
eeg_tensor_normalized = torch.tensor(eeg_tensor_np, dtype=torch.float32)

# Normalize merged_stim_tensor (excluding Sub# and Session columns)
merged_stim_scaler = MinMaxScaler()
merged_stim_tensor_np = merged_stim_scaler.fit_transform(merged_stim_tensor[:, 2:])
merged_stim_tensor[:, 2:] = torch.tensor(merged_stim_tensor_np, dtype=torch.float32)

# Normalize hurst_exponents_tensor
hurst_scaler = MinMaxScaler()
hurst_exponents_tensor_np = hurst_scaler.fit_transform(hurst_exponents_tensor)
hurst_exponents_tensor_normalized = torch.tensor(hurst_exponents_tensor_np, dtype=torch.float32)

# Normalize rnn_mfdfa_X_tensor
rnn_mfdfa_scaler = MinMaxScaler()
rnn_mfdfa_X_tensor_reshaped = rnn_mfdfa_X_tensor.view(-1, rnn_mfdfa_X_tensor.size(2))
rnn_mfdfa_X_tensor_np = rnn_mfdfa_scaler.fit_transform(rnn_mfdfa_X_tensor_reshaped)
rnn_mfdfa_X_tensor_normalized = torch.tensor(rnn_mfdfa_X_tensor_np, dtype=torch.float32)

# Print tensor shapes after normalization
print("Shape of eeg_tensor_normalized:", eeg_tensor_normalized.shape)
print("Shape of merged_stim_tensor:", merged_stim_tensor.shape)
print("Shape of hurst_exponents_tensor_normalized:", hurst_exponents_tensor_normalized.shape)
print("Shape of rnn_mfdfa_X_tensor:", rnn_mfdfa_X_tensor.shape)

# Print the first few rows of each normalized DataFrame
print("Normalized EEG Data:")
print(eeg_tensor_normalized[:5])  # Print first 5 rows of eeg_tensor_normalized

print("\nNormalized Hurst Exponents Data:")
print(hurst_exponents_tensor_normalized[:5])  # Print first 5 rows of hurst_exponents_tensor_normalized

print("\nNormalized Merged Stim Data:")
print(merged_stim_tensor[:5])  # Print first 5 rows of merged_stim_tensor

print("Normalized RNN MFDFa X Data:")
print(rnn_mfdfa_X_tensor_normalized[:5])


# Print tensor shapes after normalization
print("Shape of eeg_tensor_normalized:", eeg_tensor_normalized.shape)
print("Shape of merged_stim_tensor:", merged_stim_tensor.shape)
print("Shape of hurst_exponents_tensor_normalized:", hurst_exponents_tensor_normalized.shape)
print("Shape of rnn_mfdfa_X_tensor_normalized:", rnn_mfdfa_X_tensor_normalized.shape)


Shape of eeg_tensor_normalized: torch.Size([4227788, 33])
Shape of merged_stim_tensor: torch.Size([18, 9])
Shape of hurst_exponents_tensor_normalized: torch.Size([32, 1])
Shape of rnn_mfdfa_X_tensor: torch.Size([860, 100, 51])
Normalized EEG Data:
tensor([[5.7522e-01, 5.1448e-01, 1.8268e-01, 6.3061e-01, 7.4618e-01, 6.2734e-01,
         7.2461e-01, 5.1302e-01, 6.0971e-01, 4.8493e-01, 6.9506e-01, 4.1021e-01,
         4.1003e-01, 5.3116e-01, 4.8551e-01, 5.0756e-01, 2.4764e-01, 4.6979e-01,
         9.9777e-01, 4.6894e-01, 6.3074e-01, 9.9253e-01, 2.6509e-01, 4.2655e-01,
         3.1056e-01, 2.5602e-01, 9.4031e-01, 4.1962e-01, 9.3565e-01, 9.9806e-01,
         6.7768e-01, 5.8537e-01, 0.0000e+00],
        [5.7460e-01, 5.1307e-01, 1.8183e-01, 6.3044e-01, 7.4559e-01, 6.2672e-01,
         7.2198e-01, 5.1129e-01, 6.0946e-01, 4.8444e-01, 6.9261e-01, 4.0888e-01,
         4.0934e-01, 5.3096e-01, 4.8520e-01, 5.0703e-01, 2.4690e-01, 4.6855e-01,
         9.9677e-01, 4.6884e-01, 6.3010e-01, 9.8969e-01, 2

In [7]:
def create_sequences(tensor, seq_length):
    xs = []
    ys = []
    for i in range(len(tensor) - seq_length - 1):  # -1 to avoid index out of range for y
        x = tensor[i:i+seq_length]
        y = tensor[i+seq_length]  # the next one is the target
        xs.append(x)
        ys.append(y)
    return torch.stack(xs), torch.stack(ys)

seq_length = 50  # you can adjust this
X, y = create_sequences(eeg_tensor_normalized, seq_length)

In [16]:
# Calculate the index separating the training and test data
train_test_split_idx = int(len(rnn_mfdfa_X_tensor) * 0.8)

# Split rnn_X_tensor, eeg_tensor_normalized and hurst_exponents_tensor_normalized
train_X = rnn_mfdfa_X_tensor[:train_test_split_idx]
test_X = rnn_mfdfa_X_tensor[train_test_split_idx:]

train_X_hurst = hurst_exponents_tensor_normalized[:train_test_split_idx]
test_X_hurst = hurst_exponents_tensor_normalized[train_test_split_idx:]

train_y = eeg_tensor_normalized[:train_test_split_idx]
test_y = eeg_tensor_normalized[train_test_split_idx:]

print(f"Training set: {len(train_X)} samples")
print(f"Test set: {len(test_X)} samples")


Training set: 688 samples
Test set: 172 samples


In [25]:
print(train_X.shape)
print(train_X_hurst.shape)
print(train_y.shape)


torch.Size([688, 100, 51])
torch.Size([32, 1])
torch.Size([688, 33])


In [17]:
import torch.nn as nn

class EEGPredictor(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(EEGPredictor, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Instantiate the model
input_size = train_X.shape[-1]  # input feature dimension
hidden_size = 64  # number of hidden units in LSTM
output_size = train_y.shape[-1]  # output feature dimension
model = EEGPredictor(input_size, hidden_size, output_size)

In [24]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import pytorch_lightning as pl
import optuna
from torch.utils.data import DataLoader

# Set the precision to 'high'
torch.set_float32_matmul_precision('high')

# Make sure your process_dataframes function is returning correctly shaped and combined tensors
class LSTMModel(pl.LightningModule):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, learning_rate):
        super(LSTMModel, self).__init__()
        self.save_hyperparameters()

        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # Replace nn.RNN with nn.LSTM
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_().cuda()
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).requires_grad_().cuda()
        h0 = h0.detach()
        c0 = c0.detach()

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        loss = nn.MSELoss()(y_pred, y)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)

def train_model(trial):
    # Define model
    model = LSTMModel(input_dim=10,  # adjust this to fit your data
                      hidden_dim=trial.suggest_categorical('hidden_dim', [32, 64, 128]),
                      layer_dim=trial.suggest_categorical('layer_dim', [2, 3, 4]),
                      output_dim=1,  # adjust this to fit your data
                      learning_rate=trial.suggest_loguniform('learning_rate', 1e-5, 1e-1))

    # Create TensorDatasets for training and test sets
    train_dataset = TensorDataset(train_X, train_X_hurst, train_y)
    test_dataset = TensorDataset(test_X, test_X_hurst, test_y)
    
    # If you want to use DataLoader for batching and shuffling during training, you can do the following:
    batch_size = 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)


    # Create the Trainer
    trainer = pl.Trainer(
        devices=2,  # change this according to your setup
        accelerator="cuda",  # change this according to your setup
        max_epochs=100
    )


    # Fit the model
    trainer.fit(model, train_dl)

    # Evaluate the model
    model.eval()
    with torch.no_grad():
        mse = 0
        loss = nn.MSELoss()
        for x, y in train_dl:
            mse += loss(model(x), y)
        mse /= len(train_dl)

    return mse.item()


if __name__ == "__main__":
    study = optuna.create_study(direction="minimize")
    study.optimize(train_model, n_trials=100)

    print("Number of finished trials: ", len(study.trials))
    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))


[I 2023-08-04 16:07:52,232] A new study created in memory with name: no-name-4a47711b-1093-4ef4-ad7b-27a00e5ce470
  learning_rate=trial.suggest_loguniform('learning_rate', 1e-5, 1e-1))
[W 2023-08-04 16:07:52,235] Trial 0 failed with parameters: {'hidden_dim': 64, 'layer_dim': 2, 'learning_rate': 0.003371799594331998} because of the following error: AssertionError('Size mismatch between tensors').
Traceback (most recent call last):
  File "/home/vincent/anaconda3/lib/python3.9/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_83196/2826018788.py", line 53, in train_model
    train_dataset = TensorDataset(train_X, train_X_hurst, train_y)
  File "/home/vincent/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataset.py", line 189, in __init__
    assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors), "Size mismatch between tensors"
AssertionError: Size mismatch between tensors
[W 2023-08-04 16:

AssertionError: Size mismatch between tensors

In [None]:
# Move tensors to GPU
test_X = test_X.cuda()
test_y = test_y.cuda()

# Set model to evaluation mode
model.eval()

# No gradient calculation needed
with torch.no_grad():
    # Forward pass
    predictions = model(test_X)

# Calculate the loss
loss = criterion(predictions, test_y)

print(f"Test Loss: {loss.item()}")
