In [1]:
%pip install bayesian-optimization
%pip install GPUtil

Collecting bayesian-optimization
  Downloading bayesian_optimization-2.0.3-py3-none-any.whl.metadata (9.0 kB)
Collecting colorama<0.5.0,>=0.4.6 (from bayesian-optimization)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading bayesian_optimization-2.0.3-py3-none-any.whl (31 kB)
Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama, bayesian-optimization
Successfully installed bayesian-optimization-2.0.3 colorama-0.4.6
Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py) ... [?25l[?25hdone
  Created wheel for GPUtil: filename=GPUtil-1.4.0-py3-none-any.whl size=7392 sha256=c2a6ac05949d78885484413c5789124f6c3b2c674296c4565fb919f2ee66cc94
  Stored in directory: /root/.cache/pip/wheels/2b/4d/8f/55fb4f7b9b591891e8d3f72977c4ec6c7763b39c19f0861595
Successfully built GPUtil
Install

## Importing libraries

In [2]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import Variable
import GPUtil
import psutil
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Mount Google Drive at /content/drive (Colab only); the dataset folder read
# later in this notebook is addressed through this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [135]:
from sklearn.preprocessing import StandardScaler

# Folder on the mounted Drive that holds the five preprocessed recordings.
data_folder = "/content/drive/My Drive/Colab Notebooks/Parkinson/Data_Preprocessed1"

# Paths to 1.xlsx ... 5.xlsx, in that order.
file_list = [os.path.join(data_folder, f"{i}.xlsx") for i in range(1, 6)]

# Training split: files 1-3 stacked into a single frame, rows with NaN removed.
train_files = file_list[:3]
train_frames = []
for path in train_files:
    train_frames.append(pd.read_excel(path))
train_data = pd.concat(train_frames, ignore_index=True)
train_data.dropna(inplace=True)

# Test split: the fifth file (index 4), rows with NaN removed.
test_data = pd.read_excel(file_list[4])
test_data.dropna(inplace=True)

# Validation split: the fourth file (index 3), rows with NaN removed.
val_data = pd.read_excel(file_list[3])
val_data.dropna(inplace=True)

# Standardise the three accelerometer axes. The scaler is fitted on the
# training split only and then re-used for the other two splits.
axis_cols = ['ax', 'ay', 'az']
scaler = StandardScaler()
train_data[axis_cols] = scaler.fit_transform(train_data[axis_cols])
for frame in (test_data, val_data):
    frame[axis_cols] = scaler.transform(frame[axis_cols])

# Report the (rows, columns) of every split.
for split_title, frame in (("Training", train_data), ("Testing", test_data), ("Validation", val_data)):
    print(f"{split_title} data shape:", frame.shape)

Training data shape: (36473, 4)
Testing data shape: (11533, 4)
Validation data shape: (9297, 4)


In [136]:
def preprocess_time_series(data, window_size=50, overlap=1):
    """
    Slice a labelled accelerometer recording into fixed-length sliding windows.

    Args:
        data: A Pandas DataFrame with columns ['ax', 'ay', 'az', 'label'].
            'label' must hold non-negative integer class ids; integer-valued
            floats (e.g. 0.0 / 1.0) are accepted and converted.
        window_size: Number of consecutive rows in each window (>= 1).
        overlap: Fraction of a window shared with the next one. The stride is
            ``window_size - int(window_size * overlap)``, floored at 1, so the
            default ``overlap=1`` produces a stride of one row.

    Returns:
        X: Array of shape (n_windows, window_size, 3) with the window data.
        y: int64 array of shape (n_windows,) with the majority label of each
            window (ties resolve to the smallest label).
        When ``data`` has fewer rows than ``window_size`` both arrays are
        empty, with shapes (0, window_size, 3) and (0,).

    Raises:
        ValueError: If a required column is missing, ``window_size`` < 1, or
            the labels are not non-negative integers.
    """
    # Ensure the required columns are present
    if not set(['ax', 'ay', 'az', 'label']).issubset(data.columns):
        raise ValueError("Data must contain columns: ['ax', 'ay', 'az', 'label']")
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    # Calculate overlap size and step size
    overlap_size = int(window_size * overlap)
    step = max(1, window_size - overlap_size)  # Ensure step size is at least 1

    features = data[['ax', 'ay', 'az']].to_numpy()
    raw_labels = data['label'].to_numpy()

    # np.bincount only accepts non-negative integers; a label column that was
    # read as float would otherwise fail with an opaque casting error.
    with np.errstate(invalid='ignore'):
        labels = raw_labels.astype(np.int64)
    if not np.array_equal(labels, raw_labels) or (labels.size and labels.min() < 0):
        raise ValueError("'label' must contain non-negative integer class ids")

    starts = range(0, len(features) - window_size + 1, step)
    if len(starts) == 0:
        # Keep the trailing dimensions so downstream code sees a consistent rank.
        empty_X = np.empty((0, window_size, features.shape[1]), dtype=features.dtype)
        return empty_X, np.empty((0,), dtype=np.int64)

    X = np.stack([features[s:s + window_size, :] for s in starts])
    # Majority vote over the labels covered by each window.
    y = np.array(
        [np.bincount(labels[s:s + window_size]).argmax() for s in starts],
        dtype=np.int64,
    )
    return X, y


In [138]:
# Turn every split into sliding windows with the same window length/overlap.
# The splits are processed in the order train, test, validation.
window_size = 50
overlap = 1
(X_train, y_train), (X_test, y_test), (X_val, y_val) = (
    preprocess_time_series(split_frame, window_size, overlap)
    for split_frame in (train_data, test_data, val_data)
)

In [139]:
# Shapes of the windowed inputs for the train, validation and test splits.
for split_name, windows in (("train", X_train), ("validation", X_val), ("test", X_test)):
    print(f"Shape of the {split_name} set: ", windows.shape)

# Shapes of the matching per-window label vectors.
for split_name, targets in (("y_train", y_train), ("y_validation", y_val), ("y_test", y_test)):
    print(f"Shape of the {split_name} set: ", targets.shape)

Shape of the train set:  (36424, 50, 3)
Shape of the validation set:  (9248, 50, 3)
Shape of the test set:  (11484, 50, 3)
Shape of the y_train set:  (36424,)
Shape of the y_validation set:  (9248,)
Shape of the y_test set:  (11484,)


# PyTorch

In [140]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device

device(type='cuda')

In [141]:
# Convert the windowed NumPy arrays to PyTorch tensors.
# Inputs are stored as float32; labels are created directly as int64 instead
# of going through the legacy float32 `torch.Tensor(...)` constructor first.
X_train_tensors = torch.tensor(X_train, dtype=torch.float32)
y_train_tensors = torch.tensor(y_train, dtype=torch.long)

X_val_tensors = torch.tensor(X_val, dtype=torch.float32)
y_val_tensors = torch.tensor(y_val, dtype=torch.long)

X_test_tensors = torch.tensor(X_test, dtype=torch.float32)
y_test_tensors = torch.tensor(y_test, dtype=torch.long)

In [142]:
# Wrap each split's (inputs, labels) tensors in a dataset ...
train_dataset = TensorDataset(X_train_tensors, y_train_tensors)
val_dataset = TensorDataset(X_val_tensors, y_val_tensors)
test_dataset = TensorDataset(X_test_tensors, y_test_tensors)

# ... and build the DataLoaders. Only the training loader shuffles; the
# validation and test loaders iterate in dataset order.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=32)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=32)

## LSTM MODEL

In [145]:
class LstmModel(nn.Module):
    """Four stacked LSTM blocks of halving width followed by a small MLP head.

    The LSTM widths are ``hidden_size_lstm``, ``// 2``, ``// 4`` and ``// 8``;
    the head maps the last time step through ``hidden_size_fc`` and
    ``hidden_size_fc // 2`` units to ``num_classes`` raw logits.

    Args:
        input_size: Number of features per time step.
        hidden_size_lstm: Width of the first LSTM block (must be >= 8).
        hidden_size_fc: Width of the first fully connected layer (must be >= 2).
        num_layers: Number of layers inside each LSTM block.
        dp: Dropout probability applied after each hidden FC layer.
        seq_length: Stored on the instance; not used by the forward pass.
        num_classes: Number of output logits. The default of 1 gives a single
            logit, suitable for ``nn.BCEWithLogitsLoss``.

    Raises:
        ValueError: If a width would collapse to zero or ``num_classes`` < 1.
    """

    def __init__(self, input_size, hidden_size_lstm, hidden_size_fc, num_layers, dp, seq_length,
                 num_classes=1):
        super().__init__()
        if hidden_size_lstm < 8:
            raise ValueError("hidden_size_lstm must be >= 8 (the last LSTM uses hidden_size_lstm // 8 units)")
        if hidden_size_fc < 2:
            raise ValueError("hidden_size_fc must be >= 2 (the second FC layer uses hidden_size_fc // 2 units)")
        if num_classes < 1:
            raise ValueError("num_classes must be >= 1")

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size_lstm = hidden_size_lstm
        self.hidden_size_fc = hidden_size_fc
        self.seq_length = seq_length

        # Attribute names are kept stable so existing state_dict checkpoints load.
        self.lstm_1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size_lstm,
                              num_layers=num_layers, batch_first=True)
        self.lstm_2 = nn.LSTM(input_size=hidden_size_lstm, hidden_size=hidden_size_lstm // 2,
                              num_layers=num_layers, batch_first=True)
        self.lstm_3 = nn.LSTM(input_size=hidden_size_lstm // 2, hidden_size=hidden_size_lstm // 4,
                              num_layers=num_layers, batch_first=True)
        self.lstm_4 = nn.LSTM(input_size=hidden_size_lstm // 4, hidden_size=hidden_size_lstm // 8,
                              num_layers=num_layers, batch_first=True)

        self.fc_1 = nn.Linear(hidden_size_lstm // 8, hidden_size_fc)
        self.fc_2 = nn.Linear(hidden_size_fc, hidden_size_fc // 2)
        self.fc = nn.Linear(hidden_size_fc // 2, self.num_classes)

        self.tanh = nn.Tanh()
        self.elu = nn.ELU()
        self.dp = nn.Dropout(p=dp)

        # Early-stopping state, updated by early_stop().
        self.counter = 0
        self.min_validation_loss = float('inf')

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, num_classes) logits."""
        output = x
        for lstm in (self.lstm_1, self.lstm_2, self.lstm_3, self.lstm_4):
            output, _ = lstm(output)
            output = self.tanh(output)

        output = output[:, -1, :]  # Use the last output from the sequence

        out = self.dp(self.elu(self.fc_1(output)))
        out = self.dp(self.elu(self.fc_2(out)))
        return self.fc(out)  # Raw logits; no sigmoid/softmax applied here

    def early_stop(self, validation_loss, min_delta, patience):
        """Update the early-stopping state and report whether to stop.

        A loss below the best seen so far becomes the new best and resets the
        counter. A loss above ``best + min_delta`` increments the counter.
        Losses in between change nothing.

        Returns:
            True once the counter reaches ``patience``, otherwise False.
        """
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False
        if validation_loss > (self.min_validation_loss + min_delta):
            self.counter += 1
            if self.counter >= patience:
                return True
        return False

In [None]:
from tqdm.notebook import tqdm
import time


def run_epoch(model, dataloader, criterion, optimizer=None, desc=''):
    """Run one pass over ``dataloader`` and return (mean loss, accuracy).

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one the model is put in eval mode and gradients are
    disabled. Both metrics are weighted by batch size, so a shorter final
    batch does not skew the epoch average.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    n_correct = 0
    n_samples = 0
    with torch.set_grad_enabled(training):
        for x_batch, y_batch in tqdm(dataloader, desc=desc):
            x_batch = x_batch.to(device)
            # BCEWithLogitsLoss expects float targets shaped like the logits.
            y_batch = y_batch.to(device).float().view(-1, 1)

            if training:
                optimizer.zero_grad()
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = y_batch.size(0)
            loss_sum += loss.item() * batch_size
            # Predicted classes with threshold 0.5
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            n_correct += (predicted == y_batch).sum().item()
            n_samples += batch_size

    if n_samples == 0:
        raise ValueError("dataloader yielded no samples")
    return loss_sum / n_samples, n_correct / n_samples


# Model hyperparameters
input_size = 3
learning_rate = 0.001
hidden_size_l = 256
hidden_size_f = 512
num_layers = 1
dropout = 0.3

# Loss function and optimizer
criterion = nn.BCEWithLogitsLoss()

lstm_model = LstmModel(input_size, hidden_size_l, hidden_size_f, num_layers, dropout, X_train_tensors.shape[1]).to(device)
# NOTE(review): the logged run plateaus at ~0.65 accuracy from the first epoch.
# weight_decay=0.01 is a strong L2 penalty for Adam -- worth checking whether
# it is what pins the model to the majority class.
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)

history = {"loss":[], "accuracy":[], "val_loss":[], "val_accuracy":[]}

best_acc = 0

# Resource usage before training. The GPU figure is None when no GPU is
# visible, so the name is always defined.
gpus = GPUtil.getGPUs()
start_gpu_usage = gpus[0].load * 100 if gpus else None
start_memory_usage = psutil.virtual_memory().percent
start_cpu_usage = psutil.cpu_percent()

start_time = time.time()

print("----------------------------------------------------------------------------------------------")

for epoch in range(100):
  epoch_loss, accuracy = run_epoch(lstm_model, train_dataloader, criterion, optimizer, desc='Training batch')
  history["loss"].append(epoch_loss)
  history["accuracy"].append(accuracy)

  epoch_val_loss, val_accuracy = run_epoch(lstm_model, val_dataloader, criterion, desc='Validation batch')
  history["val_loss"].append(epoch_val_loss)
  history["val_accuracy"].append(val_accuracy)

  print("Epoch: %d, Loss: %1.5f, Accuracy: %1.5f, Val Loss: %1.5f, Val Accuracy: %1.5f" % (epoch+1, epoch_loss, accuracy, epoch_val_loss, val_accuracy))
  print("----------------------------------------------------------------------------------------------")

  # Checkpoint on the best validation accuracy; stop on stalled validation loss.
  if best_acc < val_accuracy:
    best_acc = val_accuracy
    torch.save(lstm_model.state_dict(), f"best_model.pth")
  if lstm_model.early_stop(validation_loss=epoch_val_loss, min_delta=0, patience=5):
    break

execution_time = time.time() - start_time

# Resource usage after training
end_cpu_usage = psutil.cpu_percent()
end_memory_usage = psutil.virtual_memory().percent
gpus = GPUtil.getGPUs()
end_gpu_usage = gpus[0].load * 100 if gpus else None

----------------------------------------------------------------------------------------------


Training batch:   0%|          | 0/1139 [00:00<?, ?it/s]

Validation batch:   0%|          | 0/289 [00:00<?, ?it/s]

Epoch: 1, Loss: 0.64847, Accuracy: 0.65005, Val Loss: 0.64745, Val Accuracy: 0.65311
----------------------------------------------------------------------------------------------


Training batch:   0%|          | 0/1139 [00:00<?, ?it/s]

Validation batch:   0%|          | 0/289 [00:00<?, ?it/s]

Epoch: 2, Loss: 0.64809, Accuracy: 0.65005, Val Loss: 0.64585, Val Accuracy: 0.65311
----------------------------------------------------------------------------------------------


Training batch:   0%|          | 0/1139 [00:00<?, ?it/s]

In [119]:
def objective_function(params):
    """Train a candidate LSTM briefly and return its negated validation accuracy.

    Args:
        params: Sequence ``(hidden_size, num_layers, dropout, learning_rate)``.
            ``hidden_size`` and ``num_layers`` are truncated to integers.

    Returns:
        ``-best_val_accuracy`` over at most 10 epochs, so that a minimiser
        searches for the most accurate configuration.

    Raises:
        ValueError: If the validation loader yields no samples.
    """
    hidden_size, num_layers, dropout, learning_rate = params

    # Same single-logit / BCE-with-logits setup as the main training run.
    model = LstmModel(input_size=3, hidden_size_lstm=int(hidden_size),
                      hidden_size_fc=128, num_layers=int(num_layers), dp=float(dropout),
                      seq_length=50).to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=float(learning_rate))

    best_acc = 0.0
    for epoch in range(10):  # You can increase the number of epochs here
        model.train()
        for x_batch, y_batch in train_dataloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device).float().view(-1, 1)  # Reshape for BCEWithLogitsLoss

            optimizer.zero_grad()
            loss = criterion(model(x_batch), y_batch)
            loss.backward()
            optimizer.step()

        # Validate the candidate itself (not the globally trained model).
        model.eval()
        val_loss_sum = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for x_batch, y_batch in val_dataloader:
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device).float().view(-1, 1)

                outputs = model(x_batch)
                batch_size = y_batch.size(0)
                val_loss_sum += criterion(outputs, y_batch).item() * batch_size

                predicted = (torch.sigmoid(outputs) > 0.5).float()
                val_correct += (predicted == y_batch).sum().item()
                val_total += batch_size

        if val_total == 0:
            raise ValueError("val_dataloader yielded no samples")
        val_loss = val_loss_sum / val_total
        val_acc = val_correct / val_total

        # Record the accuracy before the stop check so the last epoch counts.
        best_acc = max(best_acc, val_acc)

        # Early stopping check
        if model.early_stop(validation_loss=val_loss, min_delta=0.0, patience=5):
            print(f"Early stopping at epoch {epoch+1}")
            break

    return -best_acc  # Return negative accuracy for minimization

In [14]:
# Simplified evolution strategy (kept under the name cma_es for compatibility)
def cma_es(objective_function, initial_solution, population_size=10, max_generations=1,
           bounds=None, sigma=0.3, seed=None):
    """Minimise ``objective_function`` with a simple (mu, lambda) evolution strategy.

    This is not full CMA-ES: the sampling distribution stays isotropic and
    only its mean and a scalar step size are adapted. Candidates are sampled
    in a unit cube and mapped onto ``bounds``, so one step size is meaningful
    for parameters of very different magnitude (e.g. hidden size vs. learning
    rate).

    Args:
        objective_function: Callable taking a 1-D parameter array and
            returning a scalar to minimise.
        initial_solution: Starting point, one value per parameter.
        population_size: Candidates evaluated per generation (>= 1).
        max_generations: Number of generations (>= 1).
        bounds: Sequence of ``(low, high)`` per parameter. Defaults to the
            ranges for [hidden_size, num_layers, dropout, learning_rate].
        sigma: Initial step size as a fraction of each parameter's range.
        seed: Optional seed for reproducible sampling.

    Returns:
        ``(best_solution, best_score)`` where ``best_solution`` is the best
        candidate actually evaluated and ``best_score`` is the negated
        objective value of that same candidate.

    Raises:
        ValueError: On inconsistent bounds or non-positive sizes.
    """
    if bounds is None:
        bounds = [(32, 256), (1, 4), (0.1, 0.5), (0.0001, 0.01)]
    lower = np.array([low for low, _ in bounds], dtype=float)
    upper = np.array([high for _, high in bounds], dtype=float)
    start = np.asarray(initial_solution, dtype=float)
    n = start.size

    if lower.size != n:
        raise ValueError("bounds must provide one (low, high) pair per parameter")
    if np.any(upper <= lower):
        raise ValueError("every bound must satisfy low < high")
    if population_size < 1 or max_generations < 1:
        raise ValueError("population_size and max_generations must be >= 1")

    span = upper - lower
    lambda_ = population_size
    mu = max(1, population_size // 2)  # at least one elite, even for tiny populations
    rng = np.random.default_rng(seed)

    mean_unit = np.clip((start - lower) / span, 0.0, 1.0)
    current_sigma = float(sigma)
    best_solution = None
    best_value = np.inf

    for generation in range(max_generations):
        # Sample in the unit cube, then map back to the real parameter ranges.
        unit = np.clip(rng.normal(loc=mean_unit, scale=current_sigma, size=(lambda_, n)), 0.0, 1.0)
        solutions = np.clip(lower + unit * span, lower, upper)

        # Evaluate fitness (lower is better)
        fitness_values = np.array([objective_function(sol) for sol in solutions])
        order = np.argsort(fitness_values)
        generation_best = fitness_values[order[0]]

        # Remember the best candidate ever evaluated so that the returned
        # solution and the returned score belong together.
        if best_solution is None or generation_best < best_value:
            best_value = generation_best
            best_solution = solutions[order[0]].copy()

        # Update mean and sigma from the elite; the floor keeps the search alive.
        elite = unit[order[:mu]]
        mean_unit = elite.mean(axis=0)
        spread = elite - mean_unit
        current_sigma = max(float(np.sqrt(np.mean(np.sum(spread ** 2, axis=1)))), 1e-3)

        print(f"Generation {generation + 1}: Best fitness = {-generation_best}")

    return best_solution, -best_value

# Starting point of the search, ordered as
# [hidden_size, num_layers, dropout, learning_rate].
initial_solution = [128, 2, 0.3, 0.001]
best_solution, best_fitness = cma_es(objective_function, initial_solution)

# Summarise the result; the two integer-valued parameters are truncated for display.
print(
    "Best hyperparameters found:",
    f"Hidden size: {int(best_solution[0])}",
    f"Num layers: {int(best_solution[1])}",
    f"Dropout: {best_solution[2]}",
    f"Learning rate: {best_solution[3]}",
    f"Validation Accuracy: {best_fitness}",
    sep="\n",
)

KeyboardInterrupt: 

In [None]:
# Evaluate the trained LSTM on the held-out test set.
# `lstm_model` is the network trained earlier in this notebook; its best
# checkpoint (by validation accuracy) is stored in best_model.pth.
model = lstm_model

# If no checkpoint was written, fall back to the current weights.
if not os.path.exists('best_model.pth'):
    torch.save(model.state_dict(), 'best_model.pth')

# Load the best saved weights
model.load_state_dict(torch.load('best_model.pth'))

model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for X_batch, y_batch in test_dataloader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device).float().view(-1, 1)
        outputs = model(X_batch)
        # The model emits one logit per window: threshold the sigmoid at 0.5
        # (an argmax over a single column would always predict class 0).
        predicted = (torch.sigmoid(outputs) > 0.5).float()
        test_correct += (predicted == y_batch).sum().item()
        test_total += y_batch.size(0)

test_acc = test_correct / test_total
print(f"Test Accuracy: {test_acc:.4f}")