## CNN (Convolutional Neural Networks)

Dans cette partie je vais essayer d'entrainer un CNN pour mon probleme de classification.

Dans un premier temps il va falloir créer un code pour arriver à utiliser correctement le dataset donnée

In [1]:
import pandas as pd
import numpy as np
import scipy.io
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from tensorflow import keras
from tensorflow.keras import layers
from datasetResize import *
from sklearn.metrics import classification_report

In [2]:
# lecture du excel
labels_df = pd.read_csv('REFERENCE-V3.csv', header=None)
labels_df.columns = ['filename', 'label']

print(labels_df.head())

  filename label
0   A00001     N
1   A00002     N
2   A00003     N
3   A00004     A
4   A00005     A


In [3]:
# Charger tous les signaux et leurs labels
labels = []
signals = []
signals_padded = []
signals_truncated = []
signals_interpolated = []

# Initialiser avec les paramètres pour le redimensionnement
ecg_resizer_max = ECGResizing(target_length=18286)
ecg_resizer_med = ECGResizing(target_length=9000)

for index, row in labels_df.iterrows():
    
    labels.append(row['label'])
    filename = row['filename']
    # Charger un signal ECG
    signal = ecg_resizer_max.load_ecg(f'training2017/{filename}.mat')
    #signals = np.append(signals,[[signal]])
    signals.append(signal)

    # Appliquer le zero padding
    signal_padded = ecg_resizer_max.resize_signal(signal, method='padding')
    signals_padded.append(signal_padded)

    # Appliquer le tronquage
    signal_truncated = ecg_resizer_med.resize_signal(signal, method='padding')
    signals_truncated.append(signal_truncated)

    # Appliquer l'interpolation
    signal_interpolated = ecg_resizer_med.resize_signal(signal, method='interpolate')
    signals_interpolated.append(signal_interpolated)

In [4]:
labels = np.array(labels)
signals_padded = np.array(signals_padded)
signals_truncated = np.array(signals_truncated)
signals_interpolated = np.array(signals_interpolated)


# Encoder les labels
label_mapping = {'N': 0, 'A': 1, 'O': 2, '~': 3}
labels_encoded = np.vectorize(label_mapping.get)(labels)

In [5]:
# Diviser en ensembles d'entraînement et de test
X_train_padded, X_test_padded, y_train_padded, y_test_padded = train_test_split(signals_padded, labels_encoded, test_size=0.2, stratify=labels, random_state=42)
X_train_truncated, X_test_truncated, y_train_truncated, y_test_truncated = train_test_split(signals_truncated, labels_encoded, test_size=0.2, stratify=labels, random_state=42)
X_train_interpolated, X_test_interpolated, y_train_interpolated, y_test_interpolated = train_test_split(signals_interpolated, labels_encoded, test_size=0.2, stratify=labels, random_state=42)

# Afficher les formes des ensembles
print(f'Ensemble d\'entraînement : {X_train_padded.shape}, Ensemble de test : {X_test_padded.shape}')

Ensemble d'entraînement : (6822, 18286), Ensemble de test : (1706, 18286)


In [6]:
import torch
from torch.utils.data import Dataset, DataLoader

class ECGDataset(Dataset):
    def __init__(self, signals, labels):
        self.signals = signals
        self.labels = labels

    def __len__(self):
        return len(self.signals)

    def __getitem__(self, idx):
        signal = torch.tensor(self.signals[idx], dtype=torch.float32).unsqueeze(0)  # Ajouter la dimension de canal
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return signal, label

# Créer des DataLoader pour les signaux interpolés, tronqués ou padded
batch_size = 32

# padded
train_dataset_padded = ECGDataset(X_train_padded, y_train_padded)
test_dataset_padded = ECGDataset(X_test_padded, y_test_padded)

train_loader_padded = DataLoader(train_dataset_padded, batch_size=batch_size, shuffle=True)
test_loader_padded = DataLoader(test_dataset_padded, batch_size=batch_size, shuffle=False)


# truncate
train_dataset_truncated = ECGDataset(X_train_truncated, y_train_truncated)
test_dataset_truncated = ECGDataset(X_test_truncated, y_test_truncated)

train_loader_truncated = DataLoader(train_dataset_truncated, batch_size=batch_size, shuffle=True)
test_loader_truncated = DataLoader(test_dataset_truncated, batch_size=batch_size, shuffle=False)


# interpolation
train_dataset_interpolated = ECGDataset(X_train_interpolated, y_train_interpolated)
test_dataset_interpolated = ECGDataset(X_test_interpolated, y_test_interpolated)

train_loader_interpolated = DataLoader(train_dataset_interpolated, batch_size=batch_size, shuffle=True)
test_loader_interpolated = DataLoader(test_dataset_interpolated, batch_size=batch_size, shuffle=False)



In [101]:
import torch.nn as nn
import torch.nn.functional as F

class ECGCNN(nn.Module):
    def __init__(self, num_classes=4, input_length=9000):  # Suppose que vous avez 4 classes : N, AF, O, ~
        super(ECGCNN, self).__init__()
        
        # Convolutions 1D
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=7, stride=1, padding=3)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)

        # pooling
        self.pool = nn.AvgPool1d(kernel_size=2, stride=2)

        # Fully connected layers (MLP)
        self.fc1 = nn.Linear(72000, 128)  # Ajustez selon votre entrée après pooling
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        
        x = x.view(x.size(0), -1)  # Flatten pour les fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x




In [102]:
import torch.optim as optim

num_classes = 4
criterion = nn.CrossEntropyLoss()  # Fonction de perte pour la classification multi-classes
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Initialiser le modèle pour un taille max
model_max = ECGCNN(num_classes, 18286)
optimizer_max = optim.Adam(model_max.parameters(), lr=0.001)  # Utilisation de l'optimiseur Adam
model_max.to(device)

# Initialiser le modèle pour un taille médiane
model_med_tr = ECGCNN(num_classes, 9000)
optimizer_med_tr = optim.Adam(model_med_tr.parameters(), lr=0.001)  # Utilisation de l'optimiseur Adam
model_med_tr.to(device)

# Initialiser le modèle pour un taille médiane
model_med_in = ECGCNN(num_classes, 9000)
optimizer_med_in = optim.Adam(model_med_in.parameters(), lr=0.001)  # Utilisation de l'optimiseur Adam
model_med_in.to(device)



ECGCNN(
  (conv1): Conv1d(1, 16, kernel_size=(7,), stride=(1,), padding=(3,))
  (conv2): Conv1d(16, 32, kernel_size=(5,), stride=(1,), padding=(2,))
  (conv3): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (conv4): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
  (pool): AvgPool1d(kernel_size=(2,), stride=(2,), padding=(0,))
  (fc1): Linear(in_features=72000, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=4, bias=True)
)

In [103]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    model.train()  # Mode entraînement
    
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for signals, labels in train_loader:
            signals = signals.to(device)
            labels = labels.to(device)
            
            # Remettre les gradients à zéro
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(signals)
            loss = criterion(outputs, labels)
            
            # Backward pass
            loss.backward()
            
            # Mise à jour des poids
            optimizer.step()
            
            running_loss += loss.item()

            # Calcul de l'accuracy (précision)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        # Calcul de la loss moyenne et de l'accuracy pour cette époque
        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = 100 * correct / total

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Train accuracy: {epoch_accuracy:.2f}%')
    
    print('Finished Training')



In [104]:
def evaluate_model_per_class(model, test_loader, class_names):
    model.eval()  # Mode évaluation
    correct = 0
    total = 0

    # Initialiser des variables pour suivre les performances par classe
    class_correct = np.zeros(len(class_names))
    class_total = np.zeros(len(class_names))
    
    with torch.no_grad():  # Pas de calcul des gradients en mode évaluation
        for signals, labels in test_loader:
            signals = signals.to(device)
            labels = labels.to(device)
            
            # Prédictions
            outputs = model(signals)
            _, predicted = torch.max(outputs, 1)
            
            # Mettre à jour les valeurs globales
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Calculer les vrais positifs pour chaque classe
            for i in range(len(class_names)):
                # Sélectionner les indices correspondant à la classe i
                idxs = (labels == i)
                
                # Nombre de vrais positifs pour la classe i
                class_correct[i] += (predicted[idxs] == labels[idxs]).sum().item()
                
                # Nombre total d'exemples de la classe i
                class_total[i] += idxs.sum().item()

    # Calculer l'accuracy globale
    accuracy = 100 * correct / total
    print(f'Accuracy globale: {accuracy:.2f}%')

    # Calculer et afficher l'accuracy par classe
    for i, class_name in enumerate(class_names):
        if class_total[i] > 0:
            class_accuracy = 100 * class_correct[i] / class_total[i]
            print(f'Accuracy pour la classe {class_name}: {class_accuracy:.2f}%')
        else:
            print(f'Pas d\'exemples pour la classe {class_name} dans l\'ensemble de test.')

# Exemple d'utilisation
class_names = ['N', 'AF', 'O', '~']  # Les classes correspondantes à 0, 1, 2, 3


In [105]:
# Entraîner le modèle avec les données padded
train_model(model_max, train_loader_padded, criterion, optimizer_max, num_epochs=10)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x146240 and 72000x128)

In [24]:
# Évaluer le modèle
evaluate_model_per_class(model_max, test_loader_padded, class_names)

Accuracy globale: 50.00%
Accuracy pour la classe N: 67.19%
Accuracy pour la classe AF: 3.95%
Accuracy pour la classe O: 33.33%
Accuracy pour la classe ~: 7.14%


In [106]:
# Entraîner le modèle avec les données padded + truncated
train_model(model_med_tr, train_loader_truncated, criterion, optimizer_med_tr, num_epochs=20)



Epoch [1/20], Loss: 1.6420, Train accuracy: 57.39%
Epoch [2/20], Loss: 0.8713, Train accuracy: 62.45%
Epoch [3/20], Loss: 0.5959, Train accuracy: 75.32%
Epoch [4/20], Loss: 0.2986, Train accuracy: 88.68%
Epoch [5/20], Loss: 0.1533, Train accuracy: 94.49%
Epoch [6/20], Loss: 0.0822, Train accuracy: 97.60%
Epoch [7/20], Loss: 0.0466, Train accuracy: 98.77%
Epoch [8/20], Loss: 0.0540, Train accuracy: 98.58%
Epoch [9/20], Loss: 0.0659, Train accuracy: 98.07%
Epoch [10/20], Loss: 0.0370, Train accuracy: 98.87%
Epoch [11/20], Loss: 0.0531, Train accuracy: 98.81%
Epoch [12/20], Loss: 0.0361, Train accuracy: 99.06%
Epoch [13/20], Loss: 0.0305, Train accuracy: 99.15%
Epoch [14/20], Loss: 0.0119, Train accuracy: 99.65%
Epoch [15/20], Loss: 0.0170, Train accuracy: 99.69%
Epoch [16/20], Loss: 0.0044, Train accuracy: 99.91%
Epoch [17/20], Loss: 0.0054, Train accuracy: 99.94%
Epoch [18/20], Loss: 0.0013, Train accuracy: 99.96%
Epoch [19/20], Loss: 0.0016, Train accuracy: 99.96%
Epoch [20/20], Loss: 

In [107]:
# Évaluer le modèle
evaluate_model_per_class(model_med_tr, test_loader_truncated, class_names)

Accuracy globale: 49.59%
Accuracy pour la classe N: 69.66%
Accuracy pour la classe AF: 3.95%
Accuracy pour la classe O: 27.12%
Accuracy pour la classe ~: 3.57%


In [108]:
# Entraîner le modèle avec les données interpolated
train_model(model_med_in, train_loader_interpolated, criterion, optimizer_med_in, num_epochs=20)



Epoch [1/20], Loss: 2.9602, Train accuracy: 56.87%
Epoch [2/20], Loss: 0.8674, Train accuracy: 64.12%
Epoch [3/20], Loss: 0.5740, Train accuracy: 76.80%
Epoch [4/20], Loss: 0.3161, Train accuracy: 87.80%
Epoch [5/20], Loss: 0.1685, Train accuracy: 94.42%
Epoch [6/20], Loss: 0.1459, Train accuracy: 95.76%
Epoch [7/20], Loss: 0.0862, Train accuracy: 97.55%
Epoch [8/20], Loss: 0.0633, Train accuracy: 98.26%
Epoch [9/20], Loss: 0.0867, Train accuracy: 97.80%
Epoch [10/20], Loss: 0.0498, Train accuracy: 98.75%
Epoch [11/20], Loss: 0.0500, Train accuracy: 98.84%
Epoch [12/20], Loss: 0.0318, Train accuracy: 99.27%
Epoch [13/20], Loss: 0.0374, Train accuracy: 99.08%
Epoch [14/20], Loss: 0.0639, Train accuracy: 98.30%
Epoch [15/20], Loss: 0.0349, Train accuracy: 99.08%
Epoch [16/20], Loss: 0.0248, Train accuracy: 99.40%
Epoch [17/20], Loss: 0.0271, Train accuracy: 99.49%
Epoch [18/20], Loss: 0.0457, Train accuracy: 98.84%
Epoch [19/20], Loss: 0.0363, Train accuracy: 99.09%
Epoch [20/20], Loss: 

In [109]:
# Évaluer le modèle
evaluate_model_per_class(model_med_in, test_loader_interpolated, class_names)

Accuracy globale: 49.53%
Accuracy pour la classe N: 69.95%
Accuracy pour la classe AF: 9.87%
Accuracy pour la classe O: 21.95%
Accuracy pour la classe ~: 25.00%
