In [1]:

from pathlib import Path
import nbformat
import pandas as pd
import numpy as np
import io
import sys
import matplotlib.pyplot as plt
from scipy.fft import fft
from scipy.stats import entropy
from unittest.mock import patch

# Fonctions de traitement
def add_gaussian_noise(signal, noise_level=0.02):
    return signal + np.random.normal(0, noise_level, size=signal.shape)

def time_shift(signal, shift_ratio=0.05):
    shift = int(len(signal) * shift_ratio)
    return np.roll(signal, shift)

def scale_amplitude(signal, scale_factor=1.1):
    return signal * scale_factor

def time_warp(signal, warp_factor=1.1):
    from scipy.interpolate import interp1d
    x = np.arange(len(signal))
    f = interp1d(x, signal, kind='linear', fill_value='extrapolate')
    x_new = np.linspace(0, len(signal) - 1, int(len(signal) * warp_factor))
    warped = f(x_new)
    return warped[:len(signal)] if len(warped) > len(signal) else np.pad(warped, (0, len(signal) - len(warped)), mode='constant')

def spectral_entropy(signal, fs=1000):
    spectrum = np.abs(fft(signal))[:len(signal)//2]
    psd = spectrum**2
    psd_norm = psd / np.sum(psd)
    return entropy(psd_norm)

def mean_frequency(signal, fs=1000):
    spectrum = np.abs(fft(signal))[:len(signal)//2]
    freqs = np.linspace(0, fs/2, len(spectrum))
    return np.sum(freqs * spectrum) / np.sum(spectrum)

# Chemins mis à jour
normal_dir = Path('/Users/Julie/Desktop/projet_kyushu/Normal_analyse')
patient_dir = Path('/Users/Julie/Desktop/projet_kyushu/Patient_analyse')
log_normal = Path('/Users/Julie/Desktop/projet_kyushu/SEMG_DB1/N_TXT')
log_patient = Path('/Users/Julie/Desktop/projet_kyushu/SEMG_DB1/A_TXT')
data = []

def process_files(ipynb_paths, txt_paths, label):
    for path in ipynb_paths:
        base_name = path.stem.replace('.ipynb', '')
        txt_file = txt_paths / (base_name + '.txt')
        print(f"Traitement de : {path.name}")
        if not txt_file.exists():
            print(f"Fichier txt introuvable : {txt_file}")
            continue
        try:
            with open(path, 'r', encoding='utf-8') as f:
                nb = nbformat.read(f, as_version=4)
            print("Notebook chargé")
            global_vars = {}
            sys.stdout = io.StringIO()
            sys.stderr = io.StringIO()
            plt.ioff()
            with patch('matplotlib.pyplot.show'), patch('IPython.display.display'):
                for cell in nb.cells:
                    if cell.cell_type == 'code':
                        try:
                            exec(cell.source, global_vars)
                        except Exception:
                            pass
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__
            plt.ion()
            plt.close("all")

            global_vars['file_path1'] = str(txt_file)
            df = global_vars['load_and_process_data'](global_vars['file_path1'])

            emg_RF = df['RF'].values
            emg_BF = df['BF'].values
            emg_VM = df['VM'].values
            emg_ST = df['ST'].values

            augmentations = {
                'gaussian_noise': add_gaussian_noise(emg_ST),
                'time_shift': time_shift(emg_ST),
                'scale_amplitude': scale_amplitude(emg_ST),
                'time_warp': time_warp(emg_ST)
            }

            for aug, sig in augmentations.items():
                data.append({
                    'file': path.name,
                    'augmentation': aug,
                    'SE_RF': spectral_entropy(emg_RF),
                    'SE_BF': spectral_entropy(emg_BF),
                    'SE_VM': spectral_entropy(emg_VM),
                    'SE_ST': spectral_entropy(sig),
                    'MF_ST': mean_frequency(sig),
                    'label': label
                })

        except Exception as e:
            print(f"Erreur dans {path.name} : {e}")

# Lancement
normal_files = sorted(normal_dir.rglob("*Nmar.ipynb"))
patient_files = sorted(patient_dir.rglob("*Amar.ipynb"))

print(f"Fichiers normaux trouvés : {len(normal_files)}")
print(f"Fichiers patients trouvés : {len(patient_files)}")

process_files(normal_files, log_normal, label='N')
process_files(patient_files, log_patient, label='A')

df = pd.DataFrame(data)
df.to_csv("emg_dataset_final_clean.csv", index=False)
print("Export terminé : emg_dataset_final_clean.csv")
df.head()


Fichiers normaux trouvés : 11
Fichiers patients trouvés : 11
Traitement de : 10Nmar.ipynb
Notebook chargé
Erreur dans 10Nmar.ipynb : name 'pd' is not defined
Traitement de : 11Nmar.ipynb
Notebook chargé
Erreur dans 11Nmar.ipynb : name 'pd' is not defined
Traitement de : 1Nmar.ipynb
Notebook chargé
Erreur dans 1Nmar.ipynb : name 'pd' is not defined
Traitement de : 2Nmar.ipynb
Notebook chargé
Erreur dans 2Nmar.ipynb : name 'pd' is not defined
Traitement de : 3Nmar.ipynb
Notebook chargé
Erreur dans 3Nmar.ipynb : name 'pd' is not defined
Traitement de : 4Nmar.ipynb
Notebook chargé
Erreur dans 4Nmar.ipynb : name 'pd' is not defined
Traitement de : 5Nmar.ipynb
Notebook chargé
Erreur dans 5Nmar.ipynb : name 'pd' is not defined
Traitement de : 6Nmar.ipynb
Notebook chargé
Erreur dans 6Nmar.ipynb : name 'pd' is not defined
Traitement de : 7Nmar.ipynb
Notebook chargé
Erreur dans 7Nmar.ipynb : name 'pd' is not defined
Traitement de : 8Nmar.ipynb
Notebook chargé
Erreur dans 8Nmar.ipynb : name 'pd' 