<a href="https://www.kaggle.com/code/basth94/ml-bio-exercice-12?scriptVersionId=144200810" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Exercice 12:

[lien Nowledgeable](https://nowledgeable.com/invitation/student/join-module/9c8ec467-686a-44cd-a2f2-85cf174a79ad)
1. Tester un modèle de convolution 1D sans faire d'extraction de features.

2. Tester un classifieur avec et sans la transformée en ondelettes, pour savoir si le modèle donne un meilleur résultat avec la transformée en ondelettes.

3. Tester différents modèles de classification comme RandomForest, ExtraTrees, GradientBoosting, etc.

4. Tester différentes familles d'ondelettes comme Symlets, Coiflets, Biorthogonal, etc.

5. Tester d'autres features (vous pouvez vous référer aux listes de features des librairies tsfresh ou tsfel).

6. À l'aide de la transformée en ondelettes continue, transformez les signaux ECG en scalogrammes, puis utilisez un modèle CNN pour classifier les images.


In [None]:
!pip install wfdb

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
pd.set_option('display.max_columns', None)

import wfdb
import glob
import os
import pywt
import tensorflow as tf
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import  Dense, Dropout, Conv2D, Input, MaxPooling2D,Flatten

In [None]:
def load_signal_using_wfdb(file, start, end, channel, pn_dir):
    """
    Read a single channel of a WFDB record and return it as a flat array.

    file: name of record
    start: int, first sample to read (forwarded to wfdb as ``sampfrom``)
    end: int, end of the range to read (forwarded to wfdb as ``sampto``)
    channel: 0 or 1, index of the channel to read
    pn_dir: forwarded unchanged to ``wfdb.rdrecord`` as ``pn_dir``

    Returns ``record.p_signal`` reshaped to one dimension.
    """
    read_options = {
        "sampfrom": start,
        "sampto": end,
        "channels": [channel],
        "pn_dir": pn_dir,
    }
    single_channel = wfdb.rdrecord(file, **read_options).p_signal
    # Exactly one channel was requested, so collapse the result to 1-D.
    return single_channel.reshape(-1)


In [None]:
# Database identifiers queried through wfdb (record listing and `pn_dir`).
# The position of a name in this list is used as the integer class label.
database_names = ['nsrdb','mitdb','chfdb']
# Number of classes: one per database.
n_label = len(database_names)

In [None]:
# Build two parallel lists: every record name of every database, and for each
# record the index of its database in `database_names` (used as class label).
record_names = []
labels = []

for i, name in enumerate(database_names):
    current_record_names = wfdb.get_record_list(name)
    record_names.extend(current_record_names)
    # One label per record of this database.
    labels.extend([i] * len(current_record_names))
assert len(record_names) == len(labels)

In [None]:
def generate_data(sample_size, scale_size, nb_samples_per_signal, labels, record_names, database_names, file_path=None, waveletname='morl'):
    """
    Download ECG records, cut them into windows and turn each window into a
    scalogram (continuous wavelet transform) ready for a 2-D CNN.

    sample_size: int, number of samples in each window
    scale_size: int, number of CWT scales; scales 1..scale_size are used
    nb_samples_per_signal: int, number of windows taken from the start of
        each record (nb_samples_per_signal * sample_size samples are read)
    labels: sequence of int, class label of each record; the label is also
        used as an index into database_names
    record_names: sequence of record names, parallel to labels
    database_names: sequence of database names, indexed by label
    file_path: currently unused; kept so existing callers keep working
    waveletname: wavelet name handed to pywt.cwt (default 'morl')

    Returns (X_tensor, y_tensor):
        X_tensor: shape (n_windows, scale_size, sample_size, 1)
        y_tensor: one-hot labels, shape (n_windows, max(labels) + 1)
    with n_windows = len(record_names) * nb_samples_per_signal.

    Raises ValueError when record_names is empty or when labels and
    record_names do not have the same length.
    """
    if len(record_names) != len(labels):
        raise ValueError(
            f"record_names ({len(record_names)}) and labels ({len(labels)}) "
            "must have the same length"
        )
    if len(record_names) == 0:
        raise ValueError("record_names is empty: nothing to generate")

    scales = range(1, scale_size + 1)
    samples_per_record = nb_samples_per_signal * sample_size

    # Load the beginning of every record and cut it into equal-sized windows.
    signal_ds = []
    for record_name, label in tqdm(zip(record_names, labels), total=len(record_names)):
        signal = load_signal_using_wfdb(
            record_name,
            start=0,
            end=samples_per_record,
            channel=0,
            pn_dir=database_names[label],
        )
        signal_ds.extend(np.split(signal, nb_samples_per_signal))

    # Every window inherits the label of the record it was cut from.
    y_train = np.repeat(labels, nb_samples_per_signal)

    # One scalogram per window; pywt.cwt returns (coefficients, frequencies).
    X_train = np.array([pywt.cwt(window, scales, waveletname, 1)[0] for window in signal_ds])
    # Trailing channel axis expected by Conv2D.
    X_train = X_train[..., np.newaxis]
    X_tensor = tf.convert_to_tensor(X_train)

    # Size the one-hot encoding from the largest label rather than from the
    # number of distinct labels, so a gap in the labels (e.g. only 0 and 2
    # present) does not make to_categorical index out of range.
    num_classes = int(y_train.max()) + 1
    y = tf.keras.utils.to_categorical(y_train, num_classes)

    y_tensor = tf.convert_to_tensor(y)
    print(y_tensor[0])
    return X_tensor, y_tensor


In [None]:

# Windowing / scalogram configuration.
sample_size = 128            # samples per window
scale_size = 128             # number of CWT scales
nb_samples_per_signal = 32   # windows taken from each record

X_tensor, y_tensor = generate_data(
    sample_size=sample_size,
    scale_size=scale_size,
    nb_samples_per_signal=nb_samples_per_signal,
    labels=labels,
    record_names=record_names,
    database_names=database_names,
    file_path='signals_csv',
)
# Inputs and targets must describe the same number of windows.
assert X_tensor.shape[0] == y_tensor.shape[0]

In [None]:
def build_model(input_shape,num_class):
    """
    Build and compile a small 2-D convolutional classifier.

    input_shape: shape of one input sample (without the batch dimension)
    num_class: int, number of units of the final softmax layer

    Architecture: two Conv2D + MaxPooling2D stages (64 filters of size 5,
    then 32 filters of size 3, with Dropout(0.2) after the first stage),
    followed by Flatten, Dense(64), Dropout(0.2) and the softmax output.
    Compiled with Adam (learning rate 0.001), categorical cross-entropy loss
    and the accuracy metric.

    Returns the compiled Sequential model.
    """
    layer_stack = [
        Input(shape=input_shape),
        # First convolutional stage.
        Conv2D(filters=64, kernel_size=5, activation='relu'),
        MaxPooling2D(),
        Dropout(0.2),
        # Second convolutional stage.
        Conv2D(filters=32, kernel_size=3, activation='relu'),
        MaxPooling2D(),
        # Classification head.
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(num_class, activation='softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(
        optimizer=adam,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
# Input shape is one sample of X_tensor (batch axis dropped); the number of
# classes is the width of the one-hot target.
model = build_model(
    input_shape=X_tensor.shape[1:],
    num_class=y_tensor.shape[1],
)
model.summary()

In [None]:
# Keras takes the validation split from the *last* rows of x/y, before any
# shuffling. The windows are ordered database by database, so without a
# shuffle the validation set would be made almost entirely of the last class
# and the training set would never contain it. Shuffle once (fixed seed for
# reproducibility) before fitting.
# NOTE(review): windows of the same record can still land on both sides of
# the split; a per-record split would give a stricter validation score.
shuffled_idx = np.random.default_rng(seed=0).permutation(int(X_tensor.shape[0]))
model.fit(
    tf.gather(X_tensor, shuffled_idx),
    tf.gather(y_tensor, shuffled_idx),
    validation_split=0.2,
    epochs=10,
)

In [None]:
# Visual check: plot the first 2000 samples of one record together with the
# magnitude of its continuous wavelet transform (scalogram).
signal_test = load_signal_using_wfdb('208', start=0, end=2000, channel = 0, pn_dir='mitdb')
waveletname = 'morl'
scales = range(1, 128)
coeff, freqs = pywt.cwt(signal_test, scales, waveletname, 1)
print(coeff.shape)
N = signal_test.size
sr = 360  # assumed sampling rate of this record in Hz -- TODO confirm against the record header
dt = 1/sr
time = np.arange(0, N)*dt
# Share the x axis so the scalogram lines up with the signal above it.
fig, (ax1, ax2) = plt.subplots(2, figsize=(10, 5), sharex=True)
ax1.plot(time, signal_test)
ax1.set_ylabel('amplitude')
# extent is [left, right, bottom, top]: map the columns to the real time axis
# and the rows to the scales actually used (first scale at the top), instead
# of hard-coded values unrelated to the data.
im = ax2.imshow(abs(coeff[:,:]), extent = [time[0], time[-1], scales[-1], scales[0]],
                interpolation = 'bilinear', aspect = 'auto')
ax2.set_xlabel('time (s)')
ax2.set_ylabel('scale')