In [127]:
import os
import csv
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer, Dropout
from tensorflow import convert_to_tensor, float32
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
import seaborn as sns
import mlflow
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from imblearn.under_sampling import RandomUnderSampler
from scipy import signal
import pywt


In [128]:
path = "Dataset/mitbih_database"
filenames = next(os.walk(path))[2]
records=list()
annotations=list()
filenames.sort()
for f in filenames:
    filename, file_extension = os.path.splitext(f)
    if(file_extension=='.csv'):
        records.append(path+'/'+filename+file_extension)
    else:
        annotations.append(path+'/'+filename+file_extension)

In [129]:
def get_record_signals(index):
    signals = []
    labels = []
    with open(records[index],'r') as csvfile:
        filereader = csv.reader(csvfile,delimiter=',',quotechar='|')
        row_index = -1
        for row in filereader:
            if(row_index >= 0):
                signals.insert(row_index, int(row[1]))
            row_index += 1
        signals = np.array(signals)
    with open(annotations[index],'r') as csvfile:
        filereader = csv.reader(csvfile,delimiter=',',quotechar='|')
        row_index = -1
        for row in filereader:
            if(row_index >= 0):
                elements = list(filter(lambda x: len(x) > 0, row[0].split(" ")))
                labels.insert(row_index, [int(elements[1]), elements[2]])
            row_index += 1
        labels = np.array(labels)
    return signals, labels

In [130]:
def apply_detrend_and_butterworth(signals):
    fs = 360.0
    N = 650000
    T = N / fs
    t = np.linspace(0, T, N, endpoint=False)

    # Detrend (usuniecie DC/linearna skÅ‚adowa)
    data_detrended = signal.detrend(signals)

    # Butterworth bandpass (np. 0.5 - 40 Hz)
    lowcut = 0.5
    highcut = 40.0
    nyq = 0.5 * fs
    b, a = signal.butter(4, [lowcut/nyq, highcut/nyq], btype='band')
    data_filt = signal.filtfilt(b, a, data_detrended)  # zero-phase
    return data_filt

In [131]:
def extract_wavelet_features(data):
    coeffs = pywt.wavedec(data, 'sym4', level=4)

    features = []
    for c in coeffs[1:]:
        features.append(np.sum(c**2))
    return features

In [132]:
def apply_wavelet(data): 
    w = pywt.Wavelet('sym4')
    maxlev = pywt.dwt_max_level(len(data), w.dec_len)
    threshold = 0.04 # Threshold for filtering

    coeffs = pywt.wavedec(data, 'sym4', level=maxlev)
    for i in range(1, len(coeffs)):
        coeffs[i] = pywt.threshold(coeffs[i], threshold*max(coeffs[i]))
        
    datarec = pywt.waverec(coeffs, 'sym4')
    return datarec

In [133]:
def apply_welch(data):
    fs = 360.0
    nperseg = 64
    noverlap = nperseg // 2
    f_welch, Pxx = signal.welch(data, fs=fs, nperseg=nperseg, noverlap=noverlap, window='hann')
    return f_welch, Pxx

In [134]:
def apply_fft(data):
    fs = 360.0
    N = len(data)
    X = np.fft.fft(data)
    freqs = np.fft.fftfreq(N, 1/fs)
    positive = freqs >= 0
    freqs_pos = freqs[positive]
    X_pos = np.abs(X[positive]) / N
    return X_pos, freqs_pos

In [135]:
signals, labels = get_record_signals(6)
print("Number of samples:", len(signals))
print("Value Range:", np.min(signals), "->", np.max(signals))
print("Classes:", np.unique(labels[:, 1]))

Number of samples: 650000
Value Range: 611 -> 1538
Classes: ['+' 'N' 'V' '~']


In [136]:
labels[0]

array(['83', '~'], dtype='<U11')

In [137]:
def process_data(window_size):
    X = []
    y = []

    half_w = window_size // 2
    valid_labels = {'A', 'L', 'N', 'R', 'V'}

    for i in range(len(records)):
        signals, labels = get_record_signals(i)
        signals = apply_detrend_and_butterworth(signals)
        sig_len = len(signals)

        mask = np.isin(labels[:, 1], list(valid_labels))
        filtered_labels = labels[mask]
        
        for j in range(3, len(filtered_labels) - 1):
            # prev_2_label_pos = int(filtered_labels[j - 2][0])
            # prev_1_label_pos = int(filtered_labels[j - 1][0])
            # label_pos = int(filtered_labels[j][0])
            # next_1_label_pos = int(filtered_labels[j + 1][0])
            prev_3_label_pos = int(filtered_labels[j - 3][0])
            prev_2_label_pos = int(filtered_labels[j - 2][0])
            prev_1_label_pos = int(filtered_labels[j - 1][0])
            label_pos = int(filtered_labels[j][0])
            next_1_label_pos = int(filtered_labels[j + 1][0])
            
            RR_j_2 = prev_2_label_pos - prev_3_label_pos
            start_prev_2 = int(int(filtered_labels[j-2][0]) - 0.6 * RR_j_2)
            end_prev_2  = int(int(filtered_labels[j-2][0]) + 0.8 * RR_j_2)

            RR_j_1 = prev_1_label_pos - prev_2_label_pos
            start_prev_1 = int(int(filtered_labels[j-1][0]) - 0.6 * RR_j_1)
            end_prev_1  = int(int(filtered_labels[j-1][0]) + 0.8 * RR_j_1)
            
            RR_j = label_pos - prev_1_label_pos
            start_curr = int(int(filtered_labels[j][0]) - 0.6 * RR_j)
            end_curr  = int(int(filtered_labels[j][0]) + 0.8 * RR_j)
            
            RR_j_p1 = next_1_label_pos - label_pos
            start_next_1 = int(int(filtered_labels[j+1][0]) - 0.6 * RR_j_p1)
            end_next_1  = int(int(filtered_labels[j+1][0]) + 0.8 * RR_j_p1)


            # signal_min_2 = signals[prev_2_label_pos - half_w:prev_2_label_pos + half_w]
            # signal_min_1 = signals[prev_1_label_pos - half_w:prev_1_label_pos + half_w]
            # signal_current = signals[label_pos - half_w:label_pos + half_w]
            # signal_next_1 = signals[next_1_label_pos - half_w:next_1_label_pos + half_w]


            if start_prev_2 < 0 or end_next_1 > sig_len:
                continue

            # wavelet_energy = apply_wavelet(signals[start:end])
            # wavelet_prev_1 = apply_wavelet(signal_min_1)
            # wavelet_curr = apply_wavelet(signal_current)
            # wavelet_next_1 = apply_wavelet(signal_next_1)
            # fusion = [*wavelet_prev_1, *wavelet_curr, *wavelet_next_1]
            X.append([
                *apply_wavelet(signal.resample(signals[start_prev_1:end_prev_1], 256)),
                *apply_wavelet(signal.resample(signals[start_curr:end_curr], 256)),
                *apply_wavelet(signal.resample(signals[start_next_1:end_next_1], 256))])
            y.append(filtered_labels[j][1])

    return X, y


In [138]:
#Hyperparameters
WINDOW_SIZE = 250
EPOCHS = 500
BATCH_SIZE = 128
TEST_SIZE = 0.25
N_RUNS = 5

In [139]:
# signals_list = [X[y.index("A")], X[y.index("L")], X[y.index("N")], X[y.index("R")], X[y.index("V")]]
# titles = ["A - Atrial premature beat (APB)", "L - Left bundle branch block beat (LBBB)", "N - Normal beat", "R - Right bundle branch block beat (RBBB)", "V - Premature ventricular contraction (PVC)"]

# fig, ax = plt.subplots(1, 5, figsize=(20, 5))

# for a, sig, title in zip(ax, signals_list, titles):
#     a.plot(range(0, len(sig)), sig)
#     a.set_title(title)
#     a.set_xlabel("Sample")
#     a.grid(True)

# ax[0].set_ylabel("Amplitude")
# plt.tight_layout()
# plt.show()

In [140]:
mlflow.end_run() # Ensure any previous runs are closed

In [141]:
for i in range(N_RUNS):
    X, y = process_data(window_size=WINDOW_SIZE)
    
    # values, counts = np.unique(y, return_counts=True)
    # plt.bar(values, counts)
    # plt.title('Class Distribution')
    # plt.xlabel('Class')
    # plt.ylabel('Number of Samples')
    # plt.show()
    
    mlflow.start_run()
    mlflow.set_experiment("DNN_MIT_BIH_Arrythmia_Classification_With_Detrend_and_Butterworth_and_Relative_Window_Wavelet_And_Neighboring_Windows")
    mlflow.log_param("model", "DNN-Dense-128-Dense-128-Softmax")
    mlflow.log_param("input_dim", 768)
    mlflow.log_param("epochs", EPOCHS)
    mlflow.log_param("batch_size", BATCH_SIZE)
    mlflow.log_param("optimizer", "adam")
    mlflow.log_param("loss", "categorical_crossentropy")
    mlflow.log_param("test_size", TEST_SIZE)
    mlflow.log_param("scaler", "MinMaxScaler")
    mlflow.log_param("classes", "A,L,N,R,V")
    mlflow.log_param("window_size", WINDOW_SIZE)
    # Train/test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
    
    #Label Binarization
    lb = preprocessing.LabelBinarizer()
    y_train = lb.fit_transform(y_train)
    y_test = lb.transform(y_test)
    
    # Under-sampling
    rus = RandomUnderSampler(sampling_strategy='auto')
    X_train_res, y_train_res = rus.fit_resample(X_train, y_train)
    
    #Min/Max Scaling
    scaler = MinMaxScaler()
    X_train_res = scaler.fit_transform(X_train_res)
    X_test = scaler.transform(X_test)
    
    # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # print(f"Using device: {device}")
    # print(type(X_test))
    # print(X_test.dtype)
    # print(X_test.shape)
    # print(type(X_test[0]))
    model = Sequential()
    model.add(InputLayer(input_shape=(768,), name="InputLayer"))
    model.add(Dense(units=128, activation="relu", name=f"HiddenLayer-1"))
    model.add(Dense(units=128, activation="relu", name=f"HiddenLayer-2"))
    model.add(Dropout(0.1))
    model.add(Dense(5, activation='softmax'))

    model.compile(optimizer='adam',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'],
                    )
    history = model.fit(X_train_res, y_train_res,
            epochs=EPOCHS,
            batch_size=BATCH_SIZE, verbose=0)
        
    X_test_tf = convert_to_tensor(X_test, dtype=float32)
    y_test_tf = convert_to_tensor(y_test)

    y_pred = np.argmax(model.predict(X_test), axis=1)
    y_true = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_true, y_pred)

    precision, recall, f1, support = precision_recall_fscore_support(
        y_true, y_pred, average=None
    )

    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
        y_true, y_pred, average="macro"
    )

    precision_weighted, recall_weighted, f1_weighted, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted"
    )

    mlflow.log_metric("accuracy", accuracy)

    mlflow.log_metric("precision_macro", precision_macro)
    mlflow.log_metric("recall_macro", recall_macro)
    mlflow.log_metric("f1_macro", f1_macro)

    mlflow.log_metric("precision_weighted", precision_weighted)
    mlflow.log_metric("recall_weighted", recall_weighted)
    mlflow.log_metric("f1_weighted", f1_weighted)

    cm = confusion_matrix(y_true, y_pred)
    cm_norm = cm / cm.sum(axis=1, keepdims=True)

    plt.figure(figsize=(7, 6))
    sns.heatmap(
        cm_norm,
        annot=True,
        fmt=".2f",
        cmap="Blues"
    )
    plt.title("Normalized Confusion Matrix")
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

    plt.tight_layout()
    plt.savefig("confusion_matrix.png")
    plt.close()

    mlflow.log_artifact("confusion_matrix.png")
    report = classification_report(y_true, y_pred)
    with open("classification_report.txt", "w") as f:
        f.write(report)
    mlflow.log_artifact("classification_report.txt")
    mlflow.end_run()    

2025/12/28 15:25:11 INFO mlflow.tracking.fluent: Experiment with name 'DNN_MIT_BIH_Arrythmia_Classification_With_Detrend_and_Butterworth_and_Relative_Window_Wavelet_And_Neighboring_Windows' does not exist. Creating a new experiment.




In [142]:
experiment_name = "DNN_MIT_BIH_Arrythmia_Classification_With_Detrend_and_Butterworth_and_Relative_Window_Wavelet_And_Neighboring_Windows"

runs = mlflow.search_runs(
    experiment_names=[experiment_name],
    output_format="pandas"
)

summary = (
    runs
    .groupby(["params.window_size", "params.epochs", "params.model"])
    .agg(
        accuracy_mean=("metrics.accuracy", "mean"),
        accuracy_std=("metrics.accuracy", "std"),
        f1_macro_mean=("metrics.f1_macro", "mean"),
        f1_macro_std=("metrics.f1_macro", "std"),
    )
    .reset_index()
)

print(summary)

  params.window_size params.epochs                     params.model  \
0                250           500  DNN-Dense-128-Dense-128-Softmax   

   accuracy_mean  accuracy_std  f1_macro_mean  f1_macro_std  
0       0.960594      0.005351       0.908943      0.008652  
