<a href="https://colab.research.google.com/github/Mahedi-Hasan-Anik/Thesis_materials/blob/main/Data_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pyedflib
import os
import numpy as np
import pandas as pd
import pyedflib

def process_edf_files(edf_files, label, expected_channels=19):
    """Load EDF recordings into one labelled sample-by-channel DataFrame.

    Each usable file contributes one row per sample and one column per
    signal, plus a constant ``Label`` column. Processing is best effort: a
    file that cannot be read, that does not contain ``expected_channels``
    signals, or whose signals differ in length is reported on stdout and
    skipped instead of aborting the whole run.

    Args:
        edf_files: Iterable of paths to EDF files.
        label: Value written to the ``Label`` column of every row.
        expected_channels: Number of signals a file must contain to be kept.
            Defaults to 19, the value that used to be hard-coded.

    Returns:
        A DataFrame with columns ``Channel_1`` .. ``Channel_<n>`` followed by
        ``Label``, or an empty, column-less DataFrame when no file
        contributed any data.
    """
    labelled_blocks = []

    for edf_file in edf_files:
        try:
            # The context manager releases the file handle on every exit
            # path, including the `continue` below and any read error.
            with pyedflib.EdfReader(edf_file) as edf:
                n_signals = edf.signals_in_file

                if n_signals != expected_channels:
                    print(f"Skipping {edf_file}: Expected {expected_channels} channels, found {n_signals}")
                    continue

                signals = [edf.readSignal(i) for i in range(n_signals)]

            # Channels of unequal length cannot form a rectangular
            # (samples x channels) matrix, so reject them explicitly.
            if len({len(signal) for signal in signals}) > 1:
                print(f"Skipping {edf_file}: channels have different lengths")
                continue

            # Transpose so that rows are samples and columns are channels.
            samples = np.array(signals).T
            labels = np.full((samples.shape[0], 1), label)
            labelled_blocks.append(np.c_[samples, labels])
        except Exception as e:
            # Deliberately broad: one bad file must not stop the batch.
            print(f"Error processing {edf_file}: {e}")

    if not labelled_blocks:
        return pd.DataFrame()

    columns = [f"Channel_{i + 1}" for i in range(expected_channels)] + ["Label"]
    return pd.DataFrame(np.vstack(labelled_blocks), columns=columns)

def combine_edf_datasets(healthy_dir, sick_dir, output_csv_path):
    """Build one labelled CSV from a folder of healthy and a folder of sick EDFs.

    Files from ``healthy_dir`` are labelled 0 and files from ``sick_dir`` are
    labelled 1. Both sets are loaded with ``process_edf_files``, stacked
    (healthy rows first) and written to ``output_csv_path`` without the index.

    Args:
        healthy_dir: Directory containing the EDF files to label 0.
        sick_dir: Directory containing the EDF files to label 1.
        output_csv_path: Destination path of the combined CSV file.

    Returns:
        None. The result is written to disk and progress is printed.
    """

    def _list_edf_files(directory):
        # os.listdir gives no ordering guarantee; sorting makes the row order
        # of the output reproducible. The extension test ignores case so that
        # files named "*.EDF" are not silently dropped.
        return [
            os.path.join(directory, name)
            for name in sorted(os.listdir(directory))
            if name.lower().endswith(".edf")
        ]

    healthy_files = _list_edf_files(healthy_dir)
    sick_files = _list_edf_files(sick_dir)

    print("Processing healthy files...")
    healthy_data = process_edf_files(healthy_files, label=0)

    print("Processing sick files...")
    sick_data = process_edf_files(sick_files, label=1)

    # Leave empty frames out of the concatenation; pd.concat itself rejects
    # an empty list, hence the explicit fallback.
    frames = [frame for frame in (healthy_data, sick_data) if not frame.empty]
    if frames:
        combined_data = pd.concat(frames, ignore_index=True)
    else:
        print("Warning: no EDF data was loaded; the output file will be empty")
        combined_data = pd.DataFrame()

    combined_data.to_csv(output_csv_path, index=False)
    print(f"Combined dataset saved to {output_csv_path}")


# Input folders (one per class) and the destination of the merged CSV.
# NOTE(review): these are absolute paths under /content/drive and no mount
# call is visible in this cell — presumably the drive has to be mounted
# beforehand; confirm.
healthy_dir = "/content/drive/MyDrive/Colab_Notebooks/dataverse/healthy"
sick_dir = "/content/drive/MyDrive/Colab_Notebooks/dataverse/sick"
output_csv_path = "/content/drive/MyDrive/Colab_Notebooks/dataverse/dataset.csv"

# Runs as soon as the cell executes: reads both folders and writes the CSV.
combine_edf_datasets(
    healthy_dir=healthy_dir,
    sick_dir=sick_dir,
    output_csv_path=output_csv_path,
)


Collecting pyedflib
  Downloading pyEDFlib-0.1.38-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)
Downloading pyEDFlib-0.1.38-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.7 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.7/2.7 MB 28.2 MB/s eta 0:00:00
Installing collected packages: pyedflib
Successfully installed pyedflib-0.1.38
Processing healthy files...
Processing sick files...
Combined dataset saved to /content/drive/MyDrive/Colab_Notebooks/dataverse/dataset.csv
