# Import libraries

In [1]:
import numpy as np
import pandas as pd
import mne  # library for reading edf files
import pywt  # library for continuous wavelet transform
import sqlite3
import pickle


# Create functions to read data from file and save to database

In [2]:
def file_to_DataDrame(path):
    """
    Read one EDF recording and label every sample with its annotation code.

    Each row of the result is one time sample and each column one channel
    (dots are stripped from the channel names). A trailing ``target`` column
    holds the integer part of the annotation code (``"T1"`` -> ``1``) that is
    active at that sample. Rows in which every channel is exactly zero are
    dropped before labelling, so labels are assigned by row position in the
    remaining data.

    Annotation onsets are converted to the nearest sample index using the
    sampling frequency stored in the file, so labelling does not depend on
    exact floating-point equality between onsets and a generated time axis.
    Samples that precede the first annotation receive the first annotation's
    code.

    Args:
        path (str): The path to the file
    Returns:
        pd.DataFrame: The dataframe containing the data and the target values
    Raises:
        IndexError: If the recording has samples but no annotations
        ValueError: If an annotation code that is in use is not ``T<integer>``
    Examples:
        >>> df = file_to_DataDrame("data/S001/S001R03.edf")
        >>> print(df)
            Fc5         Fc3         Fc1         ...  Oz          O2          Iz          target
        0   -0.000046   -0.000041   -0.000032   ...  0.000040    0.000108    0.000055    0
        1   -0.000054   -0.000048   -0.000034   ...  0.000064    0.000114    0.000074    0
        ...
    """
    reader = mne.io.read_raw_edf(path, preload=True)
    annotations = reader.annotations
    codes = np.asarray(annotations.description)
    # Use the rate recorded in the file rather than assuming 160 Hz.
    sfreq = float(reader.info["sfreq"])

    df = pd.DataFrame(
        reader.get_data().T,  # transpose to (samples, channels)
        columns=[channel.replace(".", "") for channel in reader.ch_names],
    )
    # Drop rows in which every channel is zero; copy so that adding the
    # target column below never writes into a view of the original frame.
    df = df[~(df == 0).all(axis=1)].copy()

    # Work in integer sample indices: np.arange with a float step can return
    # one element too many or too few, and float onsets rarely compare equal
    # to generated time stamps.
    onset_samples = np.round(
        np.asarray(annotations.onset, dtype=float) * sfreq
    ).astype(np.int64)
    order = np.argsort(onset_samples, kind="stable")  # searchsorted needs sorted input
    onset_samples = onset_samples[order]
    codes = codes[order]

    # For every sample, find the last annotation whose onset is <= that sample.
    active = np.searchsorted(onset_samples, np.arange(len(df)), side="right") - 1
    # Samples before the first onset would otherwise index -1 and silently
    # pick up the *last* annotation; give them the first annotation instead.
    active = np.clip(active, 0, None)

    df["target"] = np.char.replace(codes[active], "T", "").astype(int)
    return df


In [3]:
def read_all_file_df(num_exp=(3, 4), num_people=(1, 2), path="../../data/raw/"):
    """
    Read several recordings and return them stacked into one dataframe.

    For every subject in ``num_people`` and every run in ``num_exp`` the file
    ``{path}/S{subject:03d}/S{subject:03d}R{run:02d}.edf`` is loaded with
    ``file_to_DataDrame`` and the results are concatenated row-wise, subject
    by subject and run by run. The original row index of each file is kept.

    format:
        Fc5         Fc3         Fc1         ...  Oz          O2          Iz          target
    0   -0.000046   -0.000041   -0.000032   ...  0.000040    0.000108    0.000055    0
    1   -0.000054   -0.000048   -0.000034   ...  0.000064    0.000114    0.000074    0
    ...
    Args:
        num_exp (iterable of int): The experiment (run) numbers to read
        num_people (iterable of int): The subject numbers to read
        path (str): The directory that contains the per-subject folders
    Returns:
        pd.DataFrame: The dataframe containing the data and the target values;
            an empty dataframe when there is nothing to read
    """
    # Collect the frames first and concatenate once: calling pd.concat inside
    # the loop re-copies everything read so far on every iteration.
    frames = [
        file_to_DataDrame(f"{path}/S{subject:03d}/S{subject:03d}R{file:02d}.edf")
        for subject in num_people
        for file in num_exp
    ]
    if not frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(frames, axis=0)

In [None]:
def create_database(db_path):
    """
    Create the SQLite table that stores the continuous wavelet transforms.

    The ``wavelet_transforms`` table is created only if it does not exist
    yet, so calling this function repeatedly on the same file is harmless.

    Args:
        db_path (str): The path to the database
    Returns:
        None
    Examples:
        >>> create_database("cwt_data.db")
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            """
        CREATE TABLE IF NOT EXISTS wavelet_transforms (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            cwt_data BLOB,
            target INTEGER
        )
    """
        )
        conn.commit()
    finally:
        # Close the connection even if the statement or the commit fails.
        conn.close()


def _as_sql_value(value):
    """Convert a NumPy scalar to the equivalent built-in Python value."""
    # sqlite3 cannot bind NumPy integer scalars, so unwrap them first.
    return value.item() if isinstance(value, np.generic) else value


def insert_cwt_data(db_path, cwt_data, targets):
    """
    Save continuous wavelet transforms and their target values to the database.

    ``cwt_data`` must be a 3-D array. Its last axis is moved to the front and
    the two remaining axes are flattened, giving one 1-D feature vector per
    index along the original last axis. Each vector is cast to ``float32``,
    pickled and stored in ``wavelet_transforms.cwt_data`` next to the matching
    entry of ``targets``. All rows are written in a single transaction.

    Args:
        db_path (str): The path to the database
        cwt_data (np.array): The continuous wavelet transform of the signals
        targets (np.array): The target values, one per index along the last
            axis of ``cwt_data``; surplus entries are ignored
    Returns:
        None
    Raises:
        IndexError: If ``targets`` has fewer entries than there are rows to
            store; nothing is written in that case
    Examples:
        >>> insert_cwt_data("cwt_data.db", cwt_data, targets)
    """
    rows = cwt_data.transpose(2, 0, 1)
    rows = rows.reshape(rows.shape[0], -1)  # one flat feature vector per row

    # Fail before touching the database instead of half-way through the loop.
    if len(targets) < len(rows):
        raise IndexError(
            f"targets has {len(targets)} entries but {len(rows)} rows must be stored"
        )

    conn = sqlite3.connect(db_path)
    try:
        conn.executemany(
            "INSERT INTO wavelet_transforms (cwt_data, target) VALUES (?, ?)",
            (
                (pickle.dumps(np.array(row, dtype=np.float32)), _as_sql_value(target))
                for row, target in zip(rows, targets)
            ),
        )
        conn.commit()
    finally:
        # Closing without a commit discards a partially written batch.
        conn.close()
    # Report success only after the data has actually been committed.
    print(f"success save: {rows.shape}")

In [None]:
def df_to_CWTfiles(
        df, num_of_rows=1000, wave="cgau4", frq=160, resolution=100, db_path="cwt_data.db"
):
    """
    Compute the continuous wavelet transform of a dataframe window by window
    and save every window to a database.

    The dataframe is cut into consecutive, non-overlapping windows of
    ``num_of_rows`` rows; a trailing window with fewer rows is skipped. In
    every window the last column is taken as the target values and every
    other column as one signal. Each signal is min-max scaled to [0, 1]
    (a constant signal becomes all zeros), transformed with ``pywt.cwt`` and
    replaced by the magnitude of its coefficients. The per-signal results are
    stacked along a new leading axis and passed to ``insert_cwt_data``.

    Args:
        df (pd.DataFrame): The dataframe containing the signals, with the
            target values in the last column
        num_of_rows (int): The number of rows in one window
        wave (str): The name of the wavelet to use
        frq (int): The sampling frequency of the signals
        resolution (int): The number of scales, spaced geometrically from 1 to 200
        db_path (str): The path to the database
    Returns:
        None
    Raises:
        ValueError: If ``num_of_rows`` is smaller than 1
    """
    if num_of_rows < 1:
        raise ValueError(f"num_of_rows must be a positive integer, got {num_of_rows!r}")

    create_database(db_path)

    # Identical for every signal, so compute them once.
    widths = np.geomspace(1, 200, num=resolution)
    # pywt.cwt uses the sampling period only to scale the frequencies it
    # returns, which are discarded here; the coefficients do not depend on it.
    sampling_period = 1.0 / frq

    # The stop value guarantees that only complete windows are visited.
    for start in range(0, len(df) - num_of_rows + 1, num_of_rows):
        # Always go to (columns, rows), whatever the number of columns is.
        window = df.iloc[start: start + num_of_rows].values.T
        channels, targets = window[:-1], window[-1]

        scalograms = []
        for channel in channels:
            low = np.min(channel)
            span = np.max(channel) - low
            if span == 0:
                # A constant signal would otherwise produce 0 / 0 = NaN.
                scaled = np.zeros_like(channel, dtype=float)
            else:
                scaled = (channel - low) / span
            coefficients, _ = pywt.cwt(
                scaled, widths, wave, sampling_period=sampling_period
            )
            scalograms.append(np.abs(coefficients))

        insert_cwt_data(db_path, np.stack(scalograms, axis=0), targets)

# Reading data from files

### Tasks performed in each experimental run

1. Baseline, eyes open
2. Baseline, eyes closed
3. Task 1 (open and close left or right fist)
4. Task 2 (imagine opening and closing left or right fist)
5. Task 3 (open and close both fists or both feet)
6. Task 4 (imagine opening and closing both fists or both feet)
7. Task 1
8. Task 2
9. Task 3
10. Task 4
11. Task 1
12. Task 2
13. Task 3
14. Task 4

In [5]:
# Runs 3 and 7 are both "Task 1" (open and close left or right fist).
# Subjects 1-3 go to the training set, subjects 5 and 6 to the validation set.
df_train = read_all_file_df(num_exp=[3, 7], num_people=[1, 2, 3])
df_val = read_all_file_df(num_exp=[3, 7], num_people=[5, 6])

Extracting EDF parameters from /home/daniel/repos/Decoding_of_EEG/data/raw/S001/S001R03.edf...
EDF file detected


FileNotFoundError: [Errno 2] No such file or directory: '/home/daniel/repos/Decoding_of_EEG/data/raw/S001/S001R03.edf'

### Save the transformed data to the database

In [None]:
# Both datasets are transformed with the same settings; only the source
# dataframe and the destination database differ. Validation is written first.
cwt_settings = dict(num_of_rows=1000, wave="cgau4", frq=160, resolution=10)
for frame, database in (
    (df_val, "df_val_cwt_data.db"),
    (df_train, "df_train_cwt_data.db"),
):
    df_to_CWTfiles(frame, db_path=database, **cwt_settings)

In [None]:
# Show the dataframe as the cell output.
# NOTE(review): none of the cells visible here assigns `df` (only `df_train`
# and `df_val`) — confirm it is defined elsewhere before running this cell.
df

In [None]:
def df_to_CWT(
        df, num_of_rows=1000, wave="cgau4", frq=160, resolution=100, db_path="cwt_data.db"
):
    """
    Compute the continuous wavelet transform of the first window of a
    dataframe and return it instead of saving it.

    Only the first ``num_of_rows`` rows are processed. The last column is
    taken as the target values and every other column as one signal. Each
    signal is min-max scaled to [0, 1] (a constant signal becomes all zeros),
    transformed with ``pywt.cwt`` and replaced by the magnitude of its
    coefficients; the per-signal results are stacked along a new leading axis.

    Args:
        df (pd.DataFrame): The dataframe containing the signals, with the
            target values in the last column
        num_of_rows (int): The number of rows to process
        wave (str): The name of the wavelet to use
        frq (int): The sampling frequency of the signals
        resolution (int): The number of scales, spaced geometrically from 1 to 200
        db_path (str): The path to the database; the table is created if it
            is missing, but nothing is inserted by this function
    Returns:
        tuple or None: ``(array_cwt, targets)`` for the first window, or
            ``None`` when ``df`` has fewer than ``num_of_rows`` rows
    Raises:
        ValueError: If ``num_of_rows`` is smaller than 1
    """
    if num_of_rows < 1:
        raise ValueError(f"num_of_rows must be a positive integer, got {num_of_rows!r}")

    # Create the database if it does not exist yet. Kept so that existing
    # callers still get the table, even though no rows are written here.
    create_database(db_path)

    if len(df) < num_of_rows:
        # No complete window is available.
        return None

    # Always go to (columns, rows), whatever the number of columns is.
    window = df.iloc[0:num_of_rows].values.T
    channels, targets = window[:-1], window[-1]

    widths = np.geomspace(1, 200, num=resolution)
    # pywt.cwt uses the sampling period only to scale the frequencies it
    # returns, which are discarded here; the coefficients do not depend on it.
    sampling_period = 1.0 / frq

    scalograms = []
    for channel in channels:
        low = np.min(channel)
        span = np.max(channel) - low
        if span == 0:
            # A constant signal would otherwise produce 0 / 0 = NaN.
            scaled = np.zeros_like(channel, dtype=float)
        else:
            scaled = (channel - low) / span
        coefficients, _ = pywt.cwt(
            scaled, widths, wave, sampling_period=sampling_period
        )
        scalograms.append(np.abs(coefficients))

    array_cwt = np.stack(scalograms, axis=0)
    return array_cwt, targets


In [None]:
# Transform a single window in memory so the result can be inspected.
# NOTE(review): `df` is not assigned in any cell visible here — confirm it
# exists; unpacking also fails if df_to_CWT returns None.
cwt, targ = df_to_CWT(
    df,
    num_of_rows=1000,
    wave="cgau4",
    frq=160,
    resolution=10,
    db_path="cwt_data.db",
)


In [None]:
# Step-by-step version of the wavelet transform for the first `num_of_rows`
# rows of `df`. Every intermediate value is left in a module-level variable
# so it can be inspected afterwards.
# NOTE(review): `df` is not assigned in any cell visible here — confirm.
frq = 160
resolution = 10
num_of_rows = 1000
wave = "cgau4"
signals = df.iloc[0: 0 + num_of_rows].values
list_cwt = []
targets = ()
# Transpose to (columns, rows) only when the window has exactly 65 columns.
# NOTE(review): presumably 64 channels plus the target column — confirm; with
# any other width the loop below iterates over rows instead of columns.
if signals.shape == (num_of_rows, 65):
    signals = signals.transpose(1, 0)
j = 0
# print(len(signals))
for signal in signals:
    j += 1
    # The last entry is not transformed: it is kept as the target values.
    if j == len(signals):
        targets = signal
        break
    # Min-max scale to [0, 1].
    # NOTE(review): a constant signal makes the denominator zero.
    signal = (signal - np.min(signal)) / (np.max(signal) - np.min(signal))
    time = np.linspace(0, len(signal) / frq, len(signal))
    # `resolution` scales, spaced geometrically between 1 and 200.
    widths = np.geomspace(1, 200, num=resolution)
    # Mean spacing of the generated time axis, handed to pywt.cwt below.
    sampling_period = np.diff(time).mean()
    cwtmatr, freqs = pywt.cwt(
        signal, widths, wave, sampling_period=sampling_period
    )
    # Keep only the magnitude of the coefficients.
    cwtmatr = np.abs(cwtmatr)
    list_cwt.append(cwtmatr)
# Stack the per-signal results along a new leading axis.
array_cwt = np.stack(list_cwt, axis=0)