# Import libraries

In [1]:
import numpy as np
import pandas as pd
import mne  # library for reading edf files
import pywt  # library for continuous wavelet transform
import sqlite3
import pickle
from tqdm import tqdm


# Create functions to read data from file and save to database

In [2]:
def file_to_DataDrame(path):
    """
    Read one EDF recording into a DataFrame of channel samples plus a per-sample target.

    Each row is one sample and each column is one channel (dots are stripped
    from the channel names). Rows whose channel values are all zero are
    dropped. The ``target`` column holds the integer parsed from the
    annotation description (``"T0"`` -> 0, ``"T1"`` -> 1, ...) of the most
    recent annotation whose onset is at or before that sample.

    Args:
        path (str): The path to the file
    Returns:
        pd.DataFrame: The dataframe containing the data and the target values
    Examples:
        >>> df = file_to_DataDrame("data/S001/S001R03.edf")
        >>> print(df)
                Fc5        Fc3        Fc1  ...        Oz        O2        Iz  target
        0 -0.000046  -0.000041  -0.000032  ...  0.000040  0.000108  0.000055       0
        1 -0.000054  -0.000048  -0.000034  ...  0.000064  0.000114  0.000074       0
        ...
    """

    reader = mne.io.read_raw_edf(path, preload=True)
    annotations = reader.annotations
    # Use the recording's own sampling rate instead of assuming 160 Hz.
    sfreq = float(reader.info["sfreq"])

    df = pd.DataFrame(
        reader.get_data().T,
        columns=[channel.replace(".", "") for channel in reader.ch_names],
    )  # transpose so that rows are samples and columns are channels
    # Remove rows with all zeros; copy so the column assignment below does not
    # write into a view of the unfiltered frame.
    df = df[~(df == 0).all(axis=1)].copy()

    # Integer class per annotation, parsed from descriptions such as "T1".
    code_values = np.array(
        [int(str(code).replace("T", "")) for code in annotations.description],
        dtype=int,
    )
    # Work in integer sample indices: comparing rounded float times for
    # equality is fragile and a float-step arange can produce an extra element.
    onset_samples = np.rint(
        np.asarray(annotations.onset, dtype=float) * sfreq
    ).astype(np.int64)
    order = np.argsort(onset_samples, kind="stable")  # searchsorted needs ascending onsets
    onset_samples = onset_samples[order]
    code_values = code_values[order]

    # As in the original, time restarts at 0 for the rows kept after filtering.
    sample_index = np.arange(len(df), dtype=np.int64)
    # Index of the last annotation that started at or before each sample.
    annotation_index = np.searchsorted(onset_samples, sample_index, side="right") - 1
    # Samples before the first onset take the first annotation's code rather
    # than wrapping around to the last one through a negative index.
    annotation_index = np.clip(annotation_index, 0, None)

    df["target"] = code_values[annotation_index]
    return df


In [3]:
def read_all_file_df(num_exp=[3, 4], num_people=[1, 2], path="../../data/raw/"):
    """
    Read every requested run of every requested subject and stack them into one dataframe.

    Files are looked up as ``{path}/S{subject:03d}/S{subject:03d}R{run:02d}.edf``
    and loaded with ``file_to_DataDrame``; the per-file frames are concatenated
    row-wise in subject-major order. Row indices of the individual files are
    kept as they are, so they repeat across files.

    format:
            Fc5        Fc3        Fc1  ...        Oz        O2        Iz  target
    0 -0.000046  -0.000041  -0.000032  ...  0.000040  0.000108  0.000055       0
    1 -0.000054  -0.000048  -0.000034  ...  0.000064  0.000114  0.000074       0
    ...
    Args:
        num_exp (list): The list of experiments to read
        num_people (list): The list of people to read
        path (str): The path to the files
    Returns:
        pd.DataFrame: The dataframe containing the data and the target values
            (an empty dataframe when no subject/run combination is requested)
    """
    # Collect the frames first and concatenate once: concatenating inside the
    # loop re-copies all previously loaded rows on every iteration.
    frames = [
        file_to_DataDrame(f"{path}/S{subject:03d}/S{subject:03d}R{file:02d}.edf")
        for subject in num_people
        for file in num_exp
    ]
    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    return pd.concat(frames, axis=0)

In [None]:
import psycopg2
import pickle
import numpy as np

def create_database(dbname, user, password, host):
    """
    Create the ``wavelet_transforms`` table if it does not exist yet.

    Opens a PostgreSQL connection with the given parameters, issues the
    ``CREATE TABLE IF NOT EXISTS`` statement, commits and closes the
    connection. The connection is closed even when the statement fails.

    Args:
        dbname (str): Name of the database to connect to
        user (str): Database user
        password (str): Password of that user
        host (str): Host name or address of the database server
    Returns:
        None
    """
    conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host)
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
        CREATE TABLE IF NOT EXISTS wavelet_transforms (
            id SERIAL PRIMARY KEY,
            cwt_data BYTEA,
            target INTEGER
        )
        """
        )
        conn.commit()
    finally:
        # Always release the connection, also when execute/commit raises.
        conn.close()

def insert_cwt_data(dbname, user, password, host, cwt_data, targets):
    """
    Insert one ``wavelet_transforms`` row per slice along the last axis of ``cwt_data``.

    ``cwt_data`` is reordered with ``transpose(2, 0, 1)`` so that its last axis
    comes first, and every leading-axis slice is flattened, cast to float32,
    pickled and stored in ``cwt_data`` together with ``targets[i]``. All rows
    are committed together at the end; the connection is closed even when an
    insert fails.

    Args:
        dbname (str): Name of the database to connect to
        user (str): Database user
        password (str): Password of that user
        host (str): Host name or address of the database server
        cwt_data (np.ndarray): 3-D array; its last axis indexes the rows to insert
        targets (sequence): One label per slice along the last axis of ``cwt_data``
    Returns:
        None
    """
    # Reshape before connecting so malformed input fails without opening a connection.
    per_sample = np.asarray(cwt_data).transpose(2, 0, 1)
    per_sample = per_sample.reshape(per_sample.shape[0], -1)

    conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host)
    try:
        cursor = conn.cursor()
        for i, single_cwt in enumerate(per_sample):
            # NOTE: the blob is a pickle; only unpickle it from a trusted database.
            cwt_blob = pickle.dumps(np.array(single_cwt, dtype=np.float32))
            cursor.execute(
                "INSERT INTO wavelet_transforms (cwt_data, target) VALUES (%s, %s)",
                # Hand the driver a plain Python int for the INTEGER column
                # instead of whatever NumPy scalar type the caller's array holds.
                (psycopg2.Binary(cwt_blob), int(targets[i])),
            )
        conn.commit()
    finally:
        # Always release the connection, also when an insert raises.
        conn.close()

In [None]:
import psycopg2
import pickle
import numpy as np

def create_database(dbname, user, password, host):
    """
    Create the ``wavelet_transforms`` table if it does not exist yet.

    Opens a PostgreSQL connection with the given parameters, issues the
    ``CREATE TABLE IF NOT EXISTS`` statement, commits and closes the
    connection. The connection is closed even when the statement fails.

    NOTE(review): this cell re-defines the function already defined in the
    preceding cell; only one of the two definitions should be kept.

    Args:
        dbname (str): Name of the database to connect to
        user (str): Database user
        password (str): Password of that user
        host (str): Host name or address of the database server
    Returns:
        None
    """
    conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host)
    try:
        cursor = conn.cursor()
        cursor.execute(
            """
        CREATE TABLE IF NOT EXISTS wavelet_transforms (
            id SERIAL PRIMARY KEY,
            cwt_data BYTEA,
            target INTEGER
        )
        """
        )
        conn.commit()
    finally:
        # Always release the connection, also when execute/commit raises.
        conn.close()

def insert_cwt_data(dbname, user, password, host, cwt_data, targets):
    """
    Insert one ``wavelet_transforms`` row per slice along the last axis of ``cwt_data``.

    ``cwt_data`` is reordered with ``transpose(2, 0, 1)`` so that its last axis
    comes first, and every leading-axis slice is flattened, cast to float32,
    pickled and stored in ``cwt_data`` together with ``targets[i]``. All rows
    are committed together at the end; the connection is closed even when an
    insert fails.

    NOTE(review): this cell re-defines the function already defined in an
    earlier cell; only one of the two definitions should be kept. The
    signature previously read ``dbname=,`` which is a syntax error.

    Args:
        dbname (str): Name of the database to connect to
        user (str): Database user
        password (str): Password of that user
        host (str): Host name or address of the database server
        cwt_data (np.ndarray): 3-D array; its last axis indexes the rows to insert
        targets (sequence): One label per slice along the last axis of ``cwt_data``
    Returns:
        None
    """
    # Reshape before connecting so malformed input fails without opening a connection.
    per_sample = np.asarray(cwt_data).transpose(2, 0, 1)
    per_sample = per_sample.reshape(per_sample.shape[0], -1)

    conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host)
    try:
        cursor = conn.cursor()
        for i, single_cwt in enumerate(per_sample):
            # NOTE: the blob is a pickle; only unpickle it from a trusted database.
            cwt_blob = pickle.dumps(np.array(single_cwt, dtype=np.float32))
            cursor.execute(
                "INSERT INTO wavelet_transforms (cwt_data, target) VALUES (%s, %s)",
                # Hand the driver a plain Python int for the INTEGER column
                # instead of whatever NumPy scalar type the caller's array holds.
                (psycopg2.Binary(cwt_blob), int(targets[i])),
            )
        conn.commit()
    finally:
        # Always release the connection, also when an insert raises.
        conn.close()

In [6]:
# from tqdm import tqdm
# import numpy as np
# import pywt
# 
# def df_to_CWTfiles(
#         df, num_of_rows=1000, wave="cgau4", frq=160, resolution=100, db_path="cwt_data.db"
# ):
#     """
#     This function takes in a dataframe and saves the continuous wavelet transform of the signals to a database.
# 
#     Args:
#         df (pd.DataFrame): The dataframe containing the signals
#         num_of_rows (int): The number of rows to process 
#         wave (str): The type of wave to use
#         frq (int): The frequency of the signals
#         resolution (int): The resolution of the wavelet transform
#         db_path (str): The path to the database
#     Returns:
#         None
#     """
#     create_database(db_path)  # Ensure this function is defined elsewhere in your code.
# 
#     # Calculate the number of chunks to process
#     num_chunks = len(df) // num_of_rows + (1 if len(df) % num_of_rows != 0 else 0)
#     
#     # Create a tqdm progress bar for the loop
#     for i in range(0, len(df), num_of_rows):
#         end_index = i + num_of_rows
#         if end_index > len(df):
#             end_index = len(df)
#         signals = df.iloc[i:end_index].values
#         list_cwt = []
# 
#         if signals.shape == (num_of_rows, 65):
#             signals = signals.transpose(1, 0)
#         
#         for signal in signals[:-1]:  # Exclude the last item assuming it's the target
#             signal = (signal - np.min(signal)) / (np.max(signal) - np.min(signal))
#             time = np.linspace(0, len(signal) / frq, len(signal))
#             widths = np.geomspace(1, 200, num=resolution)
#             sampling_period = np.diff(time).mean()
#             cwtmatr, freqs = pywt.cwt(
#                 signal, widths, wave, sampling_period=sampling_period
#             )
#             cwtmatr = np.abs(cwtmatr)
#             list_cwt.append(cwtmatr)
# 
#         targets = signals[-1]  # Assuming the last row are the targets
#         array_cwt = np.stack(list_cwt, axis=0)
#         insert_cwt_data(db_path, array_cwt, targets)  # Ensure this function is defined elsewhere in your code.
#         del array_cwt


In [None]:
def df_to_CWTdb(
        df, num_of_rows=1000, wave="cgau4", frq=160, resolution=100, db_path="cwt_data.db",
        dbname=None, user=None, password=None, host=None,
):
    """
    Compute the continuous wavelet transform of ``df`` chunk by chunk and store it in PostgreSQL.

    ``df`` is split into consecutive chunks of ``num_of_rows`` rows; a trailing
    chunk with fewer rows is skipped. In every chunk the last column is taken
    as the targets and every other column is min-max normalised, transformed
    with ``pywt.cwt`` and reduced to its magnitude. The stacked magnitudes and
    the targets are then passed to ``insert_cwt_data``.

    Args:
        df (pd.DataFrame): Signals, one column per channel, with the target as last column
        num_of_rows (int): The number of rows (samples) per chunk
        wave (str): The name of the wavelet passed to ``pywt.cwt``
        frq (int): The sampling frequency of the signals
        resolution (int): The number of scales, spaced geometrically from 1 to 200
        db_path (str): Unused; kept so existing calls keep working
        dbname (str | None): Database name; defaults to ``$PGDATABASE`` or "mydatabase"
        user (str | None): Database user; defaults to ``$PGUSER`` or "myuser"
        password (str | None): Password; defaults to ``$PGPASSWORD`` or the value
            that was previously hard-coded here
        host (str | None): Server host; defaults to ``$PGHOST`` or "localhost"
    Returns:
        None
    """
    import os  # local import: the notebook's import cell does not provide it

    # NOTE(review): the literal fallbacks are the values this function used to
    # hard-code; prefer supplying real credentials through the environment.
    dbname = os.environ.get("PGDATABASE", "mydatabase") if dbname is None else dbname
    user = os.environ.get("PGUSER", "myuser") if user is None else user
    password = (
        os.environ.get("PGPASSWORD", "mysecretpassword") if password is None else password
    )
    # The original passed the integer 5432 (a port number) as the host.
    # "localhost" is an assumed default — confirm for your deployment.
    host = os.environ.get("PGHOST", "localhost") if host is None else host

    create_database(dbname=dbname, user=user, password=password, host=host)

    # Loop-invariant settings. The sampling period only scales the frequencies
    # returned by pywt.cwt, which are discarded below.
    widths = np.geomspace(1, 200, num=resolution)
    sampling_period = 1.0 / frq

    # Only full chunks are processed; a shorter trailing remainder is skipped.
    for start in range(0, len(df) - num_of_rows + 1, num_of_rows):
        # Rows of the frame are samples, so transpose to iterate per column,
        # whatever the number of channels is.
        signals = df.iloc[start: start + num_of_rows].values.T
        channel_signals, targets = signals[:-1], signals[-1]

        list_cwt = []
        for signal in channel_signals:
            lowest = np.min(signal)
            span = np.max(signal) - lowest
            if span == 0:
                # A constant channel would divide by zero; treat it as all zeros.
                normalised = np.zeros_like(signal, dtype=float)
            else:
                normalised = (signal - lowest) / span
            cwtmatr, _freqs = pywt.cwt(
                normalised, widths, wave, sampling_period=sampling_period
            )
            list_cwt.append(np.abs(cwtmatr))

        array_cwt = np.stack(list_cwt, axis=0)
        # Save to the database
        insert_cwt_data(
            dbname=dbname, user=user, password=password, host=host,
            cwt_data=array_cwt, targets=targets,
        )
        del array_cwt  # release the chunk before computing the next one

# Reading data

## What is done in each experimental run

1. Baseline, eyes open
2. Baseline, eyes closed
3. Task 1 (open and close left or right fist)
4. Task 2 (imagine opening and closing left or right fist)
5. Task 3 (open and close both fists or both feet)
6. Task 4 (imagine opening and closing both fists or both feet)
7. Task 1
8. Task 2
9. Task 3
10. Task 4
11. Task 1
12. Task 2
13. Task 3
14. Task 4

## Read data from files to dataframe

In [8]:
# Load runs 3 and 7 for the training subjects (1-3) and the validation subjects (5-6).
df_train = read_all_file_df(
    num_exp=[3, 7],
    num_people=[1, 2, 3],
    path="../../data/raw/",
)
df_val = read_all_file_df(
    num_exp=[3, 7],
    num_people=[5, 6],
    path="../../data/raw/",
)

Extracting EDF parameters from /home/daniel/repos/Decoding_of_EEG/data/raw/S001/S001R03.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Extracting EDF parameters from /home/daniel/repos/Decoding_of_EEG/data/raw/S001/S001R07.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...
Extracting EDF parameters from /home/daniel/repos/Decoding_of_EEG/data/raw/S002/S002R03.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19679  =      0.000 ...   122.994 secs...
Extracting EDF parameters from /home/daniel/repos/Decoding_of_EEG/data/raw/S002/S002R07.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19679  =      0.000 ...   122.994 secs...
Extracting EDF parameters from /home/daniel/repos/Decoding_of_EE

## Transform data to CWT and save to database

In [17]:
# Export the validation frame first, then the training frame, with identical
# CWT settings; only the db_path argument differs between the two calls.
for frame, sqlite_path in (
        (df_val, "./df_val_cwt_data.db"),
        (df_train, "./df_train_cwt_data.db"),
):
    df_to_CWTdb(
        frame,
        num_of_rows=1000,
        wave="cgau4",
        frq=160,
        resolution=10,
        db_path=sqlite_path,
    )

OperationalError: connection to server at "2222" (0.0.8.174), port 5432 failed: Connection timed out
	Is the server running on that host and accepting TCP/IP connections?


In [None]:
# Display the training frame returned by read_all_file_df.
df_train

In [None]:
# Display the validation frame returned by read_all_file_df.
df_val

In [None]:
def df_to_CWT(
        df, num_of_rows=1000, wave="cgau4", frq=160, resolution=100, db_path="cwt_data.db"
):
    """
    Compute the continuous wavelet transform of the first ``num_of_rows`` rows of ``df``.

    The last column of the chunk is taken as the targets; every other column
    is min-max normalised, transformed with ``pywt.cwt`` and reduced to its
    magnitude. Nothing is written to a database: the result is returned.

    Only the first chunk is processed. The original looped over all chunks but
    returned from inside the loop, so it never got past the first one; that
    behaviour is kept and made explicit here.

    Args:
        df (pd.DataFrame): Signals, one column per channel, with the target as last column
        num_of_rows (int): The number of rows (samples) to transform
        wave (str): The name of the wavelet passed to ``pywt.cwt``
        frq (int): The sampling frequency of the signals
        resolution (int): The number of scales, spaced geometrically from 1 to 200
        db_path (str): Unused; kept so existing calls keep working
    Returns:
        tuple: ``(array_cwt, targets)`` where ``array_cwt`` stacks one magnitude
            matrix per signal column along axis 0 and ``targets`` is the last
            column of the chunk
    Raises:
        ValueError: If ``df`` has fewer than ``num_of_rows`` rows
    """
    # The former ``create_database(db_path)`` call was dropped: it does not
    # match create_database's four required parameters, and this function
    # returns its result instead of storing it.
    if num_of_rows > len(df):
        # The original silently fell through and returned None in this case.
        raise ValueError(
            f"df has {len(df)} rows, fewer than num_of_rows={num_of_rows}"
        )

    # Rows of the frame are samples, so transpose to iterate per column,
    # whatever the number of channels is.
    signals = df.iloc[0:num_of_rows].values.T
    channel_signals, targets = signals[:-1], signals[-1]

    # The sampling period only scales the frequencies returned by pywt.cwt,
    # which are discarded below.
    widths = np.geomspace(1, 200, num=resolution)
    sampling_period = 1.0 / frq

    list_cwt = []
    for signal in channel_signals:
        lowest = np.min(signal)
        span = np.max(signal) - lowest
        if span == 0:
            # A constant channel would divide by zero; treat it as all zeros.
            normalised = np.zeros_like(signal, dtype=float)
        else:
            normalised = (signal - lowest) / span
        cwtmatr, _freqs = pywt.cwt(
            normalised, widths, wave, sampling_period=sampling_period
        )
        list_cwt.append(np.abs(cwtmatr))

    array_cwt = np.stack(list_cwt, axis=0)
    return array_cwt, targets


In [None]:
# NOTE(review): this cell used to pass `df`, which is never defined at notebook
# level (NameError on a fresh kernel). `df_val` is assumed to be the intended
# frame — confirm.
cwt, targ = df_to_CWT(
    df_val, num_of_rows=1000, wave="cgau4", frq=160, resolution=10, db_path="cwt_data.db"
)


In [None]:
# `array_cwt` only exists inside the functions above; the array returned by
# df_to_CWT is bound to `cwt` in the previous cell.
cwt.shape