In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from sklearn.feature_selection import VarianceThreshold, SelectKBest, SelectFromModel, f_classif, mutual_info_regression, RFE, RFECV
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.linear_model import LinearRegression, Lasso, LogisticRegression
from sklearn.preprocessing import StandardScaler
#from imblearn.under_sampling import ClusterCentroids
from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
from sklearn.metrics import accuracy_score, recall_score, f1_score
from environment_variables.global_variables import ROOT, SOURCE_FOLDER, OUTPUT_FOLDER
import toolbox as tb

In [2]:
PATIENTS = ['3132', '3105', '3152', '3249', '3300']
SOURCE_PATH = os.path.normpath(os.path.join(ROOT, SOURCE_FOLDER)).replace('/','\\')
OUTPUT_PATH = os.path.join(ROOT, OUTPUT_FOLDER)

master = pd.DataFrame()

sr = 16000
n_mels = 256
# Each windows is 1 second
n_fft = int(sr * 1)
# Each windows has an overlap of 25%
hop_length = int(n_fft * 0.75)
f_max = sr / 2 * 0.8

In [3]:
SOURCE_PATH

'DATA'

In [5]:
path = os.path.join(SOURCE_PATH, "3105")
path

'DATA/3105'

In [6]:
os.path.exists(path)

False

In [None]:

if os.path.exists(path):
    return [f for f in os.listdir(path) if f.endswith('.wav')]
else:
    return []

In [31]:
recordings = tb.get_recordings(root=SOURCE_PATH, folder="3105")
recordings

[]

In [11]:
# Extracting the recording and loading them in a DataFrame.
for patient in PATIENTS:
    recordings = tb.get_recordings(root=SOURCE_PATH, folder=patient)
    for recording in recordings:
        
        df = pd.read_excel(os.path.join(SOURCE_PATH, patient, f'PAT{patient}.xlsx'))

        # * Loading the labels
        start_indexes = []
        end_indexes = []
        for _, row in df[
                (
                    df['video'] == recording[:-4]
                ) & (
                    df['label'] == 'seizure'
                )
            ].iterrows():
            start_indexes.append(row['start_time'])
            end_indexes.append(row['end_time'])
        
        # * Loading the recording and add the labels
        df_recording = tb.load_recording(
            path=os.path.join(SOURCE_PATH, patient),
            file= recording,
            n_fft=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
            power_to_db=True,
        )

        df_recording = tb.project_label_recording(
            data=df_recording,
            start_indexes=start_indexes,
            end_indexes=end_indexes,
            hop_length=hop_length,
            sr=sr,
        )
        
        if len(df_recording['label'].unique()) > 2:
            df_recording.to_excel(os.path.join(OUTPUT_PATH, 'error_df_recording.xlsx'))
        
        master = pd.concat([master, df_recording])
        



In [12]:
master.head()

In [5]:
masters = [master]
for col in masters[0].columns:
    if isinstance(col, int):
        temp = pd.DataFrame()
        temp['shortmean_' + str(col)] = master[col].rolling(window=5).mean()
        temp['shortstd_' + str(col)] = master[col].rolling(window=5).std()
        temp['longmean_' + str(col)] = master[col].rolling(window=20).mean()
        temp['longstd_' + str(col)] = master[col].rolling(window=20).std()
        temp['shift1_' + str(col)] = master[col].shift(1)
        temp['shift2_' + str(col)] = master[col].shift(2)
        temp['shift3_' + str(col)] = master[col].shift(3)
        masters.append(temp)

master = pd.concat(masters, ignore_index=False, axis=1)

In [6]:
master.head()

In [7]:
# We convert the column names so that the n_mels columns are interpreted as string.
master.columns = master.columns.astype(str)
master.dropna(how='any', inplace=True, axis=0)

In [9]:
master.head()

In [8]:
data = master.drop(labels=['label'], axis=1, inplace=False)
target = master['label']

X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

KeyError: "['label'] not found in axis"