In [3]:
import numba
import os
import numpy as np
import zipfile
import shutil
import glob
import re
import math
import pandas as pd
from pathlib import Path
from scipy.io import wavfile
import librosa
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline 
import pywt

### Read Dataset

In [4]:
# Root of the working tree: the directory the notebook kernel was started in.
BASE_DIR = os.getcwd()

# Directories used by the notebook.
Dirs = {
    "BASE" : BASE_DIR,
    # os.path.join avoids a doubled separator when BASE_DIR already ends in one.
    "DATASETS" : os.path.join(BASE_DIR, "datasets"),
}

# Description of the dataset archive.
#   NAME     - name of the folder created under Dirs["DATASETS"].
#   ZIP_NAME - name of the folder inside the extracted archive that holds the records.
#   ZIP_PATH - location of the archive on disk. Set the EGG_ZIP_PATH environment
#              variable to point at your own copy; otherwise the original
#              machine-specific location is used.
DatasetInfo = {
    "NAME" : "EGG",
    "ZIP_NAME" : "EGG-database",
    "ZIP_PATH" : os.environ.get(
        "EGG_ZIP_PATH",
        r"C:\Users\Nada\Documents\Interns\GSOC\code\dataset\EGG-database.zip",
    ),
}

In [5]:
def build_dataset_directory(dataset_info):
    """Extract a dataset archive into a fresh folder under ``Dirs['DATASETS']``.

    Any previous extraction at the target location is removed first, so the
    returned directory contains exactly the contents of the archive.

    Parameters
    ----------
    dataset_info : dict
        Must provide ``"NAME"`` (name of the target folder) and ``"ZIP_PATH"``
        (path of the zip archive to extract).

    Returns
    -------
    str
        Path of the directory the archive was extracted into.

    Raises
    ------
    FileNotFoundError
        If the archive does not exist.
    zipfile.BadZipFile
        If the file is not a valid zip archive.
    """
    name, zip_path = dataset_info['NAME'], dataset_info["ZIP_PATH"]
    dataset_dir = os.path.join(Dirs['DATASETS'], name)
    # Open the archive before touching the target directory: a missing or
    # corrupt zip then fails here and the previous extraction is left intact.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        if os.path.exists(dataset_dir):
            shutil.rmtree(dataset_dir)
        Path(dataset_dir).mkdir(parents=True, exist_ok=True)
        zip_ref.extractall(dataset_dir)
    return dataset_dir
    
# Extract the archive described by DatasetInfo; the returned directory is read by build_dataframe below.
dataset_dir = build_dataset_directory(DatasetInfo)

In [6]:

def load_dataset_metadata(dataset_dir,i,label):
    """Describe one recording as a single metadata row.

    Parameters
    ----------
    dataset_dir : str
        Directory the dataset archive was extracted into.
    i
        Subject number; it appears in the file name as ``ID<i>_<label>.txt``.
    label : str
        Recording condition; used in the file name and as the row's label.

    Returns
    -------
    list
        ``[[name, path, label]]``. The row is wrapped in an outer list so the
        caller can ``extend`` its running list of rows with it.
    """
    stem = f'ID{i}_{label}'
    # Records live in the DatasetInfo["ZIP_NAME"] folder inside the extraction directory.
    text_file = f'{dataset_dir}{os.sep}{DatasetInfo["ZIP_NAME"]}{os.sep}{stem}.txt'
    return [[stem, text_file, label]]


def build_dataframe(dataset_directory=None, subject_ids=range(1, 21),
                    labels=("fasting", "postprandial")):
    """Build the metadata table listing every recording of the dataset.

    Parameters
    ----------
    dataset_directory : str, optional
        Directory the archive was extracted into. Defaults to the
        notebook-level ``dataset_dir``.
    subject_ids : iterable, optional
        Subject numbers to include. Defaults to 1..20.
    labels : iterable of str, optional
        Recording conditions to include for each subject, in row order.
        Defaults to ``("fasting", "postprandial")``.

    Returns
    -------
    pandas.DataFrame
        One row per (subject, label) pair with columns ``name``, ``path`` and
        ``label``; rows are grouped by subject, then ordered as in ``labels``.
    """
    if dataset_directory is None:
        # Fall back to the notebook-level extraction directory so the
        # zero-argument call behaves as before.
        dataset_directory = dataset_dir
    records = [
        row
        for subject_id in subject_ids
        for label in labels
        for row in load_dataset_metadata(dataset_directory, subject_id, label)
    ]
    return pd.DataFrame(records, columns=["name", "path", "label"])

# Metadata table with one row per recording and the columns name, path and label.
df = build_dataframe()

In [7]:
# Preview the first rows of the metadata table.
df.head()

Unnamed: 0,name,path,label
0,ID1_fasting,c:\Users\Nada\Documents\GitHub\numpy-analyzer\...,fasting
1,ID1_postprandial,c:\Users\Nada\Documents\GitHub\numpy-analyzer\...,postprandial
2,ID2_fasting,c:\Users\Nada\Documents\GitHub\numpy-analyzer\...,fasting
3,ID2_postprandial,c:\Users\Nada\Documents\GitHub\numpy-analyzer\...,postprandial
4,ID3_fasting,c:\Users\Nada\Documents\GitHub\numpy-analyzer\...,fasting


In [8]:
# Save the metadata table as "<NAME>-metadata.csv" (relative path, so it lands in the working directory).
# NOTE(review): the row index is written as the first column (pandas default); pass index=False if readers should not see it.
df.to_csv(f"{DatasetInfo['NAME']}-metadata.csv")

In [16]:
def load_record(record,label):
    """Read one recording text file and pair its lines with a label.

    Parameters
    ----------
    record : str
        Path of the text file to read.
    label
        Label to attach to the file's contents.

    Returns
    -------
    list
        ``[[lines, label]]`` where ``lines`` holds every line of the file as a
        string with surrounding whitespace removed. The lines are not parsed
        into numbers. The pair is wrapped in an outer list so the caller can
        ``extend`` its running list with it.
    """
    with open(record) as handle:
        stripped_lines = [raw_line.strip() for raw_line in handle]
    return [[stripped_lines, label]]

In [17]:
def load_records(df):
    """Load the contents of every recording listed in a metadata table.

    Parameters
    ----------
    df : pandas.DataFrame
        Metadata table whose second column holds the file path and whose
        third column holds the label (the layout produced by
        ``build_dataframe``: name, path, label).

    Returns
    -------
    list
        The concatenation of ``load_record(path, label)`` for every row, in
        row order. Empty when ``df`` has no rows.
    """
    data = []
    # Iterate plain tuples rather than row Series: integer keys on a
    # string-labelled Series (``row[1]``) are deprecated in pandas, while
    # tuple indexing keeps the same positional meaning.
    for row in df.itertuples(index=False, name=None):
        data.extend(load_record(row[1], row[2]))
    return data

# Read every recording listed in the metadata table into memory as [lines, label] pairs.
records_data = load_records(df)



In [19]:
# Inspect the first loaded record: [list of stripped text lines, label].
records_data[0]

[['-0.193 -0.154  0.137',
  '-0.335 -0.148  0.212',
  '-0.124  0.025  0.235',
  '-0.084  0.142  0.286',
  '-0.073  0.199  0.305',
  '-0.081 -0.161  0.331',
  '-0.178  0.070  0.562',
  '-0.149 -0.032  0.390',
  '-0.135  0.066  0.418',
  '-0.146  0.028  0.389',
  '-0.149  0.023  0.367',
  '-0.210 -0.057  0.348',
  '-0.250 -0.146  0.280',
  '-0.191 -0.062  0.389',
  '-0.187 -0.060  0.507',
  '-0.146 -0.031  0.571',
  '-0.138  0.003  0.485',
  '-0.194 -0.042  0.471',
  '-0.153 -0.044  0.529',
  '-0.197 -0.077  0.636',
  '-0.239 -0.203  0.533',
  '-0.227 -0.139  0.412',
  '-0.304 -0.228  0.426',
  '-0.340 -0.284  0.388',
  '-0.379 -0.328  0.426',
  '-0.367 -0.323  0.105',
  '-0.295 -0.227  0.225',
  '-0.257 -0.220  0.246',
  '-0.176 -0.059  0.332',
  '-0.163 -0.006  0.293',
  '-0.219 -0.027  0.297',
  '-0.256 -0.096  0.312',
  '-0.226 -0.062  0.313',
  '-0.158  0.028  0.252',
  '-0.201 -0.082  0.180',
  '-0.184 -0.050  0.198',
  '-0.197 -0.101  0.274',
  '-0.218 -0.155  0.292',
  '-0.179 -0