In [None]:
#!python3 -m pip install python_speech_features # Install PSF specifically on python3.
import os
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import h5py
from python_speech_features import mfcc, fbank
import matplotlib.pyplot as plt
import tracemalloc
from SerialTriggerDecoder import SerialTriggerDecoder
from scipy import signal
# Import fft
from scipy.fftpack import fft


# Start tracing Python memory allocations so that later cells can report
# current usage through tracemalloc.get_traced_memory().
tracemalloc.start()

## Find files, and list structure


In [None]:

# Walk the data directory, open every HDF5 recording found, and print the
# key structure of each file.
#root_dir = '//uni.au.dk/dfs/Tech_EarEEG/Students/Msc2022_BCM_AkselStark'
root_dir = 'bcm_behaviour_data_multi_subject'
EXCLUDED_FILE_INDEX = 2  # Position (in load order) of the recording left out of this set
data = []  # Open h5py.File handles, one per recording, in load order

for subdir, dirs, files in sorted(os.walk(root_dir)):
    # os.walk yields file names in arbitrary order; sort them so the
    # index-based exclusion below always refers to the same recording.
    for file in sorted(files):
        if "hdf5" in file:
            print("\n\n\n")
            print(file)

            # Opened read-only. The handle stays open because later cells
            # read datasets from it.
            h5_file = h5py.File(os.path.join(subdir, file), 'r')
            data.append(h5_file)

            print(f'{subdir}/{file}')  # Print the filename
            print(list(h5_file.keys()))  # Top-level keys of the file just opened

if not data:
    raise FileNotFoundError(f"No hdf5 files found under '{root_dir}'")

# Top-level keys of the first recording; kept as a notebook-level name.
keylist = list(data[0].keys())

#data = data[:3] # Create subset of the files, to create training and validation set
# Drop one recording from the set and release its file handle.
# Raises IndexError if fewer than EXCLUDED_FILE_INDEX + 1 files were found.
data.pop(EXCLUDED_FILE_INDEX).close()

for i, h5_file in enumerate(data):
    print("\n\n\n")
    print(f'Index of the hdf5 file: {i}')  # Print the index of the hdf5 file
    # Use each file's own keys so a file with a different layout is listed
    # correctly instead of failing on a key taken from the first file.
    for key in h5_file.keys():
        print(key)
        node = h5_file[key]
        if isinstance(node, h5py.Group):
            # Groups contain further keys: list them.
            print(f"    {list(node.keys())}")
        else:
            # Anything else has no sub-keys: print its contents.
            print("    No subkeys")
            print(f"    {np.array(node)}")
                        
                        

In [None]:
# Extract the signal columns and the label column from every open recording.
BCM_CHANNELS = (0, 1)  # Columns of 'DAQ970A/data' stored as separate data streams
LABEL_CHANNEL = 3      # Column of 'DAQ970A/data' stored as the label stream


def _print_traced_memory():
    """Print the memory currently traced by tracemalloc, in MB."""
    print(f'Memory usage: {tracemalloc.get_traced_memory()[0]/1000000} MB\n')


_print_traced_memory()
data_bcm = []    # One 1-D array per (file, signal column), in file order
labels_bcm = []  # One 1-D array per file
for file in data:
    print(f"data    {file}")
    # Read the whole dataset into memory once; every column below is taken
    # from this array rather than re-reading the file.
    data_full = np.array(file['DAQ970A']['data'])
    for channel in BCM_CHANNELS:
        data_bcm.append(data_full[:, channel])
    _print_traced_memory()

    print(f"Labels    {file}")
    labels_bcm.append(data_full[:, LABEL_CHANNEL])
    _print_traced_memory()

In [None]:
# Find the sample indices at which a new label/trigger starts in the label
# stream of the first recording (labels_bcm[0]).
fs = 50000                # Sampling rate in Hz (samples per second)
MIN_TRIGGER_GAP = 8 * fs  # Non-zero samples within 8 s of the last onset are ignored

last_decoded = -500000    # Far enough back that the first non-zero sample is always accepted
label_index_list = []     # List of the indices of the labels

rounded_labels = np.rint(labels_bcm[0])
if not np.all(np.isfinite(rounded_labels)):
    # A NaN/inf label sample cannot be interpreted as a trigger level.
    raise ValueError("labels_bcm[0] contains non-finite values")

# Only samples that round to a non-zero value can start a label, so visit
# just those instead of looping over every sample in Python.
for i in np.flatnonzero(rounded_labels).tolist():
    if i > last_decoded + MIN_TRIGGER_GAP:
        print(f"i: {i}")
        print(f"Time: {i/fs} s")
        last_decoded = i

        label_index_list.append(i)

In [None]:
# Hand-written class label for each entry of label_index_list, in order.
# -1 marks an entry that belongs to no class.
index_labels = []

# Six rounds of three entries each for classes 0, 1 and 2, then one -1 entry.
for _ in range(6):
    index_labels.extend([0, 0, 0, 1, 1, 1, 2, 2, 2, -1])

# Eighteen entries of class 3, then eighteen of class 4, each followed by -1.
index_labels.extend([3] * 18 + [-1])
index_labels.extend([4] * 18 + [-1])

# Same sequence with the -1 entries removed (used for plotting only).
index_labels_new = [x for x in index_labels if x != -1]

# Colour strip of the class sequence (labels >= 0).
fig, ax = plt.subplots(figsize=(30, 5))
mesh = ax.pcolormesh(np.array(index_labels_new).reshape(1, -1), shading='auto', cmap='tab10')

# The mesh created by pcolormesh has no legend handler, so passing labels
# alone produces no usable legend. Add one empty proxy line per class,
# coloured exactly as the mesh colours that class value.
for class_id in range(5):
    ax.plot([], [], marker='s', linestyle='none', markersize=12,
            color=mesh.cmap(mesh.norm(class_id)), label=str(class_id))
ax.legend(loc='upper right')
plt.show()

In [None]:
# Cut a fixed-length segment out of every data stream at each label onset
# and collect the segments in one list per class.
#
# Classes:
#     Breathing: 0
#     Snoring: 1
#     Hold_breath: 2
#     Chewing: 3
#     Talking: 4
fs = 50000            # Sampling rate in Hz
SEGMENT_SECONDS = 10  # Length of the segment taken after each onset
segment_len = fs * SEGMENT_SECONDS

nested_class_list = [[] for _ in range(5)]  # nested_class_list[c] holds the segments of class c

# Onsets and labels are paired by position. Fail before collecting anything
# if there are onsets without a label, rather than part-way through the loop.
if len(label_index_list) > len(index_labels):
    raise IndexError(
        f"{len(label_index_list)} label onsets were found but only "
        f"{len(index_labels)} class labels are defined in index_labels"
    )

# NOTE(review): the same onset indices are applied to every stream in
# data_bcm. Confirm that all streams share the timing the onsets came from.
for datastream in data_bcm:  # Loop through the data
    for onset, class_id in zip(label_index_list, index_labels):
        if class_id >= 0:  # Negative labels belong to no class and are skipped
            # A slice that runs past the end of the stream is shorter than segment_len.
            nested_class_list[class_id].append(datastream[onset:onset + segment_len])

In [None]:
# For each class: concatenate its segments, compute MFCCs, and save them to
# '<OUTPUT_DIR>/<class index>.npy'.
class_label_list = ['Breathing', 'Snoring', 'Hold_breath', 'Chewing', 'Talking']
OUTPUT_DIR = 'data/bcm_alt_3/train'

# np.save does not create missing directories, so make sure the target exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for i, segments in enumerate(nested_class_list):
    if not segments:
        # np.hstack cannot concatenate an empty list; name the class that is missing data.
        raise ValueError(f"No segments collected for class {i} ({class_label_list[i]})")

    stacked_array = np.hstack(segments)

    # Sample rate is important when using mel scale.
    # Windows do not overlap (winstep == winlen). With fs = 50000, a 0.032 s
    # window is 1600 samples, which matches nfft.
    data_mfcc = mfcc(stacked_array, samplerate=fs, nfft=1600, winlen=0.032, winstep=0.032, numcep=16)

    np.save(f'{OUTPUT_DIR}/{i}.npy', data_mfcc)