In [None]:
# This module is taken from the repository https://github.com/bitrate16/K-2021

In [1]:
import os
import subprocess
import traceback

import mne
# Root directory of the converted EDF files: convert_edf writes into it and the
# list_*_edf / open_*_edf helpers read from it.
# NOTE(review): convert_edf also reads a DATA_PATH constant that is not defined
# in this cell — confirm where it is supposed to come from.
OUT_PATH = '../data'
# Sub-directory names (below OUT_PATH) for the two kinds of recordings.
VISUAL_SUBPATH = 'Visual'
AUDIAL_SUBPATH = 'Audial'

# Number of channels that subselect_channels picks.
TARGET_CHANNELS = 4
# One inner list per target channel with alternative names for that channel;
# subselect_channels uses the first alternative it can read from the file.
TARGET_CHANNEL_SETS = [
	[ 'EEG F7-A1', 'EEG F7-M1' ],
	[ 'EEG F3-A1', 'EEG F3-M1' ],
	[ 'EEG T3-A1', 'EEG T3-M1' ],
	[ 'EEG C3-A1', 'EEG C3-M1' ],
]
# NOTE(review): the constants below are not used by the cells visible here; the
# remarks are inferred from the names only — confirm against the code that uses them.
# Sampling frequency of the recordings in Hz, as stated in the article.
SOURCE_FREQ     = 1000 # Article: 1000Hz
# Presumably the sector length in samples (600 samples = 600 ms at 1000 Hz).
SECTOR_LENGTH   = 600
SECTOR_LENGTH_STEPS = 600
# Presumably the upper frequency and number of frequency steps of a Morlet wavelet transform.
MAX_MORLET_FREQ = 30
MORLET_FREQ_STEPS = 30
# NOTE(review): the names look swapped for a 3..30 band-pass (a high-pass cutoff
# is normally the lower frequency) — verify how the filter code applies them.
LOW_PASS_FREQ   = 3
HIGH_PASS_FREQ  = 30
# NOTE(review): unit is not visible here (presumably seconds) — confirm.
MAX_SAMPLE_LENGTH = 1.5

# Phonemes are enumerated in range 2, 3, 4, 5, 6, 7, 8
MIN_PHONEME_ID = 2
PHONEME_COUNT  = 7

# List of EDF files to use
# These files are taken from CLEARED_PATH/VISUAL_SUBPATH and CLEARED_PATH/AUDIAL_SUBPATH
# NOTE(review): CLEARED_PATH is not defined in this cell; OUT_PATH appears to
# play that role — confirm.
INPUT_EDF_LIST = [
	'Antonovazrf_och',
	'BazvlkDzrf_och',
	'DachaPapzrf_och',
	'Drachenkozrf_och',
	'Gordokovzrf_och',
	'Manenkovzrf_och',
	'pavluhinNzrf_och',
	'rylkovSzrf_och',
	'Sazanovazrf_och',
	'vinickiDzrf_och',
]

In [None]:
def _convert_edf_dir(source_dir, target_dir):
	"""
	Run Mitr_Edf.exe on every file of source_dir and move each result into target_dir.

	Stops at the first file whose result cannot be moved (after printing the
	traceback); files processed before that point stay converted.
	"""
	for name in os.listdir(source_dir):
		source_file    = f'{source_dir}/{name}'
		# The converted file is expected next to the input as '<stem>_.EDF'
		# (naming taken from the original code; presumably what the utility writes).
		processed_file = f'{source_dir}/{os.path.splitext(name)[0]}_.EDF'
		target_file    = f'{target_dir}/{name}'

		print('Processing', source_file)
		# Drop a stale result of a previous run; it usually does not exist.
		try:
			os.remove(processed_file)
		except OSError:
			pass
		# Argument list instead of one command string: paths containing spaces
		# are passed intact and the call also works without a shell on POSIX.
		subprocess.run(['Mitr_Edf.exe', source_file])

		print('Moving', processed_file, 'to', target_file)
		try:
			# os.replace overwrites an existing target on every platform.
			os.replace(processed_file, target_file)
		except OSError:
			print('Critical failture')
			traceback.print_exc()
			break


def convert_edf(data_path = None, out_path = None):
	"""
	Convert EDF files using Mitr_Edf.exe utility.

	Every file found in data_path/VISUAL_SUBPATH and data_path/AUDIAL_SUBPATH is
	passed to Mitr_Edf.exe and the produced file is moved to the matching
	sub-directory of out_path under the original file name.

	Parameters:
		data_path - directory with the source recordings; defaults to the
		            module-level DATA_PATH (which must then be defined).
		out_path  - directory for the converted files; defaults to OUT_PATH.

	A failure to move a result stops the processing of that sub-directory only;
	the other sub-directory is still processed.
	"""
	if data_path is None:
		data_path = DATA_PATH
	if out_path is None:
		out_path = OUT_PATH

	for subpath in (VISUAL_SUBPATH, AUDIAL_SUBPATH):
		os.makedirs(f'{out_path}/{subpath}', exist_ok=True)

	for subpath in (VISUAL_SUBPATH, AUDIAL_SUBPATH):
		_convert_edf_dir(f'{data_path}/{subpath}', f'{out_path}/{subpath}')

In [None]:
def list_visual_edf():
	"""
	List visual EDF file names.

	Returns the entries of OUT_PATH/VISUAL_SUBPATH sorted by name. os.listdir
	alone returns them in arbitrary order, so sorting keeps the processing
	order identical between runs and machines.
	"""

	return sorted(os.listdir(f'{OUT_PATH}/{VISUAL_SUBPATH}'))

def open_visual_edf(filename):
	"""
	Read one visual recording.

	filename is the name of a file inside OUT_PATH/VISUAL_SUBPATH.
	Returns the object produced by mne.io.read_raw_edf for that file.
	"""
	return mne.io.read_raw_edf(f'{OUT_PATH}/{VISUAL_SUBPATH}/{filename}')

def list_audial_edf():
	"""
	List audial EDF file names.

	Returns the entries of OUT_PATH/AUDIAL_SUBPATH sorted by name. os.listdir
	alone returns them in arbitrary order, so sorting keeps the processing
	order identical between runs and machines.
	"""

	return sorted(os.listdir(f'{OUT_PATH}/{AUDIAL_SUBPATH}'))

def open_audial_edf(filename):
	"""
	Read one audial recording.

	filename is the name of a file inside OUT_PATH/AUDIAL_SUBPATH.
	Returns the object produced by mne.io.read_raw_edf for that file.
	"""
	return mne.io.read_raw_edf(f'{OUT_PATH}/{AUDIAL_SUBPATH}/{filename}')

In [None]:
def new_extract_strict_sectors(edf, sector_length = 600):
	"""
	Cut fixed-length sectors that start at `1n` labels of the 'METKA' channel.

	A sample whose label value is in 10..19 starts a countdown of
	sector_length samples; any other positive label cancels it. When the
	countdown finishes without being cancelled, the sector
	(start_index, start_index + sector_length) is recorded together with the
	phoneme number (label value modulo 10).
	Typical use: sectors of 600 samples, i.e. 600 ms at 1000Hz.

	Returns (sectors, labels): a list of (begin, end) sample indices and the
	list of phoneme numbers, in the same order.

	Note kept from the original docstring on label codes:
		** 1n perception / n pronunciation
	"""
	marker = edf['METKA']
	times = marker[1]
	codes = marker[0].T[:,0]

	sectors, labels = [], []
	phoneme = None
	remaining = 0   # samples left until the pending sector is complete
	armed = False   # True while the latest positive label was a 1n code

	for position, (_, raw) in enumerate(zip(times, codes)):
		remaining -= 1

		if raw > 0:
			code = int(raw)
			armed = code // 10 == 1
			if armed:
				# (re)start the countdown from this sample
				remaining = sector_length
				phoneme = code % 10

		if armed and remaining == 0:
			sectors.append((position - sector_length, position))
			labels.append(phoneme)

	return sectors, labels

In [None]:
def extract_sectors(edf): # Returns sectors[begin,end] and missing_labels
	"""
	Pair phoneme-begin (1n) and phoneme-end (2n) labels of the 'METKA' channel.

	Returns (sectors, missing_labels):
		sectors        - (begin_index, end_index) for every begin label that is
		                 followed by an end label with the same last digit;
		missing_labels - sample indices of labels that could not be paired:
		                 a begin that is followed by another begin, an end
		                 without an open begin, and both indices when the
		                 digits of begin and end differ.

	After a digit mismatch the begin label stays open, so a later matching end
	label still closes it.
	"""
	marker = edf['METKA']
	times = marker[1]
	codes = marker[0].T[:,0]

	sectors, missing_labels = [], []
	open_label = None   # value of the begin label waiting for its end
	open_index = None   # sample index of that begin label

	for position, (_, raw) in enumerate(zip(times, codes)):
		if not raw > 0:
			continue

		code = int(raw)
		kind, phoneme = divmod(code, 10)

		if kind == 1:
			# Phoneme begin: a still-open begin never got its end label
			if open_label is not None and open_label // 10 == 1:
				missing_labels.append(open_index)
			open_label, open_index = code, position

		elif kind == 2:
			# Phoneme end
			if open_label is None:
				missing_labels.append(position)
			elif open_label % 10 == phoneme:
				sectors.append((open_index, position))
				open_label = None
			else:
				missing_labels.append(open_index)
				missing_labels.append(position)

	return sectors, missing_labels

In [None]:
def subselect_channels(edf, channel_sets = None):
	"""
	Pick one data row per target channel, trying alternative channel names.

	Parameters:
		edf          - object exposing `ch_names` and `edf[name]` indexing; the
		               selected data is taken as `edf[name][0][0]`.
		channel_sets - optional list with one inner list of alternative names
		               per target channel. Defaults to the first
		               TARGET_CHANNELS entries of TARGET_CHANNEL_SETS.

	Returns a list holding, for every target channel, the data of the first
	alternative name that could be read.

	Raises RuntimeError when none of the alternatives of a channel can be read.
	"""
	print(f'Available channels: {edf.ch_names}')

	if channel_sets is None:
		channel_sets = TARGET_CHANNEL_SETS[:TARGET_CHANNELS]

	channels = []
	for alternatives in channel_sets:
		selected = None

		# Try the compatible names in order and keep the first one that reads
		for name in alternatives:
			try:
				selected = edf[name][0][0]
				break
			except Exception:
				# Only ordinary lookup errors mean "try the next name";
				# KeyboardInterrupt / SystemExit must still propagate.
				continue

		if selected is None:
			raise RuntimeError(f'No compatible channels found for channels {alternatives}')
		channels.append(selected)

	return channels

def split_sectors(edf, channels_data, sectors):
	"""
	Slice every channel by the given (begin, end) sectors.

	Parameters:
		edf           - recording; only its 'METKA' entry is read (label values
		                and time stamps).
		channels_data - sequence of per-channel sample sequences.
		sectors       - sequence of (begin, end) sample index pairs.

	Returns (labels, lengths, durations, splitted):
		labels[i]      - integer label value at the sector begin, modulo 10;
		lengths[i]     - end - begin, in samples;
		durations[i]   - time stamp at end minus time stamp at begin;
		splitted[c][i] - channels_data[c][begin:end].
	"""
	marker = edf['METKA']
	times = marker[1]
	codes = marker[0][0]

	labels    = [ int(codes[begin]) % 10 for begin, _ in sectors ]
	lengths   = [ end - begin for begin, end in sectors ]
	durations = [ times[end] - times[begin] for begin, end in sectors ]
	splitted  = [
		[ channel[begin:end] for begin, end in sectors ]
		for channel in channels_data
	]

	return labels, lengths, durations, splitted