In [2]:
import copy

import numpy as np
import pandas as pd
from scipy import stats

In [3]:
def custom_train_test_split(X, labels, test_size, classes=(6, 7), max_len=700):
    """Split the samples of the selected classes into train and test sets.

    Samples are visited in their original order. For every selected class the
    first ``int((1 - test_size) * n)`` usable samples go to the training set
    and the rest go to the test set, where ``n`` is the number of usable
    samples of that class. Samples longer than ``max_len`` and samples whose
    label is not in ``classes`` are dropped.

    Parameters
    ----------
    X : sequence
        Samples; each one must support ``len()`` and ``.copy()``.
    labels : sequence
        Hashable class label of each sample, aligned with ``X``.
    test_size : float
        Fraction of each class's usable samples to put in the test set.
    classes : iterable, optional
        Labels to keep. Defaults to ``(6, 7)``.
    max_len : int, optional
        Longest sample length that is still kept. Defaults to 700.

    Returns
    -------
    X_train, X_test : list
        Copies of the selected samples.
    y_train, y_test : numpy.ndarray
        Labels aligned with ``X_train`` and ``X_test``.
    """
    labels = list(labels)

    # Count only the samples that survive the length filter. Sizing the quota
    # on the raw label count would let discarded samples inflate the training
    # share and could leave the test set empty.
    usable_counts = {phonem: 0 for phonem in classes}
    usable = []
    for i, phonem in enumerate(labels):
        if len(X[i]) <= max_len and phonem in usable_counts:
            usable_counts[phonem] += 1
            usable.append(i)

    train_quota = {
        phonem: int((1 - test_size) * count)
        for phonem, count in usable_counts.items()
    }

    X_train, X_test, y_train, y_test = [], [], [], []
    for i in usable:
        phonem = labels[i]
        if train_quota[phonem] > 0:
            train_quota[phonem] -= 1
            X_train.append(X[i].copy())
            y_train.append(phonem)
        else:
            X_test.append(X[i].copy())
            y_test.append(phonem)

    return X_train, X_test, np.array(y_train), np.array(y_test)

In [4]:
def custom_sort(x, y):
    """Reorder the items of every channel by ascending label.

    Each channel in ``x`` is paired item-by-item with ``y`` and stably sorted
    on the label alone, so items sharing a label keep their relative order.

    Returns the list of reordered channels and the sorted labels.
    """
    def label_of(pair):
        return pair[0]

    sorted_channels = []
    for channel in x:
        ordered_pairs = sorted(zip(y, channel), key=label_of)
        sorted_channels.append([item for _, item in ordered_pairs])

    return sorted_channels, sorted(y)

In [5]:
def perform_slicing(x, y, n_slices=20):
    """Cut every series into ``n_slices`` equal, consecutive slices.

    Each series of length ``n`` is split into exactly ``n_slices`` slices of
    ``n // n_slices`` items; leftover trailing items are dropped so that the
    slice count always matches the ``n_slices`` labels emitted per series.

    Labels are emitted only while processing channel 0 and channel 5.
    Series ``j`` of channel 0 takes ``y[j]``; series ``j`` of channel 5 takes
    ``y[len(x[0]) + j]``.
    NOTE(review): presumably the channels form two groups recorded over
    different trials, with ``y`` holding both groups' labels back to back --
    confirm against the caller.

    Parameters
    ----------
    x : sequence of channels, each a sequence of sliceable series.
    y : sequence of labels, indexed as described above.
    n_slices : int, optional
        Number of slices per series. Defaults to 20.

    Returns
    -------
    X : list
        One list per channel holding the slices of all its series in order.
    labels : list
        One label per slice for the series of channels 0 and 5.

    Raises
    ------
    ValueError
        If ``n_slices`` is not positive or a series is shorter than
        ``n_slices``.
    """
    if n_slices <= 0:
        raise ValueError("n_slices must be a positive integer")

    X, labels = [], []

    for i, channel in enumerate(x):
        sub = []
        for j, series in enumerate(channel):
            chunk = len(series) // n_slices
            if chunk == 0:
                raise ValueError(
                    "series of length %d is too short for %d slices"
                    % (len(series), n_slices)
                )
            # Stop after n_slices slices: stepping up to the end of the series
            # yields extra slices whenever the length is not a multiple of
            # n_slices, which would desynchronise X from labels.
            sub.extend(series[k:k + chunk] for k in range(0, n_slices * chunk, chunk))
            if i == 0 or i == 5:
                phonem = y[len(x[0]) * (i == 5) + j]
                labels.extend([phonem] * n_slices)
        X.append(sub)

    return X, labels

In [9]:
def threshold_cut(x):
    """Trim every series in ``x`` to one common ``[start:end]`` window.

    Endpoints are estimated separately from the last two series of ``x``
    using a sliding zero-crossing count, and the two estimates are averaged
    with integer division. The same slice is then applied to every series.

    The last two series are named ``x_oz`` and ``x_fpz`` here.
    NOTE(review): presumably the Oz and Fpz EEG electrodes -- confirm.

    Parameters
    ----------
    x : sequence of indexable numeric series; at least two are required.

    Returns
    -------
    list
        ``series[start:end]`` for every series in ``x``.

    Raises
    ------
    TypeError
        If an endpoint is never found in one of the two reference series
        (the averaging then operates on ``None``).
    IndexError
        If a reference series is shorter than the sliding window.
    """
    x_oz, x_fpz = x[-2], x[-1]

    def find_endpoints(t_series):
        """Return the ``(start, end)`` indices estimated for one series."""
        delta = 10     # sliding-window length, in adjacent-sample pairs
        min_hits = 10  # steps on one side of the threshold that mark an endpoint
        start, end = None, None

        def crosses(k):
            # True when samples k and k + 1 have opposite signs.
            return t_series[k] * t_series[k + 1] < 0

        zcr = 0
        for i in range(delta - 1):
            zcr += crosses(i)

        # The threshold is 5/4 of the crossing count over the first
        # delta - 1 pairs.
        threshold = zcr * 5 / 4

        # NOTE(review): these counters are cumulative over the whole series
        # and are never reset, so they do not measure consecutive runs --
        # confirm this is intended.
        steps_below = 0
        steps_above = 0
        for i in range(delta - 1, len(t_series) - 1):
            zcr += crosses(i)
            # Evict the oldest pair only once the window is full. On the
            # first step i - delta is -1, which would wrap around and
            # subtract the (last sample, first sample) pair, biasing every
            # later count.
            if i >= delta:
                zcr -= crosses(i - delta)
            steps_below += zcr < threshold
            steps_above += zcr >= threshold
            if steps_below >= min_hits and start is None:
                start = i
            if steps_above >= min_hits and end is None:
                end = i

        return start, end

    start_1, end_1 = find_endpoints(x_oz)
    start_2, end_2 = find_endpoints(x_fpz)
    start = (start_1 + start_2) // 2
    end = (end_1 + end_2) // 2

    return [one_dim_series[start:end] for one_dim_series in x]

In [7]:
def add_noise(X_channels):
    """Augment the data set with a noisy copy of every sample.

    One Gaussian noise vector (mean 0, standard deviation 2e-6) is drawn per
    call. For every sample, ``noise[j]`` is added to the first four values of
    row ``j``; the same vector is reused for all samples.

    Parameters
    ----------
    X_channels : sequence
        Samples; each one is indexed as ``sample[j][k]`` with ``k`` in 0..3.

    Returns
    -------
    list
        The noisy copies followed by the original samples, so the result is
        twice as long as the input. The input samples are not modified.
    """
    noise_std = 0.000002
    n_features = 4

    # Draw at least 700 values, and more if some sample has more rows than
    # that, so indexing the noise by row can never run out of range.
    n_noise = max([700] + [len(sample) for sample in X_channels])
    noise = np.random.normal(0, noise_std, n_noise)

    X_channels_noised = []
    for sample in X_channels:
        # Deep copy: a shallow copy of a list of rows still shares the rows,
        # so adding noise in place would also corrupt the caller's data and
        # the "clean" samples appended below.
        noisy = copy.deepcopy(sample)
        for j in range(len(noisy)):
            for k in range(n_features):
                noisy[j][k] += noise[j]
        X_channels_noised.append(noisy)

    X_channels_noised.extend(X_channels)

    return X_channels_noised