In [1]:
#   |'''''''''''''╔╬╬╬╬╬╬╬╬   _____  _____      _____  _____      ___   __
#   |            ╔╬╬╬╬╬╬╬╬╬  |\   _ \  _  \    |\   _ \  _  \    |\  \|\  \
#   | ░░         ╬╬╬╬╬╬╬╬╬╬  \ \  \\__\ \  \   \ \  \\__\ \  \   \ \  \/  /|_
#    ░░░░        ╬╬╬╬╬╬╬╬╬╬   \ \  \|__| \  \   \ \  \|__| \  \   \ \   ___  \
#   ░░░░░╦╬╦    ╔╬╬╬╬╬╬╬╬╬╬    \ \  \   \ \  \   \ \  \   \ \  \   \ \  \ \   \
#  ░░░░░╬╬╬╬ ▓▓└╬╬╬╬╬╬╬╬╬╬╬     \ \__\   \ \__\   \ \__\   \ \__\   \ \__\ \___\
# ░░░░░╔╬╬╬ ▓▓▓  ╓╬╬╬╬╬╬╬╬╬      \|__|    \|__|    \|__|    \|__|    \|__| \|__|
# ░░░░░╠╬╬╬ ▓▓▓  └╬╬╬╬╬╬╬╬╬
#  ░░░░└╬╬╬╬ ▓▓   ╬╬╬╬╬╬╬╬╬  Lehrstuhl für Mensch-Maschine-Kommunikation
#  ░░░░░╙╬╬╬╩            ╬╬  Technische Universität München
#   ░░░░░░╚ '''''''''''''''  Author: Tobias Watzel
#    ░░░                     Copyright 2020
#

%matplotlib widget
import os
import ipywidgets as widgets
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.lines as lines
from matplotlib import rc
import IPython.display as ipd
import librosa.display
import numpy as np
import scipy
import more_itertools
from scipy import signal
from scipy.fftpack import fft, fftshift, rfft
import pandas as pd
import dill

# Notebook-level handle to the current matplotlib figure. It starts out as None
# so that the plotting cell's `if fig:` check has nothing to close on first run.
fig = None

## Versuchsbeschreibung:
In diesem Versuch sehen Sie, wie aus einzelnen Fenstern eines Signals Merkmale für die Spracherkennung extrahiert werden.
Hierfür können Sie im ersten Schritt zwei Bereiche aus dem Wort "sieben" herausschneiden, die unterschiedliche Laute beinhalten.
Im zweiten Schritt sehen Sie die Merkmalsvektoren der zugehörigen Fenster.

Probieren Sie verschiedene Extraktionsmethoden und Bereiche aus und sehen Sie sich die Auswirkung auf die extrahierten Merkmalsvektoren an.
Bedenken Sie dabei, dass anhand der Merkmale eine Unterscheidung der Laute möglich sein muss. Das heißt, innerhalb eines Bereiches sollten die Merkmalswerte möglichst gleich sein, jedoch zwischen den beiden Bereichen möglichst unterschiedlich.

In [2]:
# Load the recording; librosa resamples it to the requested rate of 16 kHz.
y, sr = librosa.load('wav_files/sieben.wav', sr = 16000)

# Sample axis for plotting: exactly one integer index per sample.
# np.linspace(0, y.size, num=y.size) was used here before, but its spacing is
# y.size / (y.size - 1), so the curve drifted away from the integer sample
# indices that the selection cursors and the cut-out windows are based on.
x = np.arange(y.size)

# Text fields for the start/end sample index of the two analysis ranges.
# Their values are strings; they are converted with int() where they are used.
text_startblue = widgets.Text(value = '256', description = 'Startblue:')
text_endblue = widgets.Text(value = '768', description = 'Endblue:')
text_startgreen = widgets.Text(value = '2048', description = 'Startgreen:')
text_endgreen = widgets.Text(value = '2560', description = 'Endgreen:')

# Lay the fields out as two columns (blue range | green range).
box = widgets.HBox([widgets.VBox([text_startblue, text_endblue]), widgets.VBox([text_startgreen, text_endgreen])])

display(box)

HBox(children=(VBox(children=(Text(value='256', description='Startblue:'), Text(value='768', description='Endb…

In [3]:
# Close the figure left over from a previous run of this cell so that
# re-running it does not pile up open figures.
if fig:
    plt.close(fig)


def _selected_range(start_widget, end_widget, num_samples):
    """Read a [start, end) sample range from two text widgets.

    Both bounds are clipped to ``[0, num_samples]`` so that the sliced signal
    and its sample axis always have the same length.

    Raises:
        ValueError: if a field does not contain an integer, or if the clipped
            range is empty.
    """
    start = min(max(int(start_widget.value), 0), num_samples)
    end = min(max(int(end_widget.value), 0), num_samples)
    if start >= end:
        raise ValueError(
            'Empty selection: start ({}) must be smaller than end ({}).'.format(start, end))
    return start, end


fig = plt.figure(figsize = (9, 7))
ax1 = plt.subplot(211)   # full signal, top row
ax2 = plt.subplot(223)   # blue window, bottom left
ax3 = plt.subplot(224)   # green window, bottom right

# plot the complete signal
ax1.plot(x, y, linewidth=1, color='r')
ax1.set_xlim(0.0, y.size)
ax1.set_title("Zeitverlauf Signal")
ax1.set_xlabel("Sample")
ax1.set_ylabel("Amplitude")
ax1.grid()

# Parse each pair of bounds once and reuse them for slicing, axes and cursors.
start_blue, end_blue = _selected_range(text_startblue, text_endblue, y.size)
start_green, end_green = _selected_range(text_startgreen, text_endgreen, y.size)

# do cutting
y_cut_blue = y[start_blue:end_blue]
y_cut_green = y[start_green:end_green]

x_blue = np.arange(start_blue, end_blue)
x_green = np.arange(start_green, end_green)

# mark both selections in the overview plot
l1 = ax1.axvline(x = start_blue, color = 'b')
l2 = ax1.axvline(x = end_blue, color = 'b')
l3 = ax1.axvline(x = start_green, color = 'g')
l4 = ax1.axvline(x = end_green, color = 'g')

# plot blue window
ax2.plot(x_blue, y_cut_blue, linewidth=1, color='r')
ax2.set_xlim(x_blue.min(), x_blue.max())
ax2.set_title("Zeitverlauf blaues Fenster")
ax2.set_xlabel("Sample")
ax2.set_ylabel("Amplitude")
ax2.grid()

# plot green window
ax3.plot(x_green, y_cut_green, linewidth=1, color='r')
ax3.set_xlim(x_green.min(), x_green.max())
ax3.set_title("Zeitverlauf grünes Fenster")
ax3.set_xlabel("Sample")
ax3.set_ylabel("Amplitude")
ax3.grid()


plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [4]:
def autocorr(x):
    """Return the raw autocorrelation of ``x`` for lags 0, 1, ..., len(x) - 1.

    The full correlation of ``x`` with itself is symmetric around its centre
    element (lag 0); only the centre and everything after it is returned.
    No normalisation is applied.
    """
    full_correlation = np.correlate(x, x, mode='full')
    zero_lag_index = full_correlation.size // 2
    return full_correlation[zero_lag_index:]

def calculate_fbk(signal_in, num_coefficients, window_size):
    """Compute normalised band-energy features for successive signal windows.

    The signal is cut into windows of ``window_size`` samples that start every
    256 samples. For each window the power spectrum is split into
    ``num_coefficients`` contiguous bands, the mean power of each band is taken
    and the resulting vector is scaled to unit Euclidean length.

    Only complete windows are used. A trailing partial window is dropped
    (padding it with a non-numeric fill value, as was done before, makes the
    FFT fail for any signal whose length is not a multiple of the step).

    Args:
        signal_in: 1-D array of samples.
        num_coefficients: number of bands, i.e. rows of the result.
        window_size: number of samples per window.

    Returns:
        Array of shape ``(num_coefficients, num_windows)``. Windows without any
        energy yield an all-zero column instead of NaNs.
    """
    step_size = 256

    signal_in = np.asarray(signal_in)

    # Number of windows that fit completely into the signal.
    if signal_in.size >= window_size:
        num_windows = 1 + (signal_in.size - window_size) // step_size
    else:
        num_windows = 0

    features = np.zeros([num_coefficients, num_windows])

    for i in range(num_windows):
        start = i * step_size
        w = signal_in[start:start + window_size]
        power_spectrum = np.abs(np.fft.rfft(w, window_size)) ** 2
        for j, band in enumerate(np.array_split(power_spectrum, num_coefficients)):
            features[j, i] = np.mean(band)

        # Scale to unit length; skip silent windows to avoid dividing 0 by 0.
        norm = np.sqrt(np.sum(features[:, i] ** 2))
        if norm > 0:
            features[:, i] /= norm

    return features

def calculate_mfcc(signal_in, num_coefficients):
    """Return the MFCC matrix of ``signal_in`` as computed by librosa.

    Args:
        signal_in: 1-D array of audio samples.
        num_coefficients: number of MFCCs per frame (``n_mfcc``).

    Returns:
        Array with ``num_coefficients`` rows and one column per frame. The
        framing is librosa's own; it is not controlled from here.

    Note:
        The sampling rate is taken from the notebook-level variable ``sr``.
    """
    # The signal is passed by keyword: recent librosa releases accept ``y``
    # only as a keyword argument, and older releases accept the keyword too.
    return librosa.feature.mfcc(y = signal_in, sr = sr, n_mfcc = num_coefficients)

def calculate_akk(signal_in, num_coefficients, window_size):
    """Compute normalised autocorrelation coefficients for successive windows.

    The signal is cut into windows of ``window_size`` samples that start every
    256 samples. For each window the first ``num_coefficients`` autocorrelation
    lags are taken and divided by the lag-0 value, so the first coefficient of
    every non-silent window is 1.

    Only complete windows are used. A trailing partial window is dropped
    (padding it with a non-numeric fill value, as was done before, makes the
    correlation fail for any signal whose length is not a multiple of the
    step).

    Args:
        signal_in: 1-D array of samples.
        num_coefficients: number of lags, i.e. rows of the result.
        window_size: number of samples per window.

    Returns:
        Array of shape ``(num_coefficients, num_windows)``. Windows without any
        energy yield an all-zero column instead of NaNs.
    """
    step_size = 256

    signal_in = np.asarray(signal_in)

    # Number of windows that fit completely into the signal.
    if signal_in.size >= window_size:
        num_windows = 1 + (signal_in.size - window_size) // step_size
    else:
        num_windows = 0

    features = np.zeros([num_coefficients, num_windows])

    for i in range(num_windows):
        start = i * step_size
        autocorr_tmp = autocorr(signal_in[start:start + window_size]) / window_size

        # A window only has ``window_size`` lags; rows beyond that stay zero.
        num_lags = min(num_coefficients, autocorr_tmp.size)
        features[:num_lags, i] = autocorr_tmp[:num_lags]

        # Normalise by the lag-0 value (the window energy); skip silent
        # windows to avoid dividing 0 by 0.
        if autocorr_tmp[0] > 0:
            features[:, i] /= autocorr_tmp[0]

    return features

def create_column_names(y_cut_blue, y_cut_green, window_size):
    """Build the column labels for the combined feature table.

    For each method (FBK, AKK, MFCC, in that order) the list contains one
    "(blau)" label per blue window followed by one "(grün)" label per green
    window, where the number of windows is ``size // window_size`` of the
    respective signal.

    Args:
        y_cut_blue: samples of the blue range (only ``.size`` is read).
        y_cut_green: samples of the green range (only ``.size`` is read).
        window_size: number of samples per window.

    Returns:
        List of label strings.
    """
    num_windows_blue = int(y_cut_blue.size / window_size)
    num_windows_green = int(y_cut_green.size / window_size)

    # NOTE(review): the MFCC labels assume that the MFCC matrices have as many
    # frames as there are windows here - confirm against the MFCC framing.
    column_names = []
    for method in ('FBK', 'AKK', 'MFCC'):
        column_names += [method + ' (blau)'] * num_windows_blue
        # The green labels follow the green window count; previously the blue
        # count was used here, which mislabelled ranges of different length.
        column_names += [method + ' (grün)'] * num_windows_green
    return column_names
    

# Extract 12 coefficients per frame with each method, for both selected ranges.
features_fbk_blue = calculate_fbk(y_cut_blue, 12, 256)
features_fbk_green = calculate_fbk(y_cut_green, 12, 256)

features_akk_blue = calculate_akk(y_cut_blue, 12, 256)
features_akk_green = calculate_akk(y_cut_green, 12, 256)

features_mfcc_blue = calculate_mfcc(y_cut_blue, 12)
features_mfcc_green = calculate_mfcc(y_cut_green, 12)

# (label, matrix) pairs in display order; every matrix has one column per frame.
labelled_features = [
    ('FBK (blau)', features_fbk_blue),
    ('FBK (grün)', features_fbk_green),
    ('AKK (blau)', features_akk_blue),
    ('AKK (grün)', features_akk_green),
    ('MFCC (blau)', features_mfcc_blue),
    ('MFCC (grün)', features_mfcc_green),
]

dataframe = pd.DataFrame(np.concatenate([matrix for _, matrix in labelled_features], axis = 1))

# Label every column from the actual width of its matrix. librosa frames the
# signal on its own for the MFCCs, so those matrices need not have as many
# columns as the 256-sample-window features; labels computed from a window
# count alone then no longer match the table and the assignment fails.
dataframe.columns = [label for label, matrix in labelled_features
                     for _ in range(matrix.shape[1])]

widget1 = widgets.Output()

# render in output widgets
with widget1:
    ipd.display(dataframe)

# create HBox
hbox = widgets.HBox([widget1])

display(hbox)
    

HBox(children=(Output(),))

## 1. Welche der Aussagen ist richtig?

In [5]:
# File in which the selected answer is persisted between sessions.
save_radio_select = 'save/radio_select_features.dill'

radio_button_list = ['Die FB-Koeffizienten eignen sich als Merkmale besonders gut, da sie sehr gut sprecherspezifische und nicht-sprecherspezifische Informationen trennen können.',
                     'Die MFC-Koeffizienten eignen sich besonders gut, da sie gut dekorreliert sind und sich daher anschließend besser reduzieren lassen.',
                     'Um die AK-Koeffiezienten zu bestimmen, muss man zunächst das Spektrum berechnen.']

# One flag per option; at most one is True. Default: nothing selected.
radio_button_array = np.array([False, False, False])

# Try to restore a previously saved selection (best effort).
# NOTE: dill.load can execute arbitrary code while unpickling - only load
# files that were written by this notebook.
try:
    with open(save_radio_select, 'rb') as fp:
        loaded_state = np.asarray(dill.load(fp), dtype=bool)
    # Ignore state that does not match the current list of options.
    if loaded_state.shape == (len(radio_button_list),):
        radio_button_array = loaded_state
except Exception:
    # No save file yet, or it is unreadable/corrupt: keep the default.
    # (Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit through.)
    pass

# Initial widget value: the first flagged option, or None if nothing is flagged.
selected_indices = np.flatnonzero(radio_button_array)
value_radio = [radio_button_list[selected_indices[0]] if selected_indices.size else None]

def callback_checkbox(change):
    """Mirror the selected radio option into ``radio_button_array`` and save it.

    Only reacts to changes of the widget's ``value`` trait. The flags are set
    explicitly (instead of being toggled) so that the array always ends up
    with exactly the new option marked, whatever state was loaded before.
    """
    if change['type'] == 'change' and change['name'] == 'value':
        radio_button_array[:] = False
        if change['new'] is not None:
            radio_button_array[radio_button_list.index(change['new'])] = True
        # save when changed; create the target directory on first use
        save_dir = os.path.dirname(save_radio_select)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        with open(save_radio_select, 'wb') as fp:
            dill.dump(radio_button_array, fp)

radio_buttons = widgets.RadioButtons(options=radio_button_list,
                                     value=value_radio[0], layout={'width' : 'initial'})

radio_buttons.observe(callback_checkbox)

display(radio_buttons)

RadioButtons(index=1, layout=Layout(width='initial'), options=('Die FB-Koeffizienten eignen sich als Merkmale …

In [6]:
# Autograding answer, please ignore
