In [1]:
#   |'''''''''''''╔╬╬╬╬╬╬╬╬   _____  _____      _____  _____      ___   __
#   |            ╔╬╬╬╬╬╬╬╬╬  |\   _ \  _  \    |\   _ \  _  \    |\  \|\  \
#   | ░░         ╬╬╬╬╬╬╬╬╬╬  \ \  \\__\ \  \   \ \  \\__\ \  \   \ \  \/  /|_
#    ░░░░        ╬╬╬╬╬╬╬╬╬╬   \ \  \|__| \  \   \ \  \|__| \  \   \ \   ___  \
#   ░░░░░╦╬╦    ╔╬╬╬╬╬╬╬╬╬╬    \ \  \   \ \  \   \ \  \   \ \  \   \ \  \ \   \
#  ░░░░░╬╬╬╬ ▓▓└╬╬╬╬╬╬╬╬╬╬╬     \ \__\   \ \__\   \ \__\   \ \__\   \ \__\ \___\
# ░░░░░╔╬╬╬ ▓▓▓  ╓╬╬╬╬╬╬╬╬╬      \|__|    \|__|    \|__|    \|__|    \|__| \|__|
# ░░░░░╠╬╬╬ ▓▓▓  └╬╬╬╬╬╬╬╬╬
#  ░░░░└╬╬╬╬ ▓▓   ╬╬╬╬╬╬╬╬╬  Lehrstuhl für Mensch-Maschine-Kommunikation
#  ░░░░░╙╬╬╬╩            ╬╬  Technische Universität München
#   ░░░░░░╚ '''''''''''''''  Author: Tobias Watzel
#    ░░░                     Copyright 2020
#

%matplotlib widget
import os
import ipywidgets as widgets
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.lines as lines
from matplotlib import rc, cm
import IPython.display as ipd
import librosa.display
import numpy as np
import scipy
import more_itertools
from scipy import signal
from scipy.fftpack import fft, fftshift, rfft
import pandas as pd
import dill
from dtw import accelerated_dtw
from functools import partial
from numpy.linalg import norm

# Module-level handle for the notebook's matplotlib figure. It starts as None so
# that the `if fig:` guards in later cells only close a figure once one exists.
fig = None

## Versuchsbeschreibung:
In diesem Versuch sehen Sie, wie aus einzelnen Fenstern eines Signals Merkmale für die Spracherkennung extrahiert werden.
Hierfür können Sie im ersten Schritt zwei Bereiche aus dem Wort "sieben" herausschneiden, die unterschiedliche Laute beinhalten.
Im zweiten Schritt sehen Sie die Merkmalsvektoren der zugehörigen Fenster.

Probieren Sie verschiedene Extraktionsmethoden und Bereiche aus und sehen Sie sich die Auswirkung auf die extrahierten Merkmalsvektoren an.
Bedenken Sie dabei, dass anhand der Merkmale eine Unterscheidung der Laute möglich sein muss. Das heißt, innerhalb eines Bereiches sollten die Merkmalswerte möglichst gleich sein, jedoch zwischen den beiden Bereichen möglichst unterschiedlich.

In [2]:
# Close the figure left over from an earlier run, if there is one.
if fig:
    plt.close(fig)

# Selectable values for every property of a recording.
properties_dict = {
    'name_list': ['Bill', 'George', 'John', 'Charly'],
    'number_list': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
    'speed_list': ['schnell', 'normal', 'langsam'],
    'version_list': ['0', '1'],
}

# Label shown next to each dropdown.
description_dict = {
    'name_list': 'Sprecher:',
    'number_list': 'Wort (Zahl):',
    'speed_list': 'Geschwindigkeit:',
    'version_list': 'Version:',
}

# Position of each property inside the list_ref / list_test selection lists.
mapping_dict = {
    'name': 0,
    'number': 1,
    'speed': 2,
    'version': 3,
}

# Dropdown keys follow the pattern 'select_<property>_<ref|test>'.
selections_ref = ['select_name_ref', 'select_number_ref', 'select_speed_ref', 'select_version_ref']
selections_test = ['select_name_test', 'select_number_test', 'select_speed_test', 'select_version_test']

# One dropdown per key, pre-selecting the first option of its property.
select_dict = {}
for widget_key in selections_ref + selections_test:
    list_key = widget_key.split("_")[1] + '_list'
    choices = properties_dict[list_key]
    select_dict[widget_key] = widgets.Dropdown(
        options=choices,
        value=choices[0],
        layout={'height': '40px'},
        description=description_dict[list_key],
    )


# The speed dropdowns offer German labels; they are translated with this table
# before being inserted into the wav path.
de_en = {'schnell': 'fast', 'normal': 'norm', 'langsam': 'slow'}

# Current dropdown values of the reference word, speed entry translated.
list_ref = []
for key in selections_ref:
    chosen = select_dict[key].value
    list_ref.append(de_en[chosen] if 'speed' in key else chosen)

# Same for the test word.
list_test = []
for key in selections_test:
    chosen = select_dict[key].value
    list_test.append(de_en[chosen] if 'speed' in key else chosen)

# define vars: load the initially selected reference (y1) and test (y2) recordings
wav_pattern = 'wav_files/%s/%s_%s_%s.wav'
y1, sr1 = librosa.load(wav_pattern % tuple(list_ref))
y2, sr2 = librosa.load(wav_pattern % tuple(list_test))

In [3]:
# Close the figure created by a previous run of this cell. `fig` is passed
# explicitly (as in the guard of the preceding cell) so that exactly this
# figure is closed rather than whichever figure happens to be current.
if fig:
    plt.close(fig)

def calculate_mfcc(signal_in, num_coefficients, sample_rate):
    """Compute MFCC features of an audio signal with librosa.

    Args:
        signal_in: Audio time series, forwarded to librosa as `y`.
        num_coefficients: Number of coefficients to compute, forwarded as
            `n_mfcc`.
        sample_rate: Sampling rate of `signal_in`, forwarded as `sr`.

    Returns:
        The result of `librosa.feature.mfcc` (documented by librosa as an
        array with one row per coefficient and one column per frame).
    """
    # Keyword arguments are used throughout: recent librosa releases accept the
    # signal only as `y=...`, and `n_mfcc` must be passed for the requested
    # coefficient count to take effect.
    return librosa.feature.mfcc(y=signal_in, sr=sample_rate, n_mfcc=num_coefficients)

# Figure with a single axes that displays the DTW cost matrix.
fig = plt.figure(figsize=(11, 6))
ax1 = fig.add_subplot(1, 1, 1)

# init plot: features of the two recordings loaded earlier, aligned with DTW
# using the L1 norm as distance between two feature vectors
a = calculate_mfcc(y1, 12, sr1)
b = calculate_mfcc(y2, 12, sr2)
dist, cost, acc_cost, path = accelerated_dtw(
    a.T, b.T, dist=lambda u, v: norm(u - v, ord=1))

ax1.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
ax1.plot(path[0], path[1], 'w')  # warping path drawn in white over the matrix
ax1.set(
    xlabel='Referenzwort',
    ylabel='Testwort',
    xlim=(-0.5, cost.shape[0] - 0.5),
    ylim=(-0.5, cost.shape[1] - 0.5),
)

# Selection widgets: a caption plus one row of dropdowns per word.
label_ref = widgets.Label(value='Wort (Referenz)')
label_test = widgets.Label(value='Wort (Test)')

# The value observers of the dropdowns are registered further down, once
# change_selection has been defined.
box_ref = widgets.Box([select_dict[key] for key in selections_ref])
box_test = widgets.Box([select_dict[key] for key in selections_test])
box = widgets.VBox(
    [label_ref, box_ref, label_test, box_test],
    layout=widgets.Layout(justify_content='center'),
)
display(box)

def change_selection(change, slider_string):
    """Apply a dropdown change, reload both recordings and redraw the DTW plot.

    Args:
        change: Change notification of the dropdown; only `change['new']` is
            read and the object is left unmodified.
        slider_string: Key of the dropdown that fired, of the form
            'select_<property>_<ref|test>'.

    Side effects:
        Updates the module-level `list_ref` / `list_test` in place and redraws
        `ax1`. If a recording cannot be loaded, the key and 'File not found'
        are printed and the axes are left cleared.
    """
    ax1.cla()

    # 'select_<property>_<ref|test>' -> property name and target list
    parts = slider_string.split("_")
    prop, target = parts[1], parts[2]

    # Translate on a local copy instead of writing back into `change`, which
    # may also be delivered to other observers of the same widget.
    new_value = change['new']
    if prop == 'speed':
        new_value = de_en[new_value]

    # change ref / test list
    if target == 'ref':
        list_ref[mapping_dict[prop]] = new_value
    elif target == 'test':
        list_test[mapping_dict[prop]] = new_value

    # try to load file; stop here on failure, because continuing would only
    # hit unbound signal variables further down
    try:
        y1, sr1 = librosa.load('wav_files/%s/%s_%s_%s.wav' % tuple(list_ref))
        y2, sr2 = librosa.load('wav_files/%s/%s_%s_%s.wav' % tuple(list_test))
    except Exception:
        print(slider_string)
        print('File not found')
        return

    # calculate mfccs
    mfcc_ref = calculate_mfcc(y1, 12, sr1)
    mfcc_test = calculate_mfcc(y2, 12, sr2)

    # perform dtw with the L1 norm as distance between feature vectors
    dist, cost, acc_cost, path = accelerated_dtw(
        mfcc_ref.T, mfcc_test.T, dist=lambda x, y: norm(x - y, ord=1))

    # update plot
    ax1.imshow(cost.T, origin='lower', cmap=cm.gray, interpolation='nearest')
    ax1.plot(path[0], path[1], 'w')
    ax1.set_xlim((-0.5, cost.shape[0] - 0.5))
    ax1.set_ylim((-0.5, cost.shape[1] - 0.5))
    ax1.set_xlabel('Referenzwort')
    ax1.set_ylabel('Testwort')
    
        

# Register one 'value' observer per dropdown, reference dropdowns first and
# test dropdowns second. `partial` binds the dropdown key at registration
# time, so each callback reports its own key; a lambda that closes over the
# loop variable would see only the last key by the time it is called.
for select_key in selections_ref + selections_test:
    select_dict[select_key].observe(
        partial(change_selection, slider_string=select_key), 'value', type='change')
        
        

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

VBox(children=(Label(value='Wort (Referenz)'), Box(children=(Dropdown(description='Sprecher:', layout=Layout(h…

## 1. Betrachten Sie den Korridor in der Matrix. 
Wie groß sind I und J? Verwenden Sie dazu folgende Einstellungen – Sprecher 1: John, Wort: 0, Geschwindigkeit: normal, Version: 0; Sprecher 2: John, Wort: 0, Geschwindigkeit: normal, Version: 1.

In [8]:
save_radio_select = 'save/radio_select_dtw_ij.dill'

radio_button_list = ['35 20', '37 25', '40 30', '45 35']

# One flag per answer option; True marks the selected option.
radio_button_array = np.zeros(len(radio_button_list), dtype=bool)

# try to load a previously saved selection. This is best effort: any failure
# (missing file, unreadable content) keeps the empty default.
# NOTE(review): dill.load can execute arbitrary code contained in the file, so
# the save file must come from a trusted location.
try:
    with open(save_radio_select, 'rb') as fp:
        restored = np.asarray(dill.load(fp), dtype=bool)
    # Only accept state with one flag per option; anything else would index
    # past the end of radio_button_list below.
    if restored.shape == radio_button_array.shape:
        radio_button_array = restored
except Exception:
    pass

# Label of the first selected option, or None when nothing is selected. Kept
# as a one-element list because the widget construction reads value_radio[0].
selected_positions = np.flatnonzero(radio_button_array)
value_radio = [radio_button_list[selected_positions[0]] if selected_positions.size else None]

def callback_checkbox(change):
    """Mirror the radio button selection into `radio_button_array` and save it.

    Args:
        change: Change notification from the widget. Only notifications with
            type 'change' for the trait 'value' are handled; everything else
            is ignored.

    Side effects:
        Rewrites `radio_button_array` in place and dumps it with dill to the
        path in `save_radio_select`.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return

    # Derive every flag from the new value instead of toggling the old and new
    # entries: the array then holds at most one True even if it was out of
    # sync before, and a value that is not an option (e.g. None) clears it.
    new_value = change['new']
    selected = radio_button_list.index(new_value) if new_value in radio_button_list else -1
    for position in range(len(radio_button_array)):
        radio_button_array[position] = (position == selected)

    # save when changed; create the target folder first so that a missing
    # save directory does not raise inside the widget callback
    target_dir = os.path.dirname(save_radio_select)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(save_radio_select, 'wb') as fp:
        dill.dump(radio_button_array, fp)

# Radio group for the answer; value_radio[0] is the restored answer, or None
# when no answer has been saved yet.
radio_buttons = widgets.RadioButtons(
    options=radio_button_list,
    value=value_radio[0],
)

# Registered without a `names` filter; callback_checkbox itself reacts only to
# 'change' notifications of the 'value' trait.
radio_buttons.observe(callback_checkbox)

display(radio_buttons)

RadioButtons(options=('35 20', '37 25', '40 30', '45 35'), value='35 20')

In [9]:
# Autograding answer, please ignore
