In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import librosa.display
import tensorflow as tf
import tensorflow_hub as hub
from IPython.display import display, Audio
import numpy as np
import glob
import os
import librosa
from wavegan import sample_context_embeddings

In [2]:
def generate(cond_texts, num_samples_per_cond, inference_graph_path, model_ckpt_path, embed_model='elmo'):
    """Synthesize audio from a trained generator for each conditioning text.

    Every text is repeated ``num_samples_per_cond`` times, embedded with a
    TensorFlow Hub text module, paired with a random latent vector and fed to
    the generator restored from ``model_ckpt_path``.

    Args:
        cond_texts: Sequence of conditioning texts (list, or any object with a
            ``tolist()`` method such as a NumPy array).
        num_samples_per_cond: Number of samples to generate for each text.
        inference_graph_path: Path of the inference meta graph (``.meta``).
        model_ckpt_path: Path of the checkpoint whose weights are restored.
        embed_model: Which Hub module embeds the texts: ``'elmo'`` or ``'nnlm'``.

    Returns:
        A list of ``(generated_audio, cond_text)`` tuples, grouped by text in
        input order, ``num_samples_per_cond`` consecutive entries per text.

    Raises:
        ValueError: If ``embed_model`` is not one of the supported names.
    """
    embed_module_urls = {
        'elmo': "https://tfhub.dev/google/elmo/2",
        'nnlm': "https://tfhub.dev/google/nnlm-en-dim128-with-normalization/1",
    }
    # Validate up front: previously an unknown name left `embed` unbound and
    # surfaced as a confusing name error deep inside the session block.
    if embed_model not in embed_module_urls:
        raise ValueError('Unknown embed_model {!r}; expected one of {}'.format(
            embed_model, sorted(embed_module_urls)))

    # Convert array-likes to a plain list without swallowing unrelated errors.
    if hasattr(cond_texts, 'tolist'):
        cond_texts = cond_texts.tolist()
    # Repeat each text so that one batch row exists per requested sample.
    cond_texts = [text for text in cond_texts for _ in range(num_samples_per_cond)]

    # One 128-dimensional latent vector per batch row.
    _z = np.random.randn(len(cond_texts), 128)

    # Embed the texts in a dedicated graph so that repeated calls do not keep
    # adding another copy of the Hub module to the process-wide default graph.
    embed_graph = tf.Graph()
    with embed_graph.as_default():
        embed = hub.Module(embed_module_urls[embed_model], trainable=False, name='embed')
        embeddings = embed(cond_texts)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.tables_initializer())
            _fake_conditionals = sess.run(embeddings)

    # Load the inference graph and restore the trained generator weights.
    graph = tf.Graph()
    with graph.as_default():
        saver = tf.train.import_meta_graph(inference_graph_path)
        with tf.Session() as sess:
            saver.restore(sess, model_ckpt_path)

            # Synthesize G(z) conditioned on the text embeddings.
            z = graph.get_tensor_by_name('z:0')
            c = graph.get_tensor_by_name('c:0')
            G_z = graph.get_tensor_by_name('G_z:0')
            _G_z = sess.run(G_z, {z: _z, c: _fake_conditionals})
            return list(zip(_G_z, cond_texts))

In [3]:
def get_rand_audio_cond_pairs(audio_dir, num_samples):
    """Randomly pick training clips together with one conditioning text each.

    ``audio_dir`` is searched recursively for ``.wav`` files and
    ``num_samples`` of them are drawn with ``np.random.choice`` (sampling with
    replacement, so the same file can be drawn more than once). For each drawn
    file, one label is picked at random from the comma-separated
    ``<file>_cond.txt`` that sits next to it.

    Args:
        audio_dir: Root directory containing the ``.wav`` files.
        num_samples: Number of (audio, text) pairs to draw.

    Returns:
        A list of ``[audio, cond_text]`` pairs. ``audio`` is the array returned
        by ``librosa.load`` at the file's native sample rate; ``cond_text`` is
        read with ``encoding='bytes'`` (callers in this notebook ``.decode()``
        it before printing).

    Raises:
        ValueError: If samples are requested but no ``.wav`` file is found.
    """
    pattern = os.path.join(audio_dir, '**/*.wav')
    file_list = glob.glob(pattern, recursive=True)
    # Fail with a readable message instead of NumPy's generic
    # "'a' cannot be empty" error when the directory is wrong or empty.
    if not file_list and num_samples:
        raise ValueError('No .wav files found under {!r}'.format(audio_dir))

    audio_cond_pairs = []
    for audio_file in np.random.choice(file_list, num_samples):
        cond_file = audio_file + '_cond.txt'
        # genfromtxt squeezes a file holding a single label down to a 0-d
        # array, which np.random.choice rejects; force at least one dimension.
        cond_texts = np.atleast_1d(
            np.genfromtxt(cond_file, delimiter=',', dtype=None, encoding='bytes'))
        cond_text = np.random.choice(cond_texts)
        # sr=None keeps the file's own sample rate; the rate itself is unused.
        audio, _ = librosa.load(audio_file, sr=None)
        audio_cond_pairs.append([audio, cond_text])

    return audio_cond_pairs

In [4]:
def gen_random_samples(audio_cond_pairs, num_samples_per_cond, inference_graph_path, model_ckpt_path):
    """Generate samples for the conditioning text of every (audio, text) pair.

    Args:
        audio_cond_pairs: Sequence of ``[audio, cond_text]`` pairs; only the
            text (index 1) of each pair is used.
        num_samples_per_cond: Number of samples to generate per text.
        inference_graph_path: Path of the inference meta graph.
        model_ckpt_path: Path of the checkpoint to restore.

    Returns:
        Whatever ``generate`` returns for those texts: a list of
        ``(generated_audio, cond_text)`` tuples.
    """
    # Index the pairs directly instead of np.array(audio_cond_pairs)[:, 1]:
    # each pair mixes an audio array with a label, and NumPy >= 1.24 refuses to
    # build an array from such ragged nested sequences.
    cond_texts = [pair[1] for pair in audio_cond_pairs]
    return generate(cond_texts, num_samples_per_cond, inference_graph_path, model_ckpt_path)

In [5]:
def _label_to_text(label):
    """Return ``label`` as ``str``, decoding it first when it is a bytes object."""
    return label.decode() if isinstance(label, bytes) else str(label)


def display_audio_set(audio_cond_pairs, num_samples_per_cond, audio_set, show_labels=True, sample_rate=16000):
    """Print headings and show audio players for a set of samples.

    ``audio_set`` is read as consecutive groups of ``num_samples_per_cond``
    entries, one group per entry of ``audio_cond_pairs``.

    Args:
        audio_cond_pairs: Sequence of ``[real_audio, cond_text]`` pairs. Only
            read when ``show_labels`` is true, to show the real example and
            label at the start of each group.
        num_samples_per_cond: Number of consecutive ``audio_set`` entries that
            belong to one condition.
        audio_set: Sequence of ``(audio, cond_text)`` entries to play; ``audio``
            must provide ``flatten()``.
        show_labels: When true, print the conditioning text with every heading
            and play the real example first; when false, print only numbers.
        sample_rate: Playback rate passed to ``Audio`` (defaults to 16000).

    Labels may be ``bytes`` or ``str``; both are printed as text.
    """
    for i, audio_data in enumerate(audio_set):
        condition_idx, sample_idx = divmod(i, num_samples_per_cond)
        starts_condition = sample_idx == 0
        sample_num = sample_idx + 1

        if show_labels:
            if starts_condition:
                # Introduce the group with its label and the real recording.
                training_audio = audio_cond_pairs[condition_idx][0]
                real_label = _label_to_text(audio_cond_pairs[condition_idx][1])
                print('Condition {} ({})'.format(condition_idx + 1, real_label))
                print('Real Example ({}):'.format(real_label))
                display(Audio(training_audio, rate=sample_rate))
            print('Generated Sample {} ({}):'.format(sample_num, _label_to_text(audio_data[1])))
        else:
            if starts_condition:
                print('Condition {}'.format(condition_idx + 1))
            print('Generated Sample {}:'.format(sample_num))

        display(Audio(audio_data[0].flatten(), rate=sample_rate))

In [None]:
# Draw 10 random (audio, conditioning text) pairs from ./data/Preprocessed/Magic;
# the model-comparison cells below all reuse this same selection.
audio_cond_pairs = get_rand_audio_cond_pairs('./data/Preprocessed/Magic', 10)

In [None]:
# Generate 5 samples per sampled conditioning text with the train_tconv_600k checkpoint.
audio_set_tconv_600k = gen_random_samples(audio_cond_pairs, 5, './train_tconv_600k/infer/infer.meta', './train_tconv_600k/model.ckpt-677768')

In [None]:
# Generate 5 samples per sampled conditioning text with the train_nn_linear_360k_opt_reset checkpoint.
audio_set_nn_opt_reset = gen_random_samples(audio_cond_pairs, 5, './train_nn_linear_360k_opt_reset/infer/infer.meta', './train_nn_linear_360k_opt_reset/model.ckpt-360023')

In [None]:
# Generate 5 samples per sampled conditioning text with the train_magic_nn_137k_opt_reset checkpoint.
audio_set_magic_nn_opt_reset = gen_random_samples(audio_cond_pairs, 5, './train_magic_nn_137k_opt_reset/infer/infer.meta', './train_magic_nn_137k_opt_reset/model.ckpt-136573')

In [None]:
# Generate 5 samples per sampled conditioning text with the train_magic_zeroup_123k_opt_reset checkpoint.
audio_set_magic_zeros_opt_reset = gen_random_samples(audio_cond_pairs, 5, './train_magic_zeroup_123k_opt_reset/infer/infer.meta', './train_magic_zeroup_123k_opt_reset/model.ckpt-122672')

In [None]:
# Play the train_tconv_600k samples, with labels and the real example for each condition.
display_audio_set(audio_cond_pairs, 5, audio_set_tconv_600k, show_labels=True)

In [None]:
# Play the train_nn_linear_360k_opt_reset samples, with labels and the real example for each condition.
display_audio_set(audio_cond_pairs, 5, audio_set_nn_opt_reset, show_labels=True)

In [None]:
# Play the train_magic_nn_137k_opt_reset samples, with labels and the real example for each condition.
display_audio_set(audio_cond_pairs, 5, audio_set_magic_nn_opt_reset, show_labels=True)

In [None]:
# Play the train_magic_zeroup_123k_opt_reset samples, with labels and the real example for each condition.
display_audio_set(audio_cond_pairs, 5, audio_set_magic_zeros_opt_reset, show_labels=True)

# Interpretability - Baseline
**Click in the next cell and press Shift + Enter to generate sound effects for the interpretability section of the questionnaire. This may take some time to complete.**

In [23]:
# Baseline: draw 10 random real (audio, conditioning text) pairs; no model is run in this section.
audio_cond_pairs = get_rand_audio_cond_pairs('./data/Preprocessed/Magic', 10)



**Click in the next cell and hit shift + enter to show the sound effects for the first question.**

In [24]:
# Baseline: the real pairs are passed as the "generated" set, so the clips
# played under "Generated Sample" here are the real recordings, without labels.
display_audio_set(audio_cond_pairs, 1, audio_cond_pairs, show_labels=False)

Condition 1
Generated Sample 1:


Condition 2
Generated Sample 1:


Condition 3
Generated Sample 1:


Condition 4
Generated Sample 1:


Condition 5
Generated Sample 1:


Condition 6
Generated Sample 1:


Condition 7
Generated Sample 1:


Condition 8
Generated Sample 1:


Condition 9
Generated Sample 1:


Condition 10
Generated Sample 1:


**Click in the next cell and hit shift + enter to display the labels used to generate the above samples. A real example will also be given for each label.**

In [25]:
# Same baseline set, now with the label printed for every clip; the "real
# example" and the "generated sample" of each condition are the same recording.
display_audio_set(audio_cond_pairs, 1, audio_cond_pairs, show_labels=True)

Condition 1 (light short)
Real Example (light short):


Generated Sample 1 (light short):


Condition 2 (weapon blast)
Real Example (weapon blast):


Generated Sample 1 (weapon blast):


Condition 3 (ice falling sliding)
Real Example (ice falling sliding):


Generated Sample 1 (ice falling sliding):


Condition 4 (burning projectile torch)
Real Example (burning projectile torch):


Generated Sample 1 (burning projectile torch):


Condition 5 (low)
Real Example (low):


Generated Sample 1 (low):


Condition 6 (fire)
Real Example (fire):


Generated Sample 1 (fire):


Condition 7 (low)
Real Example (low):


Generated Sample 1 (low):


Condition 8 (impact snow)
Real Example (impact snow):


Generated Sample 1 (impact snow):


Condition 9 (short)
Real Example (short):


Generated Sample 1 (short):


Condition 10 (single page flip hard)
Real Example (single page flip hard):


Generated Sample 1 (single page flip hard):


# Interpretability
**Click in the next cell and press Shift + Enter to generate sound effects for the interpretability section of the questionnaire. This may take some time to complete.**

In [9]:
# Draw 10 random real pairs, then generate one sample per conditioning text
# with the train_magic_nn_137k_opt_reset checkpoint.
audio_cond_pairs = get_rand_audio_cond_pairs('./data/Preprocessed/Magic', 10)
audio_set = gen_random_samples(audio_cond_pairs, 1, './train_magic_nn_137k_opt_reset/infer/infer.meta', './train_magic_nn_137k_opt_reset/model.ckpt-136573')



INFO:tensorflow:Using D:\Users\LANCE~1.CHA\AppData\Local\Temp\tfhub_modules to cache modules.
INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Restoring parameters from ./train_magic_nn_137k_opt_reset/model.ckpt-136573


**Click in the next cell and hit shift + enter to show the sound effects for the first question.**

In [10]:
# Play the generated samples with numeric headings only, so the labels stay hidden.
display_audio_set(audio_cond_pairs, 1, audio_set, show_labels=False)

Condition 1
Generated Sample 1:


Condition 2
Generated Sample 1:


Condition 3
Generated Sample 1:


Condition 4
Generated Sample 1:


Condition 5
Generated Sample 1:


Condition 6
Generated Sample 1:


Condition 7
Generated Sample 1:


Condition 8
Generated Sample 1:


Condition 9
Generated Sample 1:


Condition 10
Generated Sample 1:


**Click in the next cell and hit shift + enter to display the labels used to generate the above samples. A real example will also be given for each label.**

In [11]:
# Replay the same samples with their labels, preceded by the real example for each condition.
display_audio_set(audio_cond_pairs, 1, audio_set, show_labels=True)

Condition 1 (source whoosh)
Real Example (source whoosh):


Generated Sample 1 (source whoosh):


Condition 2 (hit)
Real Example (hit):


Generated Sample 1 (hit):


Condition 3 (punch sweetener impact)
Real Example (punch sweetener impact):


Generated Sample 1 (punch sweetener impact):


Condition 4 (ice small)
Real Example (ice small):


Generated Sample 1 (ice small):


Condition 5 (fire thrower)
Real Example (fire thrower):


Generated Sample 1 (fire thrower):


Condition 6 (hard)
Real Example (hard):


Generated Sample 1 (hard):


Condition 7 (chunks falling impact)
Real Example (chunks falling impact):


Generated Sample 1 (chunks falling impact):


Condition 8 (paper)
Real Example (paper):


Generated Sample 1 (paper):


Condition 9 (whoosh)
Real Example (whoosh):


Generated Sample 1 (whoosh):


Condition 10 (source paper)
Real Example (source paper):


Generated Sample 1 (source paper):


# Realism

**Click in the next cell and hit Shift + Enter to generate the audio for the realism section of the questionnaire.**

In [12]:
# Draw 10 random real pairs and generate one sample per conditioning text.
audio_cond_pairs = get_rand_audio_cond_pairs('./data/Preprocessed/Magic', 10)
audio_set = gen_random_samples(audio_cond_pairs, 1, './train_magic_nn_137k_opt_reset/infer/infer.meta', './train_magic_nn_137k_opt_reset/model.ckpt-136573')

# For each position, flip a coin to keep either the real recording or the
# generated one, and remember which it was in the label. The pairs are indexed
# directly: wrapping them in np.array(...) inside the loop rebuilt the array on
# every iteration, and NumPy >= 1.24 rejects such ragged (audio, label) pairs.
audio_label_pairs = []
for (real_audio, real_cond), (fake_audio, fake_cond) in zip(audio_cond_pairs, audio_set):
    if np.random.randint(2):
        audio_label_pair = (real_audio, 'Real - {}'.format(real_cond.decode()))
    else:
        audio_label_pair = (fake_audio, 'Fake/Generated - {}'.format(fake_cond.decode()))

    audio_label_pairs.append(audio_label_pair)



INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Restoring parameters from ./train_magic_nn_137k_opt_reset/model.ckpt-136573


**Click in the next cell and hit Shift + Enter to show the sound effects for the realism section of the questionnaire.**

In [13]:
# Play every clip under a neutral, 1-based heading; the real/fake labels stay hidden.
for effect_num, (clip, _hidden_label) in enumerate(audio_label_pairs, start=1):
    print('Sound Effect {}'.format(effect_num))
    player = Audio(clip.flatten(), rate=16000)
    display(player)

Sound Effect 1


Sound Effect 2


Sound Effect 3


Sound Effect 4


Sound Effect 5


Sound Effect 6


Sound Effect 7


Sound Effect 8


Sound Effect 9


Sound Effect 10


**Click in the next cell and hit Shift + Enter to show the real labels for each sound effect. Only do this after you have answered the first part of the realism section of the questionnaire.**

In [14]:
# Replay every clip, this time revealing whether it was real or generated.
for effect_num, (clip, label) in enumerate(audio_label_pairs, start=1):
    print('Sound Effect {} - {}'.format(effect_num, label))
    player = Audio(clip.flatten(), rate=16000)
    display(player)

Sound Effect 1 - Real - hit


Sound Effect 2 - Real - fast projectile


Sound Effect 3 - Real - ice


Sound Effect 4 - Fake/Generated - snow


Sound Effect 5 - Real - roaring


Sound Effect 6 - Fake/Generated - fast


Sound Effect 7 - Fake/Generated - weapon small


Sound Effect 8 - Real - whoosh swirl fast


Sound Effect 9 - Real - paper book flip


Sound Effect 10 - Real - impact snow


# Condition Matching

In [20]:
# Ask the user for ten conditioning texts and lower-case each answer.
cond_texts = [
    input('Enter Conditioning Text for Sound Effect {}: '.format(effect_num)).lower()
    for effect_num in range(1, 11)
]

Enter Conditioning Text for Sound Effect 1: turkey from hell
Enter Conditioning Text for Sound Effect 2: robot turkey
Enter Conditioning Text for Sound Effect 3: extreme robot turkey
Enter Conditioning Text for Sound Effect 4: death turkey
Enter Conditioning Text for Sound Effect 5: wind turkey
Enter Conditioning Text for Sound Effect 6: extreme turkey
Enter Conditioning Text for Sound Effect 7: short turkey
Enter Conditioning Text for Sound Effect 8: hard turkey
Enter Conditioning Text for Sound Effect 9: banana turkey
Enter Conditioning Text for Sound Effect 10: turkey


In [21]:
# Generate 5 samples per user-entered text with the train_nn_linear_360k_opt_reset
# checkpoint, using generate's default text embedding ('elmo').
audio_set = generate(cond_texts, 5, './train_nn_linear_360k_opt_reset/infer/infer.meta', './train_nn_linear_360k_opt_reset/model.ckpt-360023')

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:Restoring parameters from ./train_nn_linear_360k_opt_reset/model.ckpt-360023


In [22]:
# Play each generated clip under a 1-based heading that shows its conditioning text.
for effect_num, (clip, label) in enumerate(audio_set, start=1):
    print('Sound Effect {} - {}'.format(effect_num, label))
    player = Audio(clip.flatten(), rate=16000)
    display(player)

Sound Effect 1 - turkey from hell


Sound Effect 2 - turkey from hell


Sound Effect 3 - turkey from hell


Sound Effect 4 - turkey from hell


Sound Effect 5 - turkey from hell


Sound Effect 6 - robot turkey


Sound Effect 7 - robot turkey


Sound Effect 8 - robot turkey


Sound Effect 9 - robot turkey


Sound Effect 10 - robot turkey


Sound Effect 11 - extreme robot turkey


Sound Effect 12 - extreme robot turkey


Sound Effect 13 - extreme robot turkey


Sound Effect 14 - extreme robot turkey


Sound Effect 15 - extreme robot turkey


Sound Effect 16 - death turkey


Sound Effect 17 - death turkey


Sound Effect 18 - death turkey


Sound Effect 19 - death turkey


Sound Effect 20 - death turkey


Sound Effect 21 - wind turkey


Sound Effect 22 - wind turkey


Sound Effect 23 - wind turkey


Sound Effect 24 - wind turkey


Sound Effect 25 - wind turkey


Sound Effect 26 - extreme turkey


Sound Effect 27 - extreme turkey


Sound Effect 28 - extreme turkey


Sound Effect 29 - extreme turkey


Sound Effect 30 - extreme turkey


Sound Effect 31 - short turkey


Sound Effect 32 - short turkey


Sound Effect 33 - short turkey


Sound Effect 34 - short turkey


Sound Effect 35 - short turkey


Sound Effect 36 - hard turkey


Sound Effect 37 - hard turkey


Sound Effect 38 - hard turkey


Sound Effect 39 - hard turkey


Sound Effect 40 - hard turkey


Sound Effect 41 - banana turkey


Sound Effect 42 - banana turkey


Sound Effect 43 - banana turkey


Sound Effect 44 - banana turkey


Sound Effect 45 - banana turkey


Sound Effect 46 - turkey


Sound Effect 47 - turkey


Sound Effect 48 - turkey


Sound Effect 49 - turkey


Sound Effect 50 - turkey
