In [None]:
%load_ext autoreload
%autoreload 2

from __future__ import division
import numpy as np
import scipy.signal as sg
from scipy.fftpack import rfft, fftfreq
from scipy.io import wavfile
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline
import matplotlib.colors as colors
from pydub import AudioSegment
import math as m
import tensorflow as tf
import utilities as util

In [None]:
# Load every sample recording.  Each util.gather_data call is unpacked into six
# values per file; judging by the target names these are the sample rate, the
# waveform, the frequency axis, the time axis, the spectrogram and an argsort
# of it -- TODO confirm against utilities.gather_data.

# Instrument recordings / melodies
(bells_fs, bells_wav, bells_freq,
 bells_time, bells_spec, bells_argsort) = util.gather_data('../samples/HandBells.wav')
(chords_fs, chords_wav, chords_freq,
 chords_time, chords_spec, chords_argsort) = util.gather_data('../samples/Chords.wav')
(piano_mel_fs, piano_mel_wav, piano_mel_freq,
 piano_mel_time, piano_mel_spec, piano_mel_argsort) = util.gather_data('../samples/Piano.wav')
(guitar_tun_fs, guitar_tun_wav, guitar_tun_freq,
 guitar_tun_time, guitar_tun_spec, guitar_tun_argsort) = util.gather_data('../samples/Guitar.wav')

# Single C-major notes
(piano_note_fs, piano_note_wav, piano_note_freq,
 piano_note_time, piano_note_spec, piano_note_argsort) = util.gather_data('../samples/Piano_C_Major_Note.wav')
(tbone_note_fs, tbone_note_wav, tbone_note_freq,
 tbone_note_time, tbone_note_spec, tbone_note_argsort) = util.gather_data('../samples/Trombone_C_Major_Note.wav')
(guitar_note_fs, guitar_note_wav, guitar_note_freq,
 guitar_note_time, guitar_note_spec, guitar_note_argsort) = util.gather_data('../samples/Guitar_C_Major_Note.wav')

# C-major scales
(piano_scale_fs, piano_scale_wav, piano_scale_freq,
 piano_scale_time, piano_scale_spec, piano_scale_argsort) = util.gather_data('../samples/Piano_C_Major_Scale.wav')
(tbone_scale_fs, tbone_scale_wav, tbone_scale_freq,
 tbone_scale_time, tbone_scale_spec, tbone_scale_argsort) = util.gather_data('../samples/Trombone_C_Major_Scale.wav')
(guitar_scale_fs, guitar_scale_wav, guitar_scale_freq,
 guitar_scale_time, guitar_scale_spec, guitar_scale_argsort) = util.gather_data('../samples/Guitar_C_Major_Scale.wav')

In [None]:
def note_spectrogram(wav, fs=44100):
    """Build a per-note spectrogram of `wav` using a note-dependent window length.

    The cell previously held this code at top level, where the trailing
    `return` made the whole cell a SyntaxError and `wav` was never defined.
    It is now a function so the cell runs and the signal is an explicit
    argument.

    Parameters
    ----------
    wav : array_like
        One-dimensional time-domain signal passed to `scipy.signal.spectrogram`.
    fs : number, optional
        Sampling rate in Hz.  Defaults to 44100, the value that was hard-coded.

    Returns
    -------
    notes : sequence
        `util.frequencies('A0', 'D8')` without its last entry (one value per
        row of `full_spec`).
    times : ndarray
        Time axis of the spectrogram computed with the shortest window.
    full_spec : ndarray, shape (len(notes), len(times))
        One row per note.  Rows computed with longer windows have fewer time
        frames and are left zero beyond their own length.
    """
    note_freq = util.frequencies('A0', 'D8')
    df = np.diff(note_freq)  # spacing between adjacent entries of note_freq

    # Largest power of two not exceeding fs/24 * 4/3 samples; it is both the
    # minimum window length and the hop between successive windows.
    smallest_nps = 2 ** int(np.log2(fs / 24.0 * 4 / 3))

    # Per-note window: smallest power of two >= fs/df, never below smallest_nps.
    nps = np.maximum(np.int64(2 ** np.ceil(np.log2(fs / df))), smallest_nps)
    nps_uniq = list(np.unique(nps))

    # Band edges: arithmetic midpoints between a note and its neighbours one
    # factor of 2**(1/12) below and above.
    a = 2 ** (1.0 / 12)
    lo_factor = 0.5 * (1 + 1 / a)
    hi_factor = 0.5 * (1 + a)

    # One spectrogram per distinct window length.  `noverlap` is chosen so the
    # hop is always smallest_nps samples, whatever the window length.
    data = [
        sg.spectrogram(wav, fs, nperseg=nperseg, noverlap=nperseg - smallest_nps)
        for nperseg in nps_uniq
    ]

    # data[0] uses the shortest window and therefore has the most time frames.
    times = data[0][1]
    full_spec = np.zeros((len(note_freq) - 1, len(times)))

    # `nps` has one entry fewer than `note_freq`, so zip drops the last note.
    for i, (nf, nperseg) in enumerate(zip(note_freq, nps)):
        freq, time, spec = data[nps_uniq.index(nperseg)]
        in_band = (freq >= nf * lo_factor) & (freq <= nf * hi_factor)
        # NOTE(review): `normal_peak` is not defined or imported in the visible
        # part of this notebook -- confirm where it lives (utilities?) before
        # calling this function.  It is expected to yield something assignable
        # to a row slice of length len(time).
        peak = normal_peak(spec[in_band, :], freq[in_band])
        full_spec[i, :len(time)] = peak

    return note_freq[:-1], times, full_spec

In [None]:
# Shared setup for the spectrogram cells that follow.
note_freq = util.frequencies('A0', 'D8')
df = np.diff(note_freq)  # gap between consecutive entries of note_freq

# Largest power of two that does not exceed 44100/24 * 4/3 samples.
smallest_nps = 2 ** int(np.log2(44100 / 24 * 4 / 3))

# Window length per note: the smallest power of two >= 44100/df, but never
# shorter than smallest_nps.  nps_uniq holds the distinct lengths, ascending.
nps = np.maximum(np.int64(2 ** np.ceil(np.log2(44100 / df))), smallest_nps)
nps_uniq = list(np.unique(nps))

# Band-edge factors: midpoints between 1 and 1/a, and between 1 and a, where
# a = 2**(1/12).
a = 2 ** (1 / 12)
lo_factor, hi_factor = 0.5 * (1 + 1 / a), 0.5 * (1 + a)

# Filled later with one (freq, time, spec) tuple per entry of nps_uniq.
data = []

In [None]:
# TensorFlow counterpart of the SciPy call
#     sg.spectrogram(wav, 44100, nperseg=nperseg, noverlap=nperseg - smallest_nps)
# built for the shortest window length only (nps_uniq[0]).

# Window length and hop shared by every op in this cell.  SciPy's
# `noverlap = nperseg - smallest_nps` corresponds to a hop of `smallest_nps`
# samples, so the same hop is used here to keep both spectrograms on the same
# time grid.  (The previous expression `2*smallest_nps - nps_uniq[0]` only
# gave that hop when nps_uniq[0] == smallest_nps.)
tf_frame_length = np.int32(nps_uniq[0])
tf_frame_step = np.int32(smallest_nps)

# Float32 time-domain signal(s).  No static shape is declared, so rank and
# length are only known once a value is fed.
signals = tf.placeholder(tf.float32)

# Windows of `tf_frame_length` samples taken every `tf_frame_step` samples
# along the last axis of `signals`.
frames = tf.contrib.signal.frame(signals, frame_length=tf_frame_length,
                                 frame_step=tf_frame_step)

# `stfts` is the complex Short-time Fourier Transform of `signals`; its last
# two axes are [n_frames, fft_unique_bins] with
# fft_unique_bins = tf_frame_length // 2 + 1.
stfts = tf.contrib.signal.stft(signals, frame_length=tf_frame_length,
                               frame_step=tf_frame_step,
                               fft_length=tf_frame_length)

# A power spectrogram is the squared magnitude of the complex-valued STFT.
power_spectrograms = tf.real(stfts * tf.conj(stfts))

# A magnitude spectrogram is the absolute value of the complex-valued STFT.
magnitude_spectrograms = tf.abs(stfts)

# Sliding patches of 64 entries (step 16) along axis 1 of the magnitude
# spectrogram.
# NOTE(review): axis=1 is the frame axis only for batched [batch, time] input;
# for a single 1-D signal axis 1 is the frequency-bin axis -- confirm intent.
spectrogram_patches = tf.contrib.signal.frame(
    magnitude_spectrograms, frame_length=64, frame_step=16, axis=1)

# Overlap-add of `frames`, using the same step that produced them (it was a
# hard-coded 32 before, which does not match the framing above).  Wherever
# frames overlap their samples are summed, so the result can exceed `signals`
# in amplitude there.
reconstructed_signals = tf.contrib.signal.overlap_and_add(
    frames, frame_step=tf_frame_step)


In [None]:
# Evaluate the magnitude-spectrogram graph on the guitar note recording.
# NOTE(review): `signals` is a float32 placeholder; the dtype and scaling of
# guitar_note_wav are set by util.gather_data -- confirm they are as expected.
with tf.Session() as sess:
    spects = sess.run(
        magnitude_spectrograms,
        feed_dict={signals: guitar_note_wav}
    )

In [None]:
# Show the magnitude spectrogram array returned by the TensorFlow session run.
spects

In [None]:
# Dimensions of the TensorFlow magnitude spectrogram array.
spects.shape

In [None]:
# SciPy reference spectrogram of the guitar note, using the shortest window
# length (nps_uniq[0]); the overlap is chosen so consecutive windows start
# smallest_nps samples apart.
f, t, sp = sg.spectrogram(
    guitar_note_wav,
    44100,
    nperseg=nps_uniq[0],
    noverlap=nps_uniq[0] - smallest_nps
)

In [None]:
# Show the spectrogram values returned by sg.spectrogram for the guitar note.
sp

In [None]:
# Display the SciPy spectrogram `sp` against its own time (`t`) and frequency
# (`f`) axes, then add a colour bar to the current figure.
# NOTE(review): the meaning of `ylim=100` is defined by util.display_spec --
# presumably an upper limit for the frequency axis; confirm.
# plt.hist(sp.ravel())
util.display_spec(t, f, sp, ylim=100)
plt.colorbar()

In [None]:
# Display the TensorFlow result with the same helper, reusing the `t` and `f`
# axes from the SciPy call.  `spects` is transposed before plotting and
# divided by 2.
# NOTE(review): the reason for the `/2` scaling is not evident from this
# notebook -- document or remove once the SciPy/TensorFlow scaling is settled.
# plt.hist(spects.ravel())
util.display_spec(t, f, spects.T/2, ylim=100)
plt.colorbar()

In [None]:
# Dimensions of the spectrogram that util.gather_data returned for the guitar
# C-major note sample.
guitar_note_spec.shape

In [None]:


# Rebuild `magnitude_spectrograms` with fixed STFT settings: 256-sample frames,
# a 64-sample step and a 256-point FFT, i.e. 256 // 2 + 1 = 129 frequency bins
# on the last axis.
# NOTE(review): this rebinds the name created in the earlier STFT cell, so
# re-running the session cell after this one evaluates this graph instead.
# NOTE(review): the original comment described producing overlapping
# fixed-size spectrogram patches for fixed-size inputs, but no patches are
# created in this cell.
magnitude_spectrograms = tf.abs(tf.contrib.signal.stft(
    signals, frame_length=256, frame_step=64, fft_length=256))


In [None]:
# Assemble one spectrogram row per note from the per-window-length
# spectrograms stored in `data`.
# NOTE(review): on a top-to-bottom run `data` is still the empty list from the
# setup cell (the loop that appends to it is commented out), so `data[0]`
# raises IndexError here.
# NOTE(review): `normal_peak` is not defined or imported in the visible part
# of this notebook -- confirm where it comes from.

# One row per note (nps has one entry fewer than note_freq); the column count
# is the length of the time axis of the first entry in `data`.
full_spec = np.zeros((len(note_freq)-1, len(data[0][1])))
for i,(nf,nperseg) in enumerate(zip(note_freq, nps)):
    # Pick the spectrogram computed with this note's window length.
    nps_ind = nps_uniq.index(nperseg)
    freq, time, spec = data[nps_ind]
    # Frequency band around the note, bounded by lo_factor and hi_factor.
    lo = nf*lo_factor
    hi = nf*hi_factor
    ind = (freq>=lo)&(freq<=hi)
    # Reduce the in-band bins with normal_peak and store the result in the
    # first len(time) columns of this note's row; later columns stay zero.
    peak = normal_peak(spec[ind,:], freq[ind])
    full_spec[i,:len(time)] = peak