# Utilities for Thesis
### Figure generators and such

# Generic Imports

In [None]:
import os
import math
import numpy as np
import madmom
from scipy.io import wavfile
import matplotlib.pyplot as plt

# Initialization

### Debugger

In [None]:
# Global switch for diagnostic output in this notebook.
debug = True


class Debugger:
    """Minimal switchable logger for notebook output.

    Messages are printed only while ``debug`` is truthy, so all diagnostic
    output can be silenced by flipping a single flag.
    """

    def __init__(self, debug):
        """Store the flag that enables (truthy) or disables (falsy) logging."""
        self.debug = debug

    def log(self, *msg):
        """Print the given values space-separated when debugging is enabled.

        Accepts any number of positional arguments, like ``print``.
        """
        if self.debug:
            # Unpack the arguments so they are printed as individual values
            # rather than as the repr of the argument tuple.
            print(*msg)


dbg = Debugger(debug)
dbg.log('debugger initialized')

### Load Audio

In [None]:
# Expected clip duration in seconds; enforced by the length check below.
audio_len = 5
file_name = 'zztop_badtothebone_mono_5sec.wav'
audio_path_relative = '../../audio_for_thesis'

# Resolve the audio directory relative to the current working directory.
audio_path = os.path.join(os.getcwd(), audio_path_relative)
audio_path_absolute = os.path.abspath(audio_path)
if not os.path.exists(audio_path_absolute):
    # Build one formatted message: passing the path as a second exception
    # argument would render the error as a tuple.
    raise FileNotFoundError(
        "Audio file path doesn't exist: {}".format(audio_path_absolute))

audio_path_full = os.path.join(audio_path_absolute, file_name)
if not os.path.exists(audio_path_full):
    raise FileNotFoundError(
        "Audio file doesn't exist: {}".format(audio_path_full))

# fs is the sampling rate in Hz, audio_buffer holds the samples.
fs, audio_buffer = wavfile.read(audio_path_full)

# Sanity checks: the figures below rely on these clip properties.
assert fs == 44100, 'Sampling rate should be 44100 Hz'
assert len(audio_buffer.shape) == 1, 'Audio should be mono'
assert len(audio_buffer) == audio_len * fs, 'Audio should be exactly %d seconds long' % audio_len

dbg.log(audio_path_full)
dbg.log('audio buffer shape:', audio_buffer.shape, 'sampling rate:', fs)

# Figures

### Time Domain Audio Signal Representation

In [None]:
# Peak-normalize in floating point. Dividing by the signed maximum alone
# would let negative peaks fall below -1, and if the buffer has an integer
# dtype the absolute value of its most negative sample cannot be represented.
samples = audio_buffer.astype(np.float64)
peak = np.abs(samples).max()
# A silent (all-zero) clip is plotted as-is to avoid dividing by zero.
plt.plot(samples / peak if peak > 0 else samples)

# One tick every half second; positions are sample indices, labels seconds.
audio_tics = np.arange(0, len(audio_buffer), fs/2)
# rotation must be numeric (or 'horizontal'/'vertical'), not the string '45'.
plt.xticks(audio_tics, ["{:.1f}".format(tic/fs) for tic in audio_tics], rotation=45)
plt.xlim([0, len(audio_buffer)])
plt.xlabel('Time (Seconds)')
plt.ylabel('Amplitude (Normalized)');

### Frequency Domain Audio Signal Representation

In [None]:
'''
# Original:

frame_size = 2048
start_frame = 2*fs

frame = audio_buffer[start_frame:start_frame+frame_size]
spectrum = np.fft.fft(frame)

abs_spec_size = int(frame_size/2+1)
abs_spec = np.abs(spectrum)[:abs_spec_size]

fft_freqs = np.fft.fftfreq(frame_size)*fs
freq_ticks = np.arange(0, abs_spec_size, 128, dtype=int)
freq_tick_labels = ["{:.1f}Hz".format(np.abs(fft_freqs[idx])) for idx in freq_ticks]
print(fft_freqs[1024])


plt.plot(abs_spec/abs_spec.max())
plt.xticks(freq_ticks, freq_tick_labels, rotation="45");
'''

frame_size = audio_len * fs  # 220500 samples for a 5 s clip at 44100 Hz

frame = audio_buffer[0:frame_size]  # take the whole clip as a single frame
spectrum = np.fft.fft(frame)  # FFT length equals the frame length

# The second half of the spectrum mirrors the first, so only the bins from
# DC up to and including Nyquist are kept (110251 for the 5 s clip).
abs_spec_size = frame_size // 2 + 1
abs_spec = np.abs(spectrum)[:abs_spec_size]

# Bin-centre frequencies in Hz: fftfreq returns cycles per sample in the
# order [0 ... 0.5, -0.5 ... -0], so multiplying by the sampling rate gives
# Hz (same as passing d=1/fs). Only the first abs_spec_size entries are used
# for the plot; the remainder are the mirrored negative frequencies.
fft_freqs = np.fft.fftfreq(frame_size) * fs
# Last bin that is plotted (the Nyquist bin for an even frame size).
dbg.log(fft_freqs[abs_spec_size - 1])

# Tick positions: split the plotted range into 7 equal intervals, which
# yields 8 ticks including both ends (step 15750 for the 5 s clip).
n_tick_intervals = 7
tick_step = max(1, (abs_spec_size - 1) // n_tick_intervals)
freq_ticks = np.arange(0, abs_spec_size, tick_step, dtype=int)
# Labels in Hz for those indices; np.abs because fftfreq reports the
# Nyquist bin of an even-length frame with a negative sign.
freq_tick_labels = ["{:.1f}".format(np.abs(fft_freqs[idx])) for idx in freq_ticks]

plt.plot(abs_spec/abs_spec.max())
# rotation must be numeric (or 'horizontal'/'vertical'), not the string "45".
plt.xticks(freq_ticks, freq_tick_labels, rotation=45)

plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude / Power (Normalized)')

dbg.log('spectrum resolution:', abs_spec.shape)
dbg.log('fft frequencies:', fft_freqs.shape)
dbg.log('indices to be used for freq. values', freq_ticks)
dbg.log('frequencies at above indices taken from fft frequencies', freq_tick_labels)

### Hann Window

### Time/Frequency Domain Audio Signal Representation

### Mel Scale

### Decibel Scale