### User Input

In [None]:
# User-configurable location of the training data; the path is relative, so it
# is resolved against the directory the notebook kernel is started in.
training_data_path = "../data_training/"

### Imports

In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import matplotlib.cbook
import IPython.display as ipd
import os
import pathlib 
import librosa
import librosa.display
import scipy.io.wavfile as wav
import scipy.signal as signal

import os 
import scipy
from scipy.io import wavfile
from scipy.fftpack import fft


### Plot Settings

In [None]:
# --- colour palette: named hex constants, gathered in plotting order ---
CB91_Blue, CB91_Green, CB91_Pink = '#2CBDFE', '#47DBCD', '#F3A0F2'
CB91_Purple, CB91_Violet, CB91_Amber = '#9D2EC5', '#661D98', '#F5B14C'
color_list = [CB91_Blue, CB91_Green, CB91_Pink, CB91_Purple, CB91_Violet, CB91_Amber]

# --- matplotlib: ggplot base style, then the notebook's own overrides ---
plt.style.use('ggplot')
plt.rcParams.update({
    'axes.prop_cycle': plt.cycler(color=color_list),
    'font.size': 22,
    'figure.facecolor': 'black',
    'text.color': 'white',
})

# --- seaborn ---
# NOTE(review): sns.set() rewrites matplotlib rcParams (style, context and
# palette), so it presumably resets the overrides applied just above because it
# runs after them. Order is kept as-is here to preserve the current figures;
# confirm which look is actually intended.
sns.set(style='ticks')

# --- warnings: silence FutureWarning and matplotlib deprecation notices ---
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=matplotlib.MatplotlibDeprecationWarning)

# --- pandas: allow wide/long frames to be displayed without truncation ---
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

### Generate Images of Signal over Time

In [None]:
path_to_raw_data = "../data_augmented"
mammal_names = ['HumpbackWhale', 'Walrus', 'BowheadWhale', 'Fin_FinbackWhale', 'KillerWhale', 'EmptyOcean']


def time_axis_seconds(n_samples, fs):
    """Return the time stamp (in seconds) of each of `n_samples` samples.

    Parameters
    ----------
    n_samples : int
        Number of samples in the recording.
    fs : int or float
        Sampling rate in Hz, as returned by `wavfile.read`.

    Returns
    -------
    numpy.ndarray
        Array of length `n_samples` holding `k / fs` for k = 0 .. n_samples-1.
    """
    return np.arange(n_samples) / float(fs)


# For every species folder, save one waveform image per .wav file under
# ../images/<species>/time/.
for mammal_name in mammal_names:
    data_path = path_to_raw_data + "/" + mammal_name + "/"
    output_dir = "../images/" + mammal_name + "/" + "time/"

    print(f"processing {mammal_name}")
    for dirpath, dirnames, filenames in os.walk(data_path):

        os.makedirs(output_dir, exist_ok=True)

        for wav_file in filenames:
            # Anything that is not a .wav file cannot be parsed by
            # wavfile.read, so skip it instead of aborting the whole run.
            if not wav_file.lower().endswith(".wav"):
                continue

            audio_file = os.path.join(dirpath, wav_file)
            print(f"processing file {audio_file}")
            fs, audio_data = wavfile.read(audio_file)

            # Plot against real time so the x-axis matches its 'Time (s)'
            # label (plotting the array alone would use the sample index).
            plt.plot(time_axis_seconds(audio_data.shape[0], fs), audio_data)
            plt.title('Raw Audio Signal in Time',size=16)
            plt.xlabel('Time (s)')
            plt.ylabel('Amplitude')
            figure_name = output_dir + os.path.splitext(wav_file)[0]
            plt.savefig(f'{figure_name}.png')
            # Reuse the same figure for the next file.
            plt.clf()

### Generate Images of Signal over Frequency

In [None]:
path_to_raw_data = "../data_augmented"
mammal_names = ['HumpbackWhale', 'Walrus', 'BowheadWhale', 'Fin_FinbackWhale', 'KillerWhale', 'EmptyOcean']


def one_sided_power_spectrum(audio_data, fs):
    """Compute the one-sided power spectrum of a recording.

    Parameters
    ----------
    audio_data : numpy.ndarray
        Samples with time along axis 0 (1-D mono, or 2-D samples x channels).
        Must contain at least one sample.
    fs : int or float
        Sampling rate in Hz.

    Returns
    -------
    freqs_hz : numpy.ndarray
        Frequency of each retained bin, from 0 Hz up to (at most) fs / 2.
    power : numpy.ndarray
        Squared, length-normalised FFT magnitude of the non-negative
        frequencies; every bin that has a mirrored negative-frequency twin is
        doubled so the one-sided spectrum carries the full signal power.
    """
    n_samples = audio_data.shape[0]
    n_bins = int(np.ceil((n_samples + 1) / 2.0))  # non-negative half of the spectrum

    # axis=0 keeps the transform along time even for multi-channel input.
    spectrum = fft(audio_data, axis=0)[:n_bins]
    power = (np.abs(spectrum) / float(n_samples)) ** 2

    if n_samples % 2 > 0:
        # odd length: no Nyquist bin, everything except DC has a mirror
        power[1:] *= 2
    else:
        # even length: DC and the Nyquist bin are unique, leave them alone
        power[1:-1] *= 2

    freqs_hz = np.arange(n_bins) * (fs / float(n_samples))
    return freqs_hz, power


# For every species folder, save one power-spectrum image per .wav file under
# ../images/<species>/frequency/.
for mammal_name in mammal_names:
    data_path = path_to_raw_data + "/" + mammal_name + "/"
    output_dir = "../images/" + mammal_name + "/" + "frequency/"

    print(f"processing {mammal_name}")
    for dirpath, dirnames, filenames in os.walk(data_path):

        os.makedirs(output_dir, exist_ok=True)

        for wav_file in filenames:
            # Anything that is not a .wav file cannot be parsed by wavfile.read.
            if not wav_file.lower().endswith(".wav"):
                continue

            audio_file = os.path.join(dirpath, wav_file)
            print(f"processing file {audio_file}")
            fs, audio_data = wavfile.read(audio_file)

            if audio_data.shape[0] == 0:
                print(f"skipping empty file {audio_file}")
                continue

            # The spectrum must be sized by the number of audio samples, not
            # by the length of the file-path string.
            freq_axis_hz, freq_power = one_sided_power_spectrum(audio_data, fs)

            # Bins with exactly zero power map to -inf dB, which matplotlib
            # simply leaves out; silence the divide-by-zero warning for them.
            with np.errstate(divide='ignore'):
                power_db = 10*np.log10(freq_power)

            plt.plot(freq_axis_hz/1000.0, power_db) # power spectrum
            plt.title('Audio Signal in Frequency',size=16)
            plt.xlabel('Frequency (kHz)')
            plt.ylabel('Power Spectrum (dB)')
            figure_name = output_dir + os.path.splitext(wav_file)[0]
            plt.savefig(f'{figure_name}.png')
            # Reuse the same figure for the next file.
            plt.clf()

### Generate Images of Spectrograms

In [None]:
path_to_raw_data = "../data_marine_raw"
mammal_names = ['HumpbackWhale', 'Walrus', 'BowheadWhale', 'Fin_FinbackWhale', 'KillerWhale', 'EmptyOcean']
columns= len(mammal_names)

# One figure with two stacked panels is reused for every file:
# ax[0] holds the spectrogram, ax[1] the standardised waveform.
fig, ax = plt.subplots(2,1,figsize=(22,17))
fig.subplots_adjust(wspace=0.05, hspace=0.1)

# Lower bound of the spectrogram colour scale.
spectrogram_vmin = -1e1

for mammal_name in mammal_names:
    data_path = path_to_raw_data + "/" + mammal_name + "/"
    output_dir = "../images/" + mammal_name + "/" + "spectrograms/"
    print(f"processing {mammal_name}")
    os.makedirs(output_dir, exist_ok=True)
    for dirpath, dirnames, filenames in os.walk(data_path):

        for wav_file in filenames:
            # Anything that is not a .wav file cannot be parsed by wavfile.read.
            if not wav_file.lower().endswith(".wav"):
                continue

            audio_file = os.path.join(dirpath, wav_file)
            print(f"processing file {audio_file}")
            rate, data = wavfile.read(audio_file)

            if data.shape[0] == 0:
                print(f"skipping empty file {audio_file}")
                continue
            if data.ndim > 1:
                # Mix multi-channel audio down to mono so the spectrogram is a
                # single 2-D (frequency x time) image that pcolormesh can draw.
                data = data.mean(axis=1)

            f, t, Sxx = signal.spectrogram(data, fs=rate, nperseg=512, nfft=2048)
            # NOTE(review): Sxx is presumably a power quantity (spectrogram's
            # default mode), for which dB is conventionally 10*log10; the 20x
            # factor is kept so existing images keep their scaling - confirm.
            d = 20*np.log10(Sxx+1e-10)
            # Keep vmax >= vmin so very quiet clips do not produce an
            # inverted colour range.
            ax[0].pcolormesh(t,f,d, vmin=spectrogram_vmin,vmax=max(d.max(), spectrogram_vmin))
            ax[0].set_title(mammal_name, fontsize=20)
            ax[0].set_xlabel("time")
            ax[0].set_ylabel("frequency")

            # Standardise the waveform to zero mean / unit variance. A
            # constant (e.g. silent) clip has zero spread, so leave it at zero
            # instead of dividing by zero.
            centered = data - data.mean()
            spread = data.std()
            norm_data = centered / spread if spread > 0 else centered
            ax[1].plot(norm_data,lw=0.03)
            ax[1].axis("off")

            figure_name = output_dir + os.path.splitext(wav_file)[0]
            fig.savefig(f'{figure_name}.png')
            # Wipe both panels so the next file starts from a clean figure.
            ax[0].clear()
            ax[1].clear()

# Release the shared figure once every file has been rendered.
plt.close(fig)



### Generate Plots of Highest Audible Frequency for a Mammal Call

In [None]:
path_to_raw_data = "../data_augmented"
mammal_names = ['HumpbackWhale', 'Walrus', 'BowheadWhale', 'Fin_FinbackWhale', 'KillerWhale', 'EmptyOcean']
max_frequencies_dict = dict()


def highest_audible_frequency(audio_data, fs, threshold=5):
    """Return the highest frequency whose FFT magnitude exceeds `threshold`.

    Parameters
    ----------
    audio_data : numpy.ndarray
        Samples with time along axis 0 (1-D mono, or 2-D samples x channels).
    fs : int or float
        Sampling rate in Hz.
    threshold : float, optional
        Minimum raw (un-normalised) FFT magnitude for a bin to count as
        audible. NOTE(review): the default of 5 is in raw sample units, so for
        integer PCM recordings most bins will presumably exceed it - confirm
        this is the intended cut-off.

    Returns
    -------
    float
        Frequency in Hz of the highest bin in the first half of the spectrum
        (bins 0 .. N//2 - 1) whose magnitude is above `threshold`, or NaN when
        no bin qualifies (silent or too-short recordings).
    """
    n_samples = audio_data.shape[0]
    half = n_samples // 2
    if half == 0:
        # Fewer than two samples: there is no spectrum half to inspect.
        return float("nan")

    # axis=0 keeps the transform along time even for multi-channel input.
    magnitude = np.abs(scipy.fft.fft(audio_data, axis=0))[:half]
    if magnitude.ndim > 1:
        # A bin is audible if it is loud enough in any channel.
        magnitude = magnitude.reshape(half, -1).max(axis=1)

    # The sample spacing is simply 1/fs; no time vector is needed to get it.
    freqs_side = scipy.fftpack.fftfreq(n_samples, d=1.0 / fs)[:half]

    audible_freqs = freqs_side[magnitude > threshold]
    if audible_freqs.size == 0:
        return float("nan")
    return float(audible_freqs.max())


# Collect, per species, the highest audible frequency of every recording.
for mammal_name in mammal_names:
    data_path = path_to_raw_data + "/" + mammal_name + "/"
    max_frequencies_list = []
    print(f"processing {mammal_name}")
    for dirpath, dirnames, filenames in os.walk(data_path):

        for wav_file in filenames:
            # Anything that is not a .wav file cannot be parsed by wavfile.read.
            if not wav_file.lower().endswith(".wav"):
                continue

            audio_file = os.path.join(dirpath, wav_file)
            print(f"processing file {audio_file}")
            fs, audio_data = wavfile.read(audio_file)

            HighestAudibleFrequency = highest_audible_frequency(audio_data, fs)
            max_frequencies_list.append(HighestAudibleFrequency)
    max_frequencies_dict[mammal_name] = max_frequencies_list

# One line plot per species: highest audible frequency against call number.
for mammal, max_freqs in max_frequencies_dict.items():
    if not max_freqs:
        # Nothing was measured for this species, so there is nothing to draw.
        print(f"no recordings found for {mammal}, skipping plot")
        continue

    output_dir = "../images/" + mammal + "/"
    # This cell may be run before any other image cell created the folder.
    os.makedirs(output_dir, exist_ok=True)

    plt.plot(max_freqs)
    plt.title('Maximum Frequencies in Mammal Call',size=16)
    plt.xlabel('Mammal Call Number')
    plt.ylabel('Frequency (Hz)')
    figure_name = output_dir + "max_frequencies"
    plt.savefig(f'{figure_name}.png')
    plt.clf()