In [4]:
import sys # Python system library needed to load custom functions
import math # module with access to mathematical functions
import os # for changing the directory

import numpy as np  # for performing calculations on numerical arrays
import pandas as pd  # home of the DataFrame construct, _the_ most important object for Data Science

from IPython.display import Audio # for listening to our insects
import IPython
from scipy.fft import fft # function to calculate Fast Fourier Transform

import matplotlib.pyplot as plt  # allows creation of insightful plots
import seaborn as sns # another library to make even more beautiful plots

import torch
import torchaudio

sys.path.append('../../src') # add the source directory to the PYTHONPATH. This allows to import local functions and modules.
# enable rendering plots under the code cell that created it
%matplotlib inline

from eda_utils import show_sampling, signal_generator, plot_random_spec, plot_spec, plot_waveform # functions to create plots for and from audio data
from gdsc_utils import download_directory, PROJECT_DIR # function to download GDSC data from S3 bucket and our root directory
from config import DEFAULT_BUCKET  # S3 bucket with the GDSC data

os.chdir(PROJECT_DIR) # changing our directory to root

In [3]:
# Example training recording used by the exploration cells below.
testing_path = 'data/train/Atrapsaltacorticina_GBIF1946322682_IN18591645_27630.wav'

In [4]:
# Plot the recording with the eda_utils helper.
# NOTE(review): 44100 is presumably the sample rate in Hz expected by plot_waveform -- confirm against eda_utils.
plot_waveform(testing_path, 44100)

In [6]:
# Load the file; the two returned values are kept as the waveform and its sample rate.
waveform1, samplerate1 =  torchaudio.load(testing_path)

In [10]:
# Inspect the first entry of the waveform (presumably channel 0 -- see torchaudio.load docs):
# first its shape, then its absolute amplitudes.
print(waveform1[0].numpy().shape)
print(abs(waveform1[0].numpy()))

In [41]:
def get_max_amplitude_window_no_scanning(path: str,
                                   window_length_sec):
    '''
    Returns the index of the waveform sample that starts the window of length
    window_length_sec*samplerate with the highest summed absolute amplitude.

    Every possible start position is considered (no scan interval). All window
    sums are derived from a single cumulative sum over the absolute amplitudes,
    so the search is linear in the number of samples.

    Args:
        path (str): path to data, as in torchaudio.load
        window_length_sec: window length in seconds to sum absolute amplitudes over

    Returns:
        max_index (int): start index of the window with the highest summed
            absolute amplitude (the earliest one on ties). 0 is returned when
            the window is empty or at least as long as the waveform.
    '''
    waveform, samplerate = torchaudio.load(path)

    # only the first entry of the loaded waveform is analysed
    amplitudes = np.abs(waveform[0].numpy())
    waveform_length = amplitudes.shape[0]
    window_length = math.floor(window_length_sec * samplerate)

    # nothing to search: the window is empty or does not fit strictly inside the waveform
    if window_length <= 0 or window_length >= waveform_length:
        return 0

    # cumulative[i] holds the sum of the first i absolute amplitudes;
    # float64 keeps the running total accurate for long recordings
    cumulative = np.zeros(waveform_length + 1, dtype=np.float64)
    np.cumsum(amplitudes, dtype=np.float64, out=cumulative[1:])

    # window_sums[start] is the sum over [start, start + window_length) for every
    # start from 0 up to and including waveform_length - window_length
    window_sums = cumulative[window_length:] - cumulative[:-window_length]

    # argmax returns the first maximum, i.e. the earliest loudest window
    return int(np.argmax(window_sums))
    

In [13]:
# Exhaustive search (every start index) for the loudest 5-second window.
# The scanning variant get_max_amplitude_window_index is only defined in a later cell,
# so it cannot be called here on a fresh kernel.
get_max_amplitude_window_no_scanning(testing_path, 5)

In [14]:
# Convert a sample index to seconds by dividing by 44100.
# NOTE(review): 205995 is presumably the index returned by the cell above and 44100 the sample rate in Hz -- confirm.
205995/44100

In [2]:
def get_max_amplitude_window_index(path: str,
                                   window_length_sec,
                                   scan_param = 50, 
                                   verbose = True):
    '''
    Returns index of waveform that starts the window of length window_length_sec*samplerate, with the highest summed absolute amplitude.
    Only start indices that are multiples of scan_param are evaluated, to speed up the calculation.

    Args:
        path (str): path to data, as in torchaudio.load
        window_length_sec: window length in seconds to calculate the sum over absolute amplitudes
        scan_param (int): step, in samples, between two candidate window starts; must be >= 1
        verbose (bool): to print return index in seconds

    Returns:
        max_index (int): start index of window with max amplitudes (the earliest
            one on ties). 0 is returned, without printing, when the window is
            empty or at least as long as the waveform.

    Raises:
        ValueError: if scan_param is smaller than 1
    '''

    waveform, samplerate =  torchaudio.load(path)

    # only the first entry of the loaded waveform is analysed; take abs() once, not per window
    amplitudes = np.abs(waveform[0].numpy())
    waveform_length = amplitudes.shape[0]
    window_length = math.floor(window_length_sec * samplerate)

    # nothing to search: the window is empty or does not fit strictly inside the waveform
    if window_length <= 0 or window_length >= waveform_length:
        return 0

    if scan_param < 1:
        raise ValueError('scan_param must be a positive integer')

    # candidate starts 0, scan_param, 2*scan_param, ... including the last one
    # for which the whole window still fits into the waveform
    starts = np.arange(0, waveform_length - window_length + 1, scan_param)

    # cumulative[i] holds the sum of the first i absolute amplitudes;
    # float64 keeps the running total accurate for long recordings
    cumulative = np.zeros(waveform_length + 1, dtype=np.float64)
    np.cumsum(amplitudes, dtype=np.float64, out=cumulative[1:])

    # sum over [start, start + window_length) for every candidate start
    window_sums = cumulative[starts + window_length] - cumulative[starts]

    # argmax returns the first maximum, i.e. the earliest loudest window
    max_index = int(starts[np.argmax(window_sums)])

    if verbose:
        print('window starts at:',max_index/samplerate, 'seconds')
    return max_index

In [30]:
# 10-second window with the default scan interval (scan_param=50).
get_max_amplitude_window_index(testing_path, 10)

In [51]:
# Length of the first entry of waveform1, which was loaded from testing_path earlier in the notebook.
print(len(waveform1[0]))

In [60]:
# 5-second window, coarse scan: candidate window starts are 11025 samples apart.
get_max_amplitude_window_index(testing_path, 5, 11025)

In [61]:
# Same 5-second window with a finer scan: candidate window starts are 100 samples apart.
get_max_amplitude_window_index(testing_path, 5, 100)

In [76]:
# The remaining cells repeat the same experiment on further recordings:
# define the path, search the loudest 5-second window (scan step 100), then plot the waveform.
testing_path2 = 'data/val/Aleetacurvicosta_GBIF3039381926_IN68785661_161247.wav'

In [77]:
get_max_amplitude_window_index(testing_path2, 5, 100)

In [78]:
# NOTE(review): 44100 is presumably the sample rate in Hz expected by plot_waveform -- confirm against eda_utils.
plot_waveform(testing_path2, 44100)

In [79]:
# Validation recording 3.
testing_path3 = 'data/val/Barbitistesyersini_XC752462-dat047-010_edit7.wav'

In [80]:
get_max_amplitude_window_index(testing_path3, 5, 100)

In [81]:
plot_waveform(testing_path3, 44100)

In [82]:
# Validation recording 4.
testing_path4 = 'data/val/Chorthippusbrunneus_XC751370-dat001-021_edit2.wav'

In [83]:
get_max_amplitude_window_index(testing_path4, 5, 100)

In [84]:
plot_waveform(testing_path4, 44100)

In [85]:
# Training recording 5.
testing_path5 = 'data/train/Chorthippusalbomarginatus_XC751375-dat001-029.wav'

In [86]:
get_max_amplitude_window_index(testing_path5, 5, 100)

In [87]:
plot_waveform(testing_path5, 44100)

In [88]:
# Training recording 6: path definition and window search in one cell.
testing_path6 = 'data/train/Chorthippusalbomarginatus_XC751376-dat001-030.wav'
get_max_amplitude_window_index(testing_path6, 5, 100)

In [89]:
plot_waveform(testing_path6, 44100)

In [5]:
# Training recording 7: only the window search is run here; the matching plot call
# in the next cell is commented out.
testing_path7 = 'data/train/Chorthippusalbomarginatus_XC751400-dat023-001.wav'
get_max_amplitude_window_index(testing_path7, 5, 100)

In [None]:
# Plotting this file restarts the kernel every time: at 56.9 MB it is too big to plot.
#plot_waveform(testing_path7, 44100)