In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os 
import scipy.io.wavfile as wavfile
import noisereduce as nr 
import pandas as pd
from scipy.signal import butter, filtfilt
import soundfile as sf
import numpy as np
import librosa.display
from scipy.interpolate import interp1d
from pydub import AudioSegment


# This code was used to determine the length of the clips that are fed to the neural networks. It was also used to cut clips longer than the chosen length down to that length.

Running this code now reports that there are no clips longer than 10 seconds, because every clip longer than 10 seconds was removed from the data folders after it had been either cut down or split into new files.
To see the original files, go to 'data/not_needed_sounds/longer_10_seconds_clips_from_ELP'.

In [None]:
def load_data(file_name):
    """Read a WAV file and return it as a mean-centred, single-channel waveform.

    Args:
        file_name: Path of the WAV file to read.

    Returns:
        A ``(wave, sample_rate)`` pair. ``wave`` is the decoded audio with its
        mean subtracted and the trailing channel axis squeezed out;
        ``sample_rate`` is the value reported by ``tf.audio.decode_wav``.
    """
    # Raw file contents, held in a string tensor.
    raw_bytes = tf.io.read_file(file_name)
    # desired_channels=1 asks the decoder for a single channel.
    samples, sample_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    # Centre the signal around zero by subtracting its mean.
    centred = samples - tf.reduce_mean(samples)
    # The samples keep the scale they were decoded with; the optional rescaling
    # to int16 magnitude (* 32768, still float32) is intentionally not applied.
    return tf.squeeze(centred, axis=-1), sample_rate

In [None]:
lengths = []
counter = 0

# Clips from the Elephant Listening Project.
clips_dir = os.path.join('/Users', 'rosameliacarioni', 'University', 'Thesis', 'code', 'data', 'Clips')
for file in os.listdir(clips_dir):
    if '.wav' not in file:
        continue  # only wav clips are measured
    tensor_wave, sample_rate = load_data(os.path.join(clips_dir, file))
    lengths.append(len(tensor_wave))

# Each entry is a number of samples; divide by the sample rate (8000) to get seconds.

In [None]:
# From https://data.mendeley.com/datasets/x48cwz364j/3
data_dir = os.path.join('/Users', 'rosameliacarioni', 'University', 'Thesis', 'code', 'data')

# Background sounds: every file in the folder is measured.
background_dir = os.path.join(data_dir, 'Sounds_background')
for file in os.listdir(background_dir):
    tensor_wave, sample_rate = load_data(os.path.join(background_dir, file))
    lengths.append(len(tensor_wave))

# Gunshot sounds: the folder also contains a hidden file, so only '.WAV' files are measured.
gunshots_dir = os.path.join(data_dir, 'Sounds_gunshots')
for file in os.listdir(gunshots_dir):
    if '.WAV' not in file:
        continue
    tensor_wave, sample_rate = load_data(os.path.join(gunshots_dir, file))
    lengths.append(len(tensor_wave))

In [None]:
# Duration of the longest clip in seconds (32.45 s, i.e. 259624 samples, when this was run).
longest = max(lengths)/8000
# Average clip duration in seconds. The sample counts are converted to float64
# first: reduce_mean keeps the dtype of its input, so averaging the integer
# counts directly would truncate the mean to a whole number of samples.
mean = tf.math.reduce_mean(tf.constant(lengths, dtype=tf.float64))/8000


Investigate how many clips longer than 10 seconds exist

In [None]:
# Clips are kept at up to 10 seconds; count how many exceed that (10 s * 8000 samples/s).
count = sum(1 for n_samples in lengths if n_samples > 10*8000)
count

Extract their paths so that we can shorten them down 

In [None]:
# File names of the clips longer than 10 seconds (38 according to the original note).
# Only data from the Elephant Listening Project is considered, as the extra data is all 4.09 seconds long.
paths_greater_10 = []
clips_dir = os.path.join('/Users', 'rosameliacarioni', 'University', 'Thesis', 'code', 'data', 'Clips')
for file in os.listdir(clips_dir):
    # Same filter as the first pass over this folder, so that a non-wav
    # (e.g. hidden) file is skipped instead of being handed to the decoder.
    if '.wav' not in file:
        continue
    tensor_wave, sample_rate = load_data(os.path.join(clips_dir, file))
    if len(tensor_wave) > 10*8000:
        paths_greater_10.append(file)
    # The clip lengths are deliberately NOT appended to `lengths` here: the
    # first pass already recorded them, and adding them again would count every
    # clip twice in any statistic computed from `lengths` afterwards.

Creating new clips 

In [None]:
# time in seconds 
def clip_audio_and_save_till_end(time_start, file_name, extra_name):
    """Export a clip of at most 10 seconds that starts at ``time_start``.

    The clip runs from ``time_start`` to ``time_start + 10`` seconds, or to the
    end of the recording when fewer than 10 seconds remain. It is written as a
    WAV file whose path is the source path with ``extra_name`` inserted before
    the extension.

    Args:
        time_start: Start of the clip, in seconds from the beginning of the file.
        file_name: Path of the source WAV file.
        extra_name: Suffix placed between the source path's stem and ``.wav``.
    """
    ms_per_second = 1000
    max_clip_ms = 10 * ms_per_second

    audio = AudioSegment.from_wav(file_name)

    # Strip only the real extension: str.replace('.wav', '') would also remove
    # a '.wav' occurring elsewhere in the path and would miss an upper-case '.WAV'.
    stem, _ = os.path.splitext(file_name)
    file_destination = stem + extra_name + '.wav'

    # len(audio) is the duration in milliseconds. The end of the clip is capped
    # at the end of the recording explicitly instead of relying on the slice
    # being clamped when it overshoots.
    start_ms = time_start * ms_per_second
    end_ms = min(start_ms + max_clip_ms, len(audio))

    audio[start_ms:end_ms].export(file_destination, format='wav')

In [None]:
# time in seconds 
def clip_audio_and_save(time_start, time_end, file_name, extra_name):
    """Export the part of a WAV file between ``time_start`` and ``time_end``.

    The clip is written as a WAV file whose path is the source path with
    ``extra_name`` inserted before the extension.

    Args:
        time_start: Start of the clip, in seconds from the beginning of the file.
        time_end: End of the clip, in seconds from the beginning of the file.
        file_name: Path of the source WAV file.
        extra_name: Suffix placed between the source path's stem and ``.wav``.
    """
    ms_per_second = 1000

    audio = AudioSegment.from_wav(file_name)

    # Strip only the real extension: str.replace('.wav', '') would also remove
    # a '.wav' occurring elsewhere in the path and would miss an upper-case '.WAV'.
    stem, _ = os.path.splitext(file_name)
    file_destination = stem + extra_name + '.wav'

    # The audio segment is sliced in milliseconds, so convert the bounds from seconds.
    clip = audio[time_start*ms_per_second: time_end*ms_per_second]

    clip.export(file_destination, format='wav')

I manually looked in the lengths[] and selected the clips to extract such that:
- For files without gunshots, similar size length clips were created
- For files with gunshots, the entire gunshot was kept in one clip. 

0. other90 is divided into 2 clips of 10 seconds each
1. other84 is divided into 2 clips of 10 seconds each
2. other85
3. other91 is divided into 3 clips of 10 seconds each 
4. other87 
5. other93 is divided into 2 clips of 6 and 7 seconds 
6. other78 is divided into 3 clips of 10 seconds 
7. other1 is divided into 2 clips of 5 and 6 seconds 
8. other79 is divided into 2 clips of 10 seconds each 
9. other92 is divided into 2 clips 
10. other86 is divided into 2 clips 
11. other82 is divided into 3
12. ecoguns839: there are 3 gunshots, so the clip will be divided into 3
13. other83: divided into 2  
14. other81: divided into 2 
15. ecoguns813: there are 6 gunshots but 3 are one next to the other, so I will divide it into 4 clips 
16. other43: divided into 2 
17. other80: divided into 2
18. ecoguns848:  there are 7 gunshots , divided into 4 clips 
19. ecoguns669: there are 6 gunshots, divided into 5 clips 
20. ecoguns695: there are 4 gunshots, divided into 4 clips 
21. ecoguns869: there are 5 gunshots, divided into 4 clips
22. ecoguns663: there are 3 gunshots and the audio is just a bit longer than 10 seconds, so we keep it as 10 
23. ecoguns931: there are 6 gunshots and the audio is about 12 seconds, so I will split it in 2 
24. other72
25. ecoguns822: there are 3 gunshots and the audio is just a bit longer than 10 seconds, so we keep it as 10 
26. other71 
27. pnnn2: there are about 8 gunshots 
28. ecoguns763
29. other75 
30. other61 
31. other49
32. other88 
33. other77 
34. other63
35. other62
36. other76
37. other89