In [1]:
import librosa
import librosa.display
import os
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
valid_species = ['Asellia tridens', 'Eptesicus bottae', 'Myotis emarginatus', 'Pipistrellus kuhli', 'Rhinopoma muscatellum', 'Rhyneptesicus nasutus', 'Rousettus aegyptius', 'Taphozous perforatus']

In [3]:
audio_folder_name= r"data\Identified calls"

def getLength(sp,filename):
    audio_path = join(audio_folder_name, sp, filename)
    data, sr = librosa.load(audio_path, sr = 22050)
    return np.size(data,0)/22050

file_names = []
file_length = []
for sp in valid_species:
    curr_file_names = listdir(join(audio_folder_name,sp))
    for f in curr_file_names:
        if f.endswith('.WAV') or f.endswith('.wav'):
            file_names.append(f)
            file_length.append(getLength(sp,f))


In [4]:
df = pd.DataFrame(list(zip(file_names,file_length)),columns=['Filename','Length'])
df.head(4)

Unnamed: 0,Filename,Length
0,ASETRI_058_20180212_D1000.WAV,8.874331
1,ASETRI_058_20180212_D1000_10X.wav,31.90381
2,ASETRI_058_20180212_D240.wav,35.423492
3,ASETRI_20180118_D1000.wav,11.378186


In [7]:
def getSpecies(x):
    part = x.split('_')
    return part[0]

df['Species'] =df['Filename'].apply(getSpecies)

In [8]:
df.head(5)

Unnamed: 0,Filename,Length,Species
0,ASETRI_058_20180212_D1000.WAV,8.874331,ASETRI
1,ASETRI_058_20180212_D1000_10X.wav,31.90381,ASETRI
2,ASETRI_058_20180212_D240.wav,35.423492,ASETRI
3,ASETRI_20180118_D1000.wav,11.378186,ASETRI
4,EPTBOT_064_20180219_D1000.WAV,10.751655,EPTBOT


In [9]:
df.groupby(['Species']).sum()

Unnamed: 0_level_0,Length
Species,Unnamed: 1_level_1
ASETRI,87.579819
EPTBOT,463.257188
MYOEMA,404.946077
PIPKUH,1025.28907
RHIMUS,5306.30644
RHYNAS,859.880499
ROUAEG,394.93941
TAPPER,1477.148571


In [10]:
df.groupby(['Species']).mean()

Unnamed: 0_level_0,Length
Species,Unnamed: 1_level_1
ASETRI,21.894955
EPTBOT,77.209531
MYOEMA,44.994009
PIPKUH,64.080567
RHIMUS,54.145984
RHYNAS,57.325367
ROUAEG,26.329294
TAPPER,46.160893


In [11]:
df.groupby(['Species']).std()

Unnamed: 0_level_0,Length
Species,Unnamed: 1_level_1
ASETRI,13.703255
EPTBOT,82.919556
MYOEMA,37.35776
PIPKUH,70.307293
RHIMUS,60.711622
RHYNAS,53.583431
ROUAEG,15.936114
TAPPER,38.222288


In [12]:
df.groupby(['Species']).min()

Unnamed: 0_level_0,Filename,Length
Species,Unnamed: 1_level_1,Unnamed: 2_level_1
ASETRI,ASETRI_058_20180212_D1000.WAV,8.874331
EPTBOT,EPTBOT_064_20180219_D1000.WAV,10.751655
MYOEMA,MYOEMA_010_20180131_D1000.WAV,8.020998
PIPKUH,PIPKUH_002_20180128_D1000.WAV,7.50898
RHIMUS,RHIMUS_001_20180119_D1000.WAV,7.50898
RHYNAS,RHYNAS_035_20180206_D1000.WAV,8.020998
ROUAEG,ROUAEG_039_20180209_D1000.WAV,10.239637
TAPPER,TAPPER_012_20180204_D1000.WAV,12.62898


In [13]:
df.groupby(['Species']).max()

Unnamed: 0_level_0,Filename,Length
Species,Unnamed: 1_level_1,Unnamed: 2_level_1
ASETRI,ASETRI_20180118_D1000.wav,35.423492
EPTBOT,EPTBOT_065_20180219_D240.wav,232.106122
MYOEMA,MYOEMA_071_20180304_D240.wav,106.5761
PIPKUH,PIPKUH_066_20180222_D1000_10X.WAV,220.159456
RHIMUS,RHIMUS_059_20180212_D240.wav,413.942902
RHYNAS,RHYNAS_069_20180227_D240.wav,172.372789
ROUAEG,ROUAEG_072_20180304_D240.wav,65.427846
TAPPER,TAPPER_034_20180204_D240.wav,167.249615


# Convert to Spectrograms

In [54]:
def plot_signals(signals):
    fig, axes = plt.subplots(nrows=1, ncols=1, sharex=False, sharey=True, figsize=(20,5))
    fig.suptitle('Time Series'+ list(signals.keys())[0], size=16)
    plt.plot(list(signals.values())[0])


In [2]:
def convert_single_audio_to_image(audio_path, image_path):
    data, sr = librosa.load(audio_path, sr = 44100)

    window_width =  0.025 #25 ms  window size 
    sliding = 0.01 #10ms stride 
    spec = librosa.feature.melspectrogram(y=data, sr=sr, n_fft = int(window_width*sr), hop_length =int(sliding*sr), fmax=sr/2)
    
    #Convert amplitude to decibels
    db_spec = librosa.power_to_db(spec, ref=np.max)
    
    for x in range(0, np.size(db_spec,1),300):
        fig = plt.figure()
        #if there is no more data, slide a bit backwards
        if(x+300 > np.size(db_spec,1)):
            limit = np.size(db_spec,1)
            librosa.display.specshow(np.reshape(np.copy(db_spec[:,limit-300:limit]),(128,-1)), sr=sr, hop_length =int(sliding*sr),fmax=sr/2, x_axis='time',y_axis='mel')
        else:
            librosa.display.specshow(np.reshape(np.copy(db_spec[:,x:x+300]),(128,-1)), sr=sr, hop_length =int(sliding*sr),fmax=sr/2, x_axis='time',y_axis='mel')
        
        plt.colorbar()
        file_end = "%d.png" %(x)
        fig.savefig(image_path.replace('.png',file_end))
        plt.close(fig)


In [None]:
def split(split_list):
    a =0
    

In [3]:
valid_species = ['Asellia tridens', 'Eptesicus bottae', 'Myotis emarginatus', 'Pipistrellus kuhli', 'Rhinopoma muscatellum', 'Rhyneptesicus nasutus', 'Rousettus aegyptius', 'Taphozous perforatus']

audio_folder_name= r"data\Identified calls"
image_folder_name=  r"data\speclab"
for sp in valid_species:
    curr_file_names = listdir(join(audio_folder_name,sp))
    print("count of Wav_files in {}= {}".format(len(curr_file_names),sp))
    for f in curr_file_names:
        if f.endswith('.WAV') or f.endswith('.wav'):
            audio_path = join(audio_folder_name,sp,f)
            all_caps_file_name = f.upper()
            image_path = join(image_folder_name,all_caps_file_name.replace('.WAV', '.png'))
            convert_single_audio_to_image(audio_path,image_path)
            #signal ={f:data}
            #plot_signals(signal)


count of Wav_files in 5= Asellia tridens
count of Wav_files in 6= Eptesicus bottae
count of Wav_files in 9= Myotis emarginatus
count of Wav_files in 16= Pipistrellus kuhli
count of Wav_files in 99= Rhinopoma muscatellum
count of Wav_files in 15= Rhyneptesicus nasutus
count of Wav_files in 15= Rousettus aegyptius
count of Wav_files in 32= Taphozous perforatus
