In [1]:
import pathlib 
import os
import regex as re
import librosa
import pandas as pd 
import numpy as np

In [2]:
folder = "Selection Tables"
labels = [str(file) for file in pathlib.Path(os.path.join(os.getcwd(), folder)).glob("*.txt")]

In [3]:
keys = []

for file in labels:
    with open(file, "r") as f:
        df = pd.read_csv(f, delimiter = "\t")
        unique_keys = df["Label"].unique().tolist()
        keys.extend(unique_keys)

In [4]:
print(f"Unique keys: {set(keys)}")

Unique keys: {nan, 'nothing', 'mm', 'other', 'sc', 'bird', 'human', 'insect', 'gc'}


In [5]:
df.head()

Unnamed: 0,Selection,View,Channel,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Begin File,Begin Path,Label
0,1,Spectrogram 1,1,7.158055,7.858694,2998.7,3740.0,20200929_033839.WAV,D:\Fauna & Flora International\VietnamAudioSto...,bird
1,2,Spectrogram 1,1,10.526513,11.065466,3508.3,4172.4,20200929_033839.WAV,D:\Fauna & Flora International\VietnamAudioSto...,bird
2,3,Spectrogram 1,1,30.72169,31.031588,6652.3,7810.6,20200929_033839.WAV,D:\Fauna & Flora International\VietnamAudioSto...,bird
3,4,Spectrogram 1,1,34.642574,34.898577,6714.1,7733.4,20200929_033839.WAV,D:\Fauna & Flora International\VietnamAudioSto...,bird
4,5,Spectrogram 1,1,24.833626,25.13005,6853.1,7872.4,20200929_033839.WAV,D:\Fauna & Flora International\VietnamAudioSto...,bird


# Extracting path with 1 file

In [6]:
with open(labels[0], "r") as file:
    df = pd.read_csv(file, delimiter = "\t")

In [7]:
sample_path = df["Begin Path"][0]
print(sample_path)

D:\Fauna & Flora International\VietnamAudioStorage - Root\Trung Khanh\2020 Survey\Raw Audio Files\Phase 2 (Deployed - September to October)\D1\01\01b\20200929_032722.WAV


In [8]:
# Current path does not match with audio file name
relative_path = re.findall("Raw Audio Files.+", sample_path)[0]
print("Relative path: ", relative_path)

# Fixed path and join current working directory
absolute_path = os.path.join(os.getcwd(), re.sub("\((.)*\)", "(September to October 2020)", relative_path))
print("Absolute path: ", absolute_path)

Relative path:  Raw Audio Files\Phase 2 (Deployed - September to October)\D1\01\01b\20200929_032722.WAV
Absolute path:  D:\developer\python\gibbon\Raw Audio Files\Phase 2 (September to October 2020)\D1\01\01b\20200929_032722.WAV


# Load audio

In [9]:
def readLabels(path: str, sample_rate: int):
    
    def convertPath(path: str):
        relative_path = re.findall("Raw Audio Files.+", path)[0]
        return os.path.join(os.getcwd(), re.sub("\((.)*\)", "(September to October 2020)", relative_path))
    
    df = pd.read_csv(path, delimiter = "\t")
    df["Begin Path"] = df["Begin Path"].apply(convertPath)
    
    df.drop(columns = ["Selection", "View", "Channel", "Begin File"], inplace = True)
    
    df.columns = ["Start", "End", "Low", "Height", "Path", "Label"]
    
    df["Start"] = df["Start"] * sample_rate
    df["End"] = df["End"] * sample_rate
    return df

In [10]:
test = readLabels(labels[0], 4800)

In [17]:
alpha = 10
jump_seconds = 1
sample_rate = 4800

alpha_converted = alpha * sample_rate

extracted = []

In [22]:
audio, _ = librosa.load(absolute_path, sr = sample_rate)

In [25]:
type(audio)

numpy.ndarray

In [155]:
for index, row in test.iterrows():
    jump = 0
    
    while True:
        start_position = row["Start"] - sample_rate - (jump * jump_seconds * sample_rate)
        end_position = start_position + alpha_converted
        
        print(f"Original start: {row['Start']}, New start: {start_position}")
        print(f"Original end: {row['End']}, New end: {end_position}")
        print("\n")
        
        jump += 1
        
        if end_position <= row["End"]:
            break
        extracted.append(audio[int(start_position):int(end_position)])
    break

Original start: 1293.4877472, New start: -3506.5122528
Original end: 18884.9210736, New end: 44493.4877472


Original start: 1293.4877472, New start: -8306.5122528
Original end: 18884.9210736, New end: 39693.4877472


Original start: 1293.4877472, New start: -13106.5122528
Original end: 18884.9210736, New end: 34893.4877472


Original start: 1293.4877472, New start: -17906.5122528
Original end: 18884.9210736, New end: 30093.4877472


Original start: 1293.4877472, New start: -22706.5122528
Original end: 18884.9210736, New end: 25293.4877472


Original start: 1293.4877472, New start: -27506.5122528
Original end: 18884.9210736, New end: 20493.4877472


Original start: 1293.4877472, New start: -32306.5122528
Original end: 18884.9210736, New end: 15693.4877472




In [119]:
array = np.asarray(extracted, dtype = "object")