In [115]:
import pathlib 
import os
import regex as re
import librosa
import pandas as pd 
import numpy as np

In [16]:
# Folder (relative to the current working directory) that holds the selection tables.
folder = "Selection Tables"
# Collect every .txt selection table as a string path.
# glob() yields entries in arbitrary order; sorting keeps labels[0] stable across runs and machines.
labels = sorted(str(table) for table in (pathlib.Path.cwd() / folder).glob("*.txt"))

# Extracting the audio path from a single selection table

In [138]:
# Parse the first selection table (tab-separated text) into a DataFrame.
with open(labels[0], "r") as table_file:
    df = pd.read_csv(table_file, sep = "\t")

In [149]:
# Preview the first five rows of the selection table to check the parsed columns.
df.head()

Unnamed: 0,Selection,View,Channel,Begin Time (s),End Time (s),Low Freq (Hz),High Freq (Hz),Begin File,Begin Path,Label
0,1,Spectrogram 1,1,0.269477,3.934359,1002.4,1806.2,20200929_032722.WAV,D:\Fauna & Flora International\VietnamAudioSto...,bird
1,2,Spectrogram 1,1,8.29988,11.264122,1091.7,1667.3,20200929_032722.WAV,D:\Fauna & Flora International\VietnamAudioSto...,mm
2,3,Spectrogram 1,1,21.679146,23.296005,1240.5,1578.0,20200929_032722.WAV,D:\Fauna & Flora International\VietnamAudioSto...,bird
3,4,Spectrogram 1,1,29.170596,32.619896,1111.5,1647.4,20200929_032722.WAV,D:\Fauna & Flora International\VietnamAudioSto...,mm
4,5,Spectrogram 1,1,26.960887,27.877108,1220.7,1439.0,20200929_032722.WAV,D:\Fauna & Flora International\VietnamAudioSto...,other


In [22]:
# Audio path recorded for the selection at index label 0.
sample_path = df.loc[0, "Begin Path"]
print(sample_path)

D:\Fauna & Flora International\VietnamAudioStorage - Root\Trung Khanh\2020 Survey\Raw Audio Files\Phase 2 (Deployed - September to October)\D1\01\01b\20200929_032722.WAV


In [77]:
# The path stored in the selection table does not match the local audio location,
# so keep only the part starting at "Raw Audio Files".
relative_path = re.findall(r"Raw Audio Files.+", sample_path)[0]
print("Relative path: ", relative_path)

# Rename the parenthesised part of the phase folder and anchor the path at the
# current working directory.
# The pattern is a raw string (a plain "\(" is an invalid escape sequence) and
# [^)]* with count=1 rewrites only the first "(...)" group, so parentheses that
# appear later in the path (e.g. in a file name) are left untouched.
absolute_path = os.path.join(
    os.getcwd(),
    re.sub(r"\([^)]*\)", "(September to October 2020)", relative_path, count = 1),
)
print("Absolute path: ", absolute_path)

Relative path:  Raw Audio Files\Phase 2 (Deployed - September to October)\D1\01\01b\20200929_032722.WAV
Absolute path:  D:\developer\python\gibbon\Raw Audio Files\Phase 2 (September to October 2020)\D1\01\01b\20200929_032722.WAV


# Load labels and audio

In [150]:
def readLabels(path: str, sample_rate: int):
    """Load a tab-delimited selection table and express its times in samples.

    Parameters
    ----------
    path : str
        Location of the selection table (.txt, tab separated).
    sample_rate : int
        Sampling rate used to convert the begin/end times from seconds to
        sample positions.

    Returns
    -------
    pandas.DataFrame
        Columns "Start", "End", "Low", "Height", "Path", "Label".
        "Start"/"End" are the begin/end times multiplied by ``sample_rate``
        (left as floats), "Low"/"Height" are the low/high frequency bounds,
        and "Path" is the audio path re-rooted at the current working directory.

    Raises
    ------
    KeyError
        If the table lacks one of the required columns.
    IndexError
        If a "Begin Path" value does not contain "Raw Audio Files".
    """
    # Required source columns and their short output names. Selecting by name
    # (instead of dropping a few columns and renaming the rest by position)
    # keeps the result correct when the table has extra or reordered columns.
    column_names = {
        "Begin Time (s)": "Start",
        "End Time (s)": "End",
        "Low Freq (Hz)": "Low",
        "High Freq (Hz)": "Height",
        "Begin Path": "Path",
        "Label": "Label",
    }

    def convertPath(path: str):
        # Keep the part of the recorded path from "Raw Audio Files" onwards.
        relative_path = re.findall(r"Raw Audio Files.+", path)[0]
        # Raw-string pattern; [^)]* with count=1 rewrites only the first "(...)"
        # group so later parentheses (e.g. in a file name) survive.
        renamed = re.sub(r"\([^)]*\)", "(September to October 2020)", relative_path, count = 1)
        return os.path.join(os.getcwd(), renamed)

    table = pd.read_csv(path, delimiter = "\t")
    df = table[list(column_names)].rename(columns = column_names)

    df["Path"] = df["Path"].apply(convertPath)

    # Seconds -> sample positions.
    df["Start"] = df["Start"] * sample_rate
    df["End"] = df["End"] * sample_rate
    return df

In [151]:
# Load the first selection table with times converted to samples at 4800 samples per second.
test = readLabels(path = labels[0], sample_rate = 4800)

In [152]:
# Display the converted label table (Start/End are now in samples, Path is re-rooted locally).
test

Unnamed: 0,Start,End,Low,Height,Path,Label
0,1.293488e+03,1.888492e+04,1002.4,1806.2,D:\developer\python\gibbon\Raw Audio Files\Pha...,bird
1,3.983942e+04,5.406779e+04,1091.7,1667.3,D:\developer\python\gibbon\Raw Audio Files\Pha...,mm
2,1.040599e+05,1.118208e+05,1240.5,1578.0,D:\developer\python\gibbon\Raw Audio Files\Pha...,bird
3,1.400189e+05,1.565755e+05,1111.5,1647.4,D:\developer\python\gibbon\Raw Audio Files\Pha...,mm
4,1.294123e+05,1.338101e+05,1220.7,1439.0,D:\developer\python\gibbon\Raw Audio Files\Pha...,other
...,...,...,...,...,...,...
279,1.410751e+06,1.414135e+06,4557.0,5974.7,D:\developer\python\gibbon\Raw Audio Files\Pha...,insect
280,1.416513e+06,1.420172e+06,4540.1,6008.4,D:\developer\python\gibbon\Raw Audio Files\Pha...,insect
281,1.422550e+06,1.426117e+06,4523.2,5890.3,D:\developer\python\gibbon\Raw Audio Files\Pha...,insect
282,1.428221e+06,1.431788e+06,4692.0,5805.9,D:\developer\python\gibbon\Raw Audio Files\Pha...,insect


In [153]:
# Settings for cutting windows around each labelled selection.
sample_rate = 4800   # samples per second used for both labels and audio
alpha = 10           # window length in seconds
jump_seconds = 1     # how far (seconds) the window start moves back per step

# Window length expressed in samples.
alpha_converted = sample_rate * alpha

# Collected audio windows; filled by the extraction loop.
extracted = list()

In [154]:
# librosa.load returns a (waveform, sampling_rate) tuple. Unpack it so `audio`
# is the sample array itself and can be sliced by sample index.
audio, audio_sample_rate = librosa.load(absolute_path, sr = sample_rate)

In [155]:
# librosa.load returns (waveform, sampling_rate); if `audio` still holds that
# tuple, slice the waveform rather than the tuple itself.
waveform = audio[0] if isinstance(audio, tuple) else audio

for index, row in test.iterrows():
    jump = 0

    # Slide a fixed-length window backwards in steps of `jump_seconds`, starting
    # one second before the selection, until the window end no longer lies
    # beyond the selection end.
    while True:
        start_position = row["Start"] - sample_rate - (jump * jump_seconds * sample_rate)
        end_position = start_position + alpha_converted

        print(f"Original start: {row['Start']}, New start: {start_position}")
        print(f"Original end: {row['End']}, New end: {end_position}")
        print("\n")

        jump += 1

        if end_position <= row["End"]:
            break

        # A negative start would be interpreted as an index counted from the end
        # of the recording and produce a wrong (or empty) slice, so windows that
        # begin before the start of the audio are skipped.
        if start_position >= 0:
            extracted.append(waveform[int(start_position):int(end_position)])
    # NOTE(review): this break stops after the first selection; it looks like a
    # debugging leftover. Remove it to process every row once confirmed.
    break

Original start: 1293.4877472, New start: -3506.5122528
Original end: 18884.9210736, New end: 44493.4877472


Original start: 1293.4877472, New start: -8306.5122528
Original end: 18884.9210736, New end: 39693.4877472


Original start: 1293.4877472, New start: -13106.5122528
Original end: 18884.9210736, New end: 34893.4877472


Original start: 1293.4877472, New start: -17906.5122528
Original end: 18884.9210736, New end: 30093.4877472


Original start: 1293.4877472, New start: -22706.5122528
Original end: 18884.9210736, New end: 25293.4877472


Original start: 1293.4877472, New start: -27506.5122528
Original end: 18884.9210736, New end: 20493.4877472


Original start: 1293.4877472, New start: -32306.5122528
Original end: 18884.9210736, New end: 15693.4877472




In [119]:
# Pack the extracted windows into a NumPy array of generic Python objects.
array = np.asarray(extracted, dtype = object)