In [13]:
import pandas as pd
from pathlib import Path
import os

# Directory where the bird dataset is stored
XC_ROOTDIR = '/data/'
XC_DIR = 'bird_dataset'

species_dir = os.path.join(os.path.dirname(os.getcwd()), XC_ROOTDIR, XC_DIR)

print(species_dir)


/data/bird_dataset


In [1]:
import pandas as pd
from pathlib import Path
import os

# Directory where the bird dataset is stored
XC_ROOTDIR = './data/'
XC_DIR = 'bird_dataset'

# Initialize an empty list to store file details
filelist = []

# Walk through the directories of species
species_dir = os.path.join(os.getcwd(), XC_ROOTDIR, XC_DIR)
for species_folder in os.listdir(species_dir):
    species_path = os.path.join(species_dir, species_folder)
    if os.path.isdir(species_path):  # Ensure it's a directory
        for file_name in os.listdir(species_path):
            if file_name.endswith('.wav'):  # Filter for .wav files
                filelist.append({
                    'fullfilename': os.path.join(species_path, file_name),
                    'filename': Path(file_name).stem,
                    'species': species_folder  # The folder name is the species
                })

# Create a DataFrame with the gathered file details
df_files_wav = pd.DataFrame(filelist)
df_files_wav['commonname'] = df_files_wav['species'].apply(lambda x: x.split('_')[1])

# Display a summary of the DataFrame
print('=====================================================')
print(f'Number of files: {len(df_files_wav)}')
print(f'Number of species: {len(df_files_wav["species"].unique())}')
print('=====================================================')

# Print the first few rows of the DataFrame to verify
df_files_wav

Number of files: 1000
Number of species: 20


Unnamed: 0,fullfilename,filename,species,commonname
0,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC721665,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
1,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC691616,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
2,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC140016,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
3,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC133365,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
4,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC624508,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
...,...,...,...,...
995,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC447423,Geothlypis trichas_Common Yellowthroat,Common Yellowthroat
996,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC556704,Geothlypis trichas_Common Yellowthroat,Common Yellowthroat
997,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC909299,Geothlypis trichas_Common Yellowthroat,Common Yellowthroat
998,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC828478,Geothlypis trichas_Common Yellowthroat,Common Yellowthroat


In [2]:
import math

def calculate_windows(seconds):
    a = seconds - 30
    b = a / 10
    return math.ceil(b)

In [3]:
import os
import pandas as pd
from pydub import AudioSegment
from scipy.io import wavfile
import matplotlib.pyplot as plt

skipped_file_list = []

def process_audio_with_sliding_window(input_file: str, output_folder: str, window_size: int = 30, step_size: int = 10):
    """
    Process an audio file with a sliding window and generate spectrograms for each window.

    Args:
        input_file (str): Path to the input .wav file.
        output_folder (str): Directory to save the spectrograms.
        window_size (int): Size of the window in seconds (default is 30).
        step_size (int): Step size for the sliding window in seconds (default is 10).
        
    """
    try:
        os.makedirs(output_folder, exist_ok=True)
        audio = AudioSegment.from_wav(input_file)
        duration_seconds = len(audio) / 1000  # Convert to seconds

        if duration_seconds < 30:
            print(f"\n\nSkipped {input_file}: File is too short ({duration_seconds} seconds).\n\n")
            skipped_file_list.append({
                    'fullfilename': input_file,
                    'duration': duration_seconds,
                })
            return

        print(f"File is {duration_seconds} seconds and should have {calculate_windows(duration_seconds)} windows.")
        
        base_name = os.path.splitext(os.path.basename(input_file))[0]
        counter = 1
    
        for start_time in range(0, int(duration_seconds - window_size) + 1, step_size):
            end_time = start_time + window_size
            window_audio = audio[start_time * 1000:end_time * 1000]  # Extract the window
    
            temp_wav_file = os.path.join(output_folder, f"temp_{start_time}_{end_time}.wav")
            window_audio.export(temp_wav_file, format="wav")
    
            output_filename = f"{base_name}_{counter}"  # Use counter for unique naming
            generate_spectrogram(temp_wav_file, output_folder, output_filename)
    
            os.remove(temp_wav_file)  # Cleanup temp file
            counter += 1

            
    except Exception as e:
        print(f"\n\nError processing {input_file}: {e}\n\n")



def generate_spectrogram(wav_file, output_folder, output_filename):
    """
    Generate a spectrogram from a .wav file and save it as a PNG image.

    Args:
        wav_file (str): Path to the .wav file.
        output_folder (str): Directory to save the spectrogram.
        output_filename (str): Name of the output file (without extension).
    """
    sample_rate, data = wavfile.read(wav_file)
    if len(data.shape) > 1:
        data = data[:, 0]

    plt.specgram(data, Fs=sample_rate, cmap="viridis")
    output_path = os.path.join(output_folder, f"{output_filename}.png")
    plt.axis("off")
    plt.savefig(output_path, bbox_inches="tight", pad_inches=0)
    plt.close()
    print(f"Spectrogram saved: {output_path}")

In [11]:
def process_files(dataframe, output_folder):
    counter = 1
    for _, row in dataframe.iterrows():
        input_wav_file = row['fullfilename']
        species_name = row['commonname']
        
        # Create species folder
        species_folder = os.path.join(output_folder, species_name)
        os.makedirs(species_folder, exist_ok=True)
    
        print(f"\nProcessing {counter}/{len(dataframe)}, {input_wav_file} for species: {species_name}")
        process_audio_with_sliding_window(input_wav_file, species_folder)
        counter += 1
    
    print("\n\nAll files have been processed.")

output_root_folder = './data/spectrograms_windowed_dataset'

In [9]:
df_files_wav_part1 = df_files_wav.head(250)
df_files_wav_part1

Unnamed: 0,fullfilename,filename,species,commonname
0,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC721665,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
1,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC691616,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
2,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC140016,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
3,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC133365,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
4,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC624508,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
...,...,...,...,...
245,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC139068,Setophaga dominica_Yellow-throated Warbler,Yellow-throated Warbler
246,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC423238,Setophaga dominica_Yellow-throated Warbler,Yellow-throated Warbler
247,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC404731,Setophaga dominica_Yellow-throated Warbler,Yellow-throated Warbler
248,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC76710,Setophaga dominica_Yellow-throated Warbler,Yellow-throated Warbler


In [12]:
process_files(df_files_wav_part1, output_root_folder)


Processing 1/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga magnolia_Magnolia Warbler/XC721665.wav for species: Magnolia Warbler
File is 139.425 seconds and should have 11 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC721665_9.p

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC133365_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC133365_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC133365_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC133365_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC133365_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC133365_6.png

Processing 5/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga magnolia_Magnolia Warbler/XC624508.wav for species: Magnolia Warbler
File is 122.863 seconds and should have 10 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC624508_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC624508_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC624508_3.p

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC922000_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC922000_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC922000_3.png

Processing 164/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga citrina_Hooded Warbler/XC710130.wav for species: Hooded Warbler
File is 62.451 seconds and should have 4 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC710130_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC710130_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC710130_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC710130_4.png

Processing 165/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga citrina_Hooded Warbler/XC805865.wav for species: Hooded Warbler
File is 67.966 seconds and should ha

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC33583_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC33583_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC33583_3.png

Processing 178/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga citrina_Hooded Warbler/XC20524.wav for species: Hooded Warbler
File is 53.064 seconds and should have 3 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC20524_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC20524_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC20524_3.png

Processing 179/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga citrina_Hooded Warbler/XC827854.wav for species: Hooded Warbler
File is 151.944 seconds and should have 13 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC8278

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC76716_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC76716_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC76716_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC76716_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC76716_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC76716_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC76716_7.png

Processing 197/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga citrina_Hooded Warbler/XC364660.wav for species: Hooded Warbler
File is 84.219 seconds and should have 6 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC364660_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Hooded Warbler/XC364660_2.png
Spectrogram saved: ./data/s

In [13]:
df_skipped_files = pd.DataFrame(skipped_file_list)
df_skipped_files.iloc[0]['fullfilename']
# Extract the base folder name from the file path
df_skipped_files['species'] = df_skipped_files['fullfilename'].apply(
    lambda x: os.path.basename(os.path.dirname(x))
)
df_skipped_files['commonname'] = df_skipped_files['species'].apply(lambda x: x.split('_')[1])
# View the updated dataframe
df_skipped_files


Unnamed: 0,fullfilename,duration,species,commonname
0,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.214,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
1,/Users/solomongreene/DIS/DIS Core Course/./dat...,4.237,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
2,/Users/solomongreene/DIS/DIS Core Course/./dat...,23.904,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
3,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.352,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
4,/Users/solomongreene/DIS/DIS Core Course/./dat...,26.666,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
5,/Users/solomongreene/DIS/DIS Core Course/./dat...,3.447,Setophaga tigrina_Cape May Warbler,Cape May Warbler
6,/Users/solomongreene/DIS/DIS Core Course/./dat...,2.822,Setophaga tigrina_Cape May Warbler,Cape May Warbler
7,/Users/solomongreene/DIS/DIS Core Course/./dat...,5.028,Setophaga tigrina_Cape May Warbler,Cape May Warbler
8,/Users/solomongreene/DIS/DIS Core Course/./dat...,12.407,Setophaga tigrina_Cape May Warbler,Cape May Warbler
9,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.118,Setophaga tigrina_Cape May Warbler,Cape May Warbler


In [14]:
# Next 250 entries (250-499)
df_files_wav_part2 = df_files_wav.iloc[250:500]
df_files_wav_part2

Unnamed: 0,fullfilename,filename,species,commonname
250,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC390885,Setophaga pinus_Pine Warbler,Pine Warbler
251,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC691777,Setophaga pinus_Pine Warbler,Pine Warbler
252,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC638320,Setophaga pinus_Pine Warbler,Pine Warbler
253,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC638308,Setophaga pinus_Pine Warbler,Pine Warbler
254,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC177122,Setophaga pinus_Pine Warbler,Pine Warbler
...,...,...,...,...
495,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC79627,Protonotaria citrea_Prothonotary Warbler,Prothonotary Warbler
496,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC173798,Protonotaria citrea_Prothonotary Warbler,Prothonotary Warbler
497,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC76706,Protonotaria citrea_Prothonotary Warbler,Prothonotary Warbler
498,/Users/solomongreene/DIS/DIS Core Course/./dat...,XC173799,Protonotaria citrea_Prothonotary Warbler,Prothonotary Warbler


In [15]:
process_files(df_files_wav_part2, output_root_folder)


Processing 1/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga pinus_Pine Warbler/XC390885.wav for species: Pine Warbler
File is 186.436 seconds and should have 16 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC390885_9.png
Spectrogram saved: ./data/spectrograms_windo

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638308_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638308_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638308_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638308_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638308_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638308_6.png

Processing 5/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga pinus_Pine Warbler/XC177122.wav for species: Pine Warbler
File is 69.661 seconds and should have 4 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC177122_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC177122_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC177122_3.png
Spectrogram saved: ./data/spectrograms_windowe

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC356277_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC356277_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC356277_3.png

Processing 30/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga pinus_Pine Warbler/XC895216.wav for species: Pine Warbler
File is 43.593 seconds and should have 2 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC895216_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC895216_2.png

Processing 31/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga pinus_Pine Warbler/XC895213.wav for species: Pine Warbler
File is 59.119 seconds and should have 3 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC895213_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC895213_2.png
Spectrogram sa

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC640357_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC640357_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC640357_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC640357_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC640357_5.png

Processing 33/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga pinus_Pine Warbler/XC553224.wav for species: Pine Warbler
File is 84.59 seconds and should have 6 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC553224_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC553224_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC553224_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC553224_4.png
Spectrogram saved: ./data/spectrograms_windowe

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC33529_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC33529_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC33529_3.png

Processing 45/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga pinus_Pine Warbler/XC638307.wav for species: Pine Warbler
File is 130.064 seconds and should have 11 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638307_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638307_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638307_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638307_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638307_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Pine Warbler/XC638307_6.png
Spectrogram saved: ./data/spectrograms_windowe

In [16]:
df_skipped_files = pd.DataFrame(skipped_file_list)
df_skipped_files.iloc[0]['fullfilename']
# Extract the base folder name from the file path
df_skipped_files['species'] = df_skipped_files['fullfilename'].apply(
    lambda x: os.path.basename(os.path.dirname(x))
)
df_skipped_files['commonname'] = df_skipped_files['species'].apply(lambda x: x.split('_')[1])
# View the updated dataframe
df_skipped_files


Unnamed: 0,fullfilename,duration,species,commonname
0,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.214,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
1,/Users/solomongreene/DIS/DIS Core Course/./dat...,4.237,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
2,/Users/solomongreene/DIS/DIS Core Course/./dat...,23.904,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
3,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.352,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
4,/Users/solomongreene/DIS/DIS Core Course/./dat...,26.666,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
5,/Users/solomongreene/DIS/DIS Core Course/./dat...,3.447,Setophaga tigrina_Cape May Warbler,Cape May Warbler
6,/Users/solomongreene/DIS/DIS Core Course/./dat...,2.822,Setophaga tigrina_Cape May Warbler,Cape May Warbler
7,/Users/solomongreene/DIS/DIS Core Course/./dat...,5.028,Setophaga tigrina_Cape May Warbler,Cape May Warbler
8,/Users/solomongreene/DIS/DIS Core Course/./dat...,12.407,Setophaga tigrina_Cape May Warbler,Cape May Warbler
9,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.118,Setophaga tigrina_Cape May Warbler,Cape May Warbler


In [17]:
# Next 250 entries (500-749)
df_files_wav_part3 = df_files_wav.iloc[500:750]

In [18]:
process_files(df_files_wav_part3, output_root_folder)


Processing 1/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Seiurus aurocapilla_Ovenbird/XC549993.wav for species: Ovenbird
File is 95.191 seconds and should have 7 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC549993_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC549993_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC549993_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC549993_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC549993_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC549993_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC549993_7.png

Processing 2/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Seiurus aurocapilla_Ovenbird/XC189982.wav for species: Ovenbird
File is 112.692 seconds and should have 9 windows.


  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189982_9.png

Processing 3/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Seiurus aurocapilla_Ovenbird/XC189981.wav for species: Ovenbird
File is 192.183 seconds and should have 17 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC189981_1.png
Spec

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-and-white Warbler/XC460693_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-and-white Warbler/XC460693_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-and-white Warbler/XC460693_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-and-white Warbler/XC460693_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-and-white Warbler/XC460693_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-and-white Warbler/XC460693_6.png

Processing 159/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Mniotilta varia_Black-and-white Warbler/XC486856.wav for species: Black-and-white Warbler
File is 75.336 seconds and should have 5 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-and-white Warbler/XC486856_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-and-white Warbler/XC486856_2.png
Spectrogram saved:

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825245_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825245_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825245_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825245_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825245_5.png

Processing 217/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga ruticilla_American Redstart/XC78803.wav for species: American Redstart
File is 127.8 seconds and should have 10 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78803_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78803_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78803_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_9.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_10.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC825120_11.png
Spectrogram saved: 

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78804_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78804_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78804_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78804_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78804_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC78804_6.png

Processing 226/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga ruticilla_American Redstart/XC315359.wav for species: American Redstart
File is 172.33 seconds and should have 15 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC315359_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC315359_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/American Redstart/XC315

In [19]:
df_skipped_files = pd.DataFrame(skipped_file_list)
df_skipped_files.iloc[0]['fullfilename']
# Extract the base folder name from the file path
df_skipped_files['species'] = df_skipped_files['fullfilename'].apply(
    lambda x: os.path.basename(os.path.dirname(x))
)
df_skipped_files['commonname'] = df_skipped_files['species'].apply(lambda x: x.split('_')[1])
# View the updated dataframe
df_skipped_files

Unnamed: 0,fullfilename,duration,species,commonname
0,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.214,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
1,/Users/solomongreene/DIS/DIS Core Course/./dat...,4.237,Setophaga magnolia_Magnolia Warbler,Magnolia Warbler
2,/Users/solomongreene/DIS/DIS Core Course/./dat...,23.904,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
3,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.352,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
4,/Users/solomongreene/DIS/DIS Core Course/./dat...,26.666,Setophaga coronata_Myrtle Warbler,Myrtle Warbler
5,/Users/solomongreene/DIS/DIS Core Course/./dat...,3.447,Setophaga tigrina_Cape May Warbler,Cape May Warbler
6,/Users/solomongreene/DIS/DIS Core Course/./dat...,2.822,Setophaga tigrina_Cape May Warbler,Cape May Warbler
7,/Users/solomongreene/DIS/DIS Core Course/./dat...,5.028,Setophaga tigrina_Cape May Warbler,Cape May Warbler
8,/Users/solomongreene/DIS/DIS Core Course/./dat...,12.407,Setophaga tigrina_Cape May Warbler,Cape May Warbler
9,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.118,Setophaga tigrina_Cape May Warbler,Cape May Warbler


In [20]:
# Last 250 entries (750-999)
df_files_wav_part4 = df_files_wav.iloc[750:1000]

In [21]:
process_files(df_files_wav_part4, output_root_folder)


Processing 1/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga pensylvanica_Chestnut-sided Warbler/XC479501.wav for species: Chestnut-sided Warbler
File is 145.58 seconds and should have 12 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC479501_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC479501_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC479501_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC479501_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC479501_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC479501_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC479501_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC479501_8.png
Spectrogram saved: ./d

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_9.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Chestnut-sided Warbler/XC370542_10.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Che

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_9.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_10.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC7346_11.png
Spectrogram saved: ./data/spectrograms_windowed_data

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC33606_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC33606_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC33606_3.png

Processing 75/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga cerulea_Cerulean Warbler/XC934430.wav for species: Cerulean Warbler
File is 30.218 seconds and should have 1 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC934430_1.png

Processing 76/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga cerulea_Cerulean Warbler/XC315358.wav for species: Cerulean Warbler
File is 115.487 seconds and should have 9 windows.


  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC315358_9.png

Processing 77/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga cerulea_Cerulean Warbler/XC570835.wav for species: Cerulean Warbler
File is 58.56 seconds and should have 3 windows.


  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC649987_16.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC649987_17.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC649987_18.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC649987_19.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC649987_20.png

Processing 86/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga cerulea_Cerulean Warbler/XC913277.wav for species: Cerulean Warbler
File is 49.168 seconds and should have 2 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC913277_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC913277_2.png

Processing 87/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga cerulea_Cerulean Warbler/XC934435.wav for species: Cerulean Warbler


Skipped /

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC583857_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC583857_2.png

Processing 90/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga cerulea_Cerulean Warbler/XC33464.wav for species: Cerulean Warbler
File is 58.008 seconds and should have 3 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC33464_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC33464_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC33464_3.png

Processing 91/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga cerulea_Cerulean Warbler/XC101914.wav for species: Cerulean Warbler
File is 52.741 seconds and should have 3 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Cerulean Warbler/XC101914_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ce

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC78807_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC78807_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC78807_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC78807_4.png

Processing 123/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga fusca_Blackburnian Warbler/XC690975.wav for species: Blackburnian Warbler
File is 73.96 seconds and should have 5 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC690975_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC690975_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC690975_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC690975_4.png
Spectrogram saved: ./data/spectrograms_windowed_datas

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC142594_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC142594_2.png

Processing 136/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga fusca_Blackburnian Warbler/XC246776.wav for species: Blackburnian Warbler
File is 217.134 seconds and should have 19 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC246776_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC246776_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC246776_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC246776_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC246776_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Blackburnian Warbler/XC246776_6.png
Spectrogram saved: ./data/spectrograms_windowe

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC101841_9.png

Processing 156/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga discolor_Prairie Warbler/XC317241.wav for species: Prairie Warbler
File is 187.141 seconds and should have 16 windows.
Spectr

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC244518_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC244518_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC244518_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC244518_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC244518_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC244518_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC244518_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC244518_8.png

Processing 172/250, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga discolor_Prairie Warbler/XC137598.wav for species: Prairie Warbler
File is 93.941 seconds and should have 7 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prairie Warbler/XC137598_1.png
Spectrog

In [24]:
df_skipped_files = pd.DataFrame(skipped_file_list)

# Extract the base folder name from the file path
df_skipped_files['species'] = df_skipped_files['fullfilename'].apply(
    lambda x: os.path.basename(os.path.dirname(x))
)
df_skipped_files['filename'] = df_skipped_files['fullfilename'].apply(
    lambda x: os.path.basename(x)
)

df_skipped_files['commonname'] = df_skipped_files['species'].apply(lambda x: x.split('_')[1])
# View the updated dataframe
df_skipped_files

Unnamed: 0,fullfilename,duration,species,filename,commonname
0,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.214,Setophaga magnolia_Magnolia Warbler,XC787201.wav,Magnolia Warbler
1,/Users/solomongreene/DIS/DIS Core Course/./dat...,4.237,Setophaga magnolia_Magnolia Warbler,XC915936.wav,Magnolia Warbler
2,/Users/solomongreene/DIS/DIS Core Course/./dat...,23.904,Setophaga coronata_Myrtle Warbler,XC798976.wav,Myrtle Warbler
3,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.352,Setophaga coronata_Myrtle Warbler,XC839518.wav,Myrtle Warbler
4,/Users/solomongreene/DIS/DIS Core Course/./dat...,26.666,Setophaga coronata_Myrtle Warbler,XC898134.wav,Myrtle Warbler
5,/Users/solomongreene/DIS/DIS Core Course/./dat...,3.447,Setophaga tigrina_Cape May Warbler,XC902334.wav,Cape May Warbler
6,/Users/solomongreene/DIS/DIS Core Course/./dat...,2.822,Setophaga tigrina_Cape May Warbler,XC902336.wav,Cape May Warbler
7,/Users/solomongreene/DIS/DIS Core Course/./dat...,5.028,Setophaga tigrina_Cape May Warbler,XC902337.wav,Cape May Warbler
8,/Users/solomongreene/DIS/DIS Core Course/./dat...,12.407,Setophaga tigrina_Cape May Warbler,XC902332.wav,Cape May Warbler
9,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.118,Setophaga tigrina_Cape May Warbler,XC784610.wav,Cape May Warbler


In [26]:
df_test = df_skipped_files.iloc[2:5]
df_test

Unnamed: 0,fullfilename,duration,species,filename,commonname
2,/Users/solomongreene/DIS/DIS Core Course/./dat...,23.904,Setophaga coronata_Myrtle Warbler,XC798976.wav,Myrtle Warbler
3,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.352,Setophaga coronata_Myrtle Warbler,XC839518.wav,Myrtle Warbler
4,/Users/solomongreene/DIS/DIS Core Course/./dat...,26.666,Setophaga coronata_Myrtle Warbler,XC898134.wav,Myrtle Warbler


In [27]:
process_files(df_test, output_root_folder)


Processing 1/3, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga coronata_Myrtle Warbler/XC798976.wav for species: Myrtle Warbler
File is 50.531 seconds and should have 3 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Myrtle Warbler/XC798976_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Myrtle Warbler/XC798976_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Myrtle Warbler/XC798976_3.png

Processing 2/3, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga coronata_Myrtle Warbler/XC839518.wav for species: Myrtle Warbler
File is 93.904 seconds and should have 7 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Myrtle Warbler/XC839518_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Myrtle Warbler/XC839518_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Myrtle Warbler/XC839518_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Myrtle Warbler/XC83951

In [30]:
df_remaining = df_skipped_files[~df_skipped_files.index.isin(df_test.index)].reset_index(drop=True)
df_remaining

Unnamed: 0,fullfilename,duration,species,filename,commonname
0,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.214,Setophaga magnolia_Magnolia Warbler,XC787201.wav,Magnolia Warbler
1,/Users/solomongreene/DIS/DIS Core Course/./dat...,4.237,Setophaga magnolia_Magnolia Warbler,XC915936.wav,Magnolia Warbler
2,/Users/solomongreene/DIS/DIS Core Course/./dat...,3.447,Setophaga tigrina_Cape May Warbler,XC902334.wav,Cape May Warbler
3,/Users/solomongreene/DIS/DIS Core Course/./dat...,2.822,Setophaga tigrina_Cape May Warbler,XC902336.wav,Cape May Warbler
4,/Users/solomongreene/DIS/DIS Core Course/./dat...,5.028,Setophaga tigrina_Cape May Warbler,XC902337.wav,Cape May Warbler
5,/Users/solomongreene/DIS/DIS Core Course/./dat...,12.407,Setophaga tigrina_Cape May Warbler,XC902332.wav,Cape May Warbler
6,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.118,Setophaga tigrina_Cape May Warbler,XC784610.wav,Cape May Warbler
7,/Users/solomongreene/DIS/DIS Core Course/./dat...,0.158,Setophaga citrina_Hooded Warbler,XC879099.wav,Hooded Warbler
8,/Users/solomongreene/DIS/DIS Core Course/./dat...,1.983,Setophaga citrina_Hooded Warbler,XC796399.wav,Hooded Warbler
9,/Users/solomongreene/DIS/DIS Core Course/./dat...,29.945,Setophaga dominica_Yellow-throated Warbler,XC362338.wav,Yellow-throated Warbler


In [31]:
process_files(df_remaining, output_root_folder)


Processing 1/42, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga magnolia_Magnolia Warbler/XC787201.wav for species: Magnolia Warbler
File is 83.89 seconds and should have 6 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC787201_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC787201_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC787201_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC787201_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC787201_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Magnolia Warbler/XC787201_6.png

Processing 2/42, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga magnolia_Magnolia Warbler/XC915936.wav for species: Magnolia Warbler
File is 56.574 seconds and should have 3 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/M

  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-throated Blue Warbler/XC933480_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-throated Blue Warbler/XC933480_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-throated Blue Warbler/XC933480_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-throated Blue Warbler/XC933480_4.png

Processing 14/42, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga caerulescens_Black-throated Blue Warbler/XC900808.wav for species: Black-throated Blue Warbler
File is 129.11 seconds and should have 10 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-throated Blue Warbler/XC900808_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-throated Blue Warbler/XC900808_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-throated Blue Warbler/XC900808_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Black-throated 

In [36]:
path1 = os.getcwd() + '/data/bird_dataset/Seiurus aurocapilla_Ovenbird/XC922808.wav'
path2 = os.getcwd() + '/data/bird_dataset/Setophaga virens_Black-throated Green Warbler/XC922249.wav'
path3 = os.getcwd() + '/data/bird_dataset/Setophaga ruticilla_American Redstart/XC924064.wav'

# Create the DataFrame
data = {
    'fullfilename': [path1, path2, path3],
    'species': [
        'Seiurus aurocapilla_Ovenbird',
        'Setophaga virens_Black-throated Green Warbler',
        'Setophaga ruticilla_American Redstart'
    ]
}

df_too_long = pd.DataFrame(data)
df_too_long['commonname'] = df_too_long['species'].apply(lambda x: x.split('_')[1])

df_too_long 

Unnamed: 0,fullfilename,species,commonname
0,/Users/solomongreene/DIS/DIS Core Course/data/...,Seiurus aurocapilla_Ovenbird,Ovenbird
1,/Users/solomongreene/DIS/DIS Core Course/data/...,Setophaga virens_Black-throated Green Warbler,Black-throated Green Warbler
2,/Users/solomongreene/DIS/DIS Core Course/data/...,Setophaga ruticilla_American Redstart,American Redstart


In [37]:
process_files(df_too_long, output_root_folder)


Processing 1/3, /Users/solomongreene/DIS/DIS Core Course/data/bird_dataset/Seiurus aurocapilla_Ovenbird/XC922808.wav for species: Ovenbird
File is 225.146 seconds and should have 20 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_1.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_2.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_3.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_4.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_5.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_6.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_7.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_8.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_9.png
Spectrogram saved: ./data/spectrograms_windowed_dataset/Ovenbird/XC922808_10.png
Spectro

In [40]:
df_redo = df_skipped_files.iloc[[12, 23]]
df_redo


Unnamed: 0,fullfilename,duration,species,filename,commonname
12,/Users/solomongreene/DIS/DIS Core Course/./dat...,29.945,Setophaga dominica_Yellow-throated Warbler,XC362338.wav,Yellow-throated Warbler
23,/Users/solomongreene/DIS/DIS Core Course/./dat...,29.952,Protonotaria citrea_Prothonotary Warbler,XC57320.wav,Prothonotary Warbler


In [41]:
from pydub import AudioSegment

def pad_audio_to_length(input_path, output_path, target_length=30.001):
    """
    Pads an audio file with silence to reach a specified length.

    Parameters:
        input_path (str): Path to the input audio file.
        output_path (str): Path to save the padded audio file.
        target_length (float): Target length in seconds.
    """
    try:
        # Load the audio file
        audio = AudioSegment.from_file(input_path)
        current_length = len(audio) / 1000  # Convert length to seconds

        if current_length < target_length:
            # Calculate padding duration in milliseconds
            padding_duration = int((target_length - current_length) * 1000)
            padding = AudioSegment.silent(duration=padding_duration)
            
            # Add padding to the end of the audio
            padded_audio = audio + padding
            
            # Save the padded audio
            padded_audio.export(output_path, format="wav")
            print(f"Audio padded and saved to {output_path}. Final length: {len(padded_audio) / 1000} seconds.")
        else:
            print(f"No padding needed. Current length: {current_length} seconds.")
    except Exception as e:
        print(f"Error processing {input_path}: {e}")

# Example usage
input_path_1 = "/Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga dominica_Yellow-throated Warbler/XC362338.wav"
input_path_2 = "/Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Protonotaria citrea_Prothonotary Warbler/XC57320.wav"

output_path_1 = "/Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga dominica_Yellow-throated Warbler/XC362338_padded.wav"
output_path_2 = "/Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Protonotaria citrea_Prothonotary Warbler/XC57320_padded.wav"

pad_audio_to_length(input_path_1, output_path_1)
pad_audio_to_length(input_path_2, output_path_2)


Audio padded and saved to /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga dominica_Yellow-throated Warbler/XC362338_padded.wav. Final length: 30.001 seconds.
Audio padded and saved to /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Protonotaria citrea_Prothonotary Warbler/XC57320_padded.wav. Final length: 30.0 seconds.


In [42]:
process_files(df_redo, output_root_folder)


Processing 1/2, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Setophaga dominica_Yellow-throated Warbler/XC362338.wav for species: Yellow-throated Warbler
File is 30.001 seconds and should have 1 windows.


  Z = 10. * np.log10(spec)


Spectrogram saved: ./data/spectrograms_windowed_dataset/Yellow-throated Warbler/XC362338_1.png

Processing 2/2, /Users/solomongreene/DIS/DIS Core Course/./data/bird_dataset/Protonotaria citrea_Prothonotary Warbler/XC57320.wav for species: Prothonotary Warbler
File is 30.0 seconds and should have 0 windows.
Spectrogram saved: ./data/spectrograms_windowed_dataset/Prothonotary Warbler/XC57320_1.png


All files have been processed.
