# Data Preproccesing for CNN Model

## Data Downloader from iNaturlist

In [None]:
import pandas as pd
import requests
import os
from urllib.parse import urlparse
from tqdm import tqdm  # Import tqdm for the progress bar

# Read the CSV file
df = pd.read_csv('../Datasets/Inat/observations-all-recordings.csv/observations-598206.csv')

# Create a directory to save the sound files
os.makedirs('frog_sounds', exist_ok=True)

# Helper function to generate a unique filename
def get_unique_filename(directory, base_name, extension):
    full_path = os.path.join(directory, base_name + extension)
    counter = 1
    while os.path.exists(full_path):
        full_path = os.path.join(directory, f"{base_name}_{counter}{extension}")
        counter += 1
    return full_path

# Create a progress bar for the total number of downloads
with tqdm(total=len(df), desc="Downloading frog sounds") as pbar:
    # Iterate through each row in the DataFrame
    for index, row in df.iterrows():
        sound_url = row['sound_url']
        common_name = row['common_name']
        
        if pd.notna(sound_url) and pd.notna(common_name):
            try:
                # Extract the file extension from the URL
                parsed_url = urlparse(sound_url)
                filename = os.path.basename(parsed_url.path)
                extension = os.path.splitext(filename)[1]
                
                # Clean the common name to make it a valid filename
                clean_name = "".join(c if c.isalnum() or c in (' ', '-') else '_' for c in common_name).strip()
                clean_name = clean_name.replace(' ', '_')  # Optional: replace spaces with underscores
                
                # Generate a unique filename
                unique_filename = get_unique_filename('frog_sounds', clean_name, extension)
                
                # Download the file
                response = requests.get(sound_url, stream=True)
                if response.status_code == 200:
                    with open(unique_filename, 'wb') as f:
                        for chunk in response.iter_content(1024):
                            f.write(chunk)
                    # print(f"Downloaded: {unique_filename}")
                else:
                    print(f"Failed to download: {sound_url}")
            except Exception as e:
                print(f"Error processing {sound_url}: {e}")

            # Update the progress bar after each download
            pbar.update(1)

print("Download complete.")

## Convert all files into .WAV


In [2]:
import os
import subprocess
from pathlib import Path
from tqdm import tqdm

def convert_to_spectrogram_ready(input_dir, output_dir, sample_rate=22050, mono=True):
    
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    
    audio_files = []
    extensions = ('.wav', '.mp3', '.mp4', '.m4a', '.aac', '.flac', '.ogg', '.mpga')
    
    for root, _, files in os.walk(input_dir):
        for file in files:
            # if file.lower().endswith(extensions):
                audio_files.append(Path(root) / file)
    
    print(f"Found {len(audio_files)} audio files to process")
    
    for input_file in tqdm(audio_files, desc="Converting for spectrograms"):
        output_file = Path(output_dir) / f"{input_file.stem}.wav"
        
        # Check if file exists and modify name if needed
        counter = 1
        while output_file.exists():
            output_file = Path(output_dir) / f"{input_file.stem}_{counter}.wav"
            counter += 1
        
        cmd = [
            'ffmpeg',
            '-i', str(input_file),
            '-ac', '1' if mono else '2',  # mono recommended for spectrograms
            '-ar', str(sample_rate),  # 22050 is standard for many ML models
            '-sample_fmt', 's16',  # 16-bit PCM
            '-acodec', 'pcm_s16le',  # Standard WAV format
            '-y',  # Still overwrites if same name, but we prevent that with the loop
            str(output_file)
        ]
        
        try:
            subprocess.run(cmd, check=True, 
                         stdout=subprocess.DEVNULL, 
                         stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as e:
            print(f"\nError processing {input_file}: {e.stderr.decode('utf-8')}")
    
    print(f"\nConversion complete. Files ready for spectrogram generation in {output_dir}")

if __name__ == "__main__":
    convert_to_spectrogram_ready(
        input_dir="../Datasets/Raw-audio/aug-test/",
        output_dir="../Datasets/Raw-audio/aug-test/"
    )

Found 5 audio files to process


Converting for spectrograms: 100%|██████████| 5/5 [00:00<00:00, 20.72it/s]


Conversion complete. Files ready for spectrogram generation in ../Datasets/Raw-audio/aug-test/





## Rename files for only ID most common frogs

In [2]:
import os
from pathlib import Path
from collections import defaultdict

def rename_frog_files(directory):
    # Files to keep original names (all lowercase for comparison)
    KEEP_NAMES = {
        "painted_reed_frog",
        "weale_s_running_frog",
        "de_villiers_moss_frog",
        "cape_peninsula_moss_frog",
        "eastern_rose_rain_frog",
        "banded_stream_frog",
        "Rain",
        "clicking_stream_frog",
        "southern_dainty_frog",
        "cape_mountain_rain_frog"
    }
    
    # Initialize counter for other_frog files
    other_counter = 1
    
    # Get all .wav files in directory
    for filepath in Path(directory).glob("*.wav"):
        original_name = filepath.stem.lower()  # Get filename without extension
        
        # Check if we should keep original name
        should_keep = any(
            keep_name.lower() in original_name 
            for keep_name in KEEP_NAMES
        )
        
        if should_keep:
            print(f"Keeping: {filepath.name}")
            continue  # Skip renaming
        
        # Generate new name
        new_name = f"other_frog_{other_counter}.wav"
        new_path = filepath.with_name(new_name)
        
        # Handle potential name conflicts
        while new_path.exists():
            other_counter += 1
            new_name = f"other_frog_{other_counter}.wav"
            new_path = filepath.with_name(new_name)
        
        # Rename the file
        filepath.rename(new_path)
        print(f"Renamed: {filepath.name} -> {new_name}")
        other_counter += 1

if __name__ == "__main__":
    directory_path = "../Datasets/Raw-audio/calls-clipped/"
    rename_frog_files(directory_path)
    print("File renaming complete!")

Renamed: 51_Eastern_Leopard_Toad_segment_5.wav -> other_frog_1.wav
Renamed: 94_Tinker_Reed_Frog_segment_4.wav -> other_frog_2.wav
Renamed: 110_Nile_Grass_Frog_segment_2.wav -> other_frog_3.wav
Renamed: 60_Cederberg_Ghost_Frog_segment_6.wav -> other_frog_4.wav
Renamed: 149_Striped_Caco_segment_1.wav -> other_frog_5.wav
Keeping: 159_Clicking_Stream_Frog_segment_1.wav
Renamed: 03_Bush_Squeaker_segment_2.wav -> other_frog_6.wav
Renamed: 73_Knysna_Leaffolding_Frog_segment_3.wav -> other_frog_7.wav
Renamed: 89_Powers_Long_Reed_Frog_segment_3.wav -> other_frog_8.wav
Renamed: 59_Natal_Cascade_Frog_segment_1.wav -> other_frog_9.wav
Renamed: 117_Cape_Platanna_segment_1.wav -> other_frog_10.wav
Renamed: 78_Arum_Lily_Frog_segment_1.wav -> other_frog_11.wav
Keeping: 16_Zululand_Rain_Frog_segment_1.wav
Keeping: 14_Bilbos_Rain_Frog_segment_3.wav
Keeping: 29_Northern_Forest_Rain_Frog_B_s_taeniatus_segment_2.wav
Renamed: 168_Natal_Sand_Frog_segment_3.wav -> other_frog_12.wav
Renamed: 127_Maluti_River_F

## Get file Info

In [9]:
import os
from pathlib import Path
import wave
import contextlib
from collections import defaultdict

def analyze_short_frog_files(directory, max_duration=4):
    """
    Finds all WAV files shorter than specified duration and counts by frog type.
    
    Args:
        directory (str): Path to directory containing WAV files
        max_duration (float): Maximum duration in seconds to consider as "short"
        
    Returns:
        tuple: (list of short files with durations, frog type counts, total count)
    """
    short_files = []
    frog_counts = defaultdict(int)
    special_frogs = {
        'mountain_rain_frog',
        'southern_dainty_frog',
        'clicking_stream_frog'
    }

    for filepath in Path(directory).glob('*.wav'):
        try:
            with contextlib.closing(wave.open(str(filepath), 'r')) as wav_file:
                frames = wav_file.getnframes()
                rate = wav_file.getframerate()
                duration = frames / float(rate)
                
                if duration < max_duration:
                    filename = filepath.stem.lower()
                    
                    # Determine frog type
                    frog_type = "other"
                    for special in special_frogs:
                        if special in filename:
                            frog_type = special.replace('_', ' ').title()
                            break
                    
                    short_files.append((filepath.name, duration, frog_type))
                    frog_counts[frog_type] += 1
                    
        except (wave.Error, EOFError) as e:
            print(f"Error processing {filepath.name}: {str(e)}")
            continue
    
    return short_files, frog_counts, len(short_files)

if __name__ == "__main__":
    folder_path = "./frog_sounds_wav/"  # Replace with your folder path
    try:
        short_files, frog_counts, total_count = analyze_short_frog_files(folder_path)
        
        if not short_files:
            print("No files shorter than 5 seconds found.")
        else:
            print("Files shorter than 5 seconds:")
            print("-" * 65)
            print(f"{'Filename':<40} {'Duration':<10} {'Frog Type':<15}")
            print("-" * 65)
            for filename, duration, frog_type in short_files:
                print(f"{filename:<40} {duration:.2f}s{'':<5} {frog_type:<15}")
            
            print("-" * 65)
            print("\nFrog Type Counts:")
            for frog_type, count in sorted(frog_counts.items(), key=lambda x: x[1], reverse=True):
                print(f"{frog_type:<20}: {count}")
            
            print("-" * 65)
            print(f"Total files shorter than 5 seconds: {total_count}")
            
    except Exception as e:
        print(f"Error: {str(e)}")

Files shorter than 5 seconds:
-----------------------------------------------------------------
Filename                                 Duration   Frog Type      
-----------------------------------------------------------------
other_frog_1818.wav                      3.01s      other          
other_frog_3281.wav                      2.40s      other          
other_frog_2736.wav                      2.53s      other          
Clicking_Stream_Frog_191.wav             3.76s      Clicking Stream Frog
other_frog_269.wav                       3.16s      other          
other_frog_111.wav                       2.03s      other          
other_frog_661.wav                       3.37s      other          
other_frog_962.wav                       3.99s      other          
other_frog_1705.wav                      1.08s      other          
other_frog_523.wav                       1.19s      other          
other_frog_3282.wav                      1.21s      other          
other_frog_2594.w

In [4]:
import os
import csv
from collections import defaultdict
from pathlib import Path

def analyze_frog_recordings(directory_path, min_threshold=100000):
    """
    Analyze frog recordings to find species with few recordings.
    
    Args:
        directory_path: Path to directory containing audio files
        min_threshold: Minimum number of recordings to not be considered "few"
    
    Returns:
        dict: {frog_species: count} for species with fewer than min_threshold recordings
    """
    # Initialize dictionary to store counts
    frog_counts = defaultdict(int)
    
    # Supported audio file extensions
    audio_extensions = {'.wav'}
    
    # Count recordings per frog species
    for file in Path(directory_path).iterdir():
        if file.suffix.lower() in audio_extensions:
            parts = file.stem.split('_')
            frog_species_parts = []
            
            # Iterate through parts until we find a number
            for part in parts:
                if part.isdigit():
                    break
                frog_species_parts.append(part)
            
            if frog_species_parts:  # Only proceed if we found at least one non-number part
                frog_species = '_'.join(frog_species_parts)
                frog_counts[frog_species] += 1
    
    # Filter for species with few recordings
    few_recordings = {
        species: count 
        for species, count in frog_counts.items() 
    }
    
    return few_recordings

def print_frog_report(few_recordings, output_csv=None):
    """Print a formatted report of frogs with few recordings and optionally save to CSV."""
    if not few_recordings:
        print("All frog species have sufficient recordings (10 or more each).")
        if output_csv:
            with open(output_csv, 'w', newline='') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(["Species", "Recording Count"])
                writer.writerow(["All species have sufficient recordings", ""])
        return
    
    # Print console report
    print("Frog species with fewer than 10 recordings:")
    print("-" * 45)
    for species, count in sorted(few_recordings.items(), key=lambda item: (item[1], item[0])):
        print(f"{species.ljust(30)}: {count} recording{'s' if count != 1 else ''}")
    print("-" * 45)
    print(f"Total under-represented species: {len(few_recordings)}")
    
    # Save to CSV if requested
    if output_csv:
        with open(output_csv, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["Species", "Recording Count"])
            for species, count in sorted(few_recordings.items(), key=lambda item: (item[1], item[0])):
                writer.writerow([species, count])


# Example usage
if __name__ == "__main__":
    directory_path = "../Datasets/clipped-3.05s/"  # Change this to your directory
    # output_csv = "frog_recording_report.csv"  # Output CSV filename
    few_recordings = analyze_frog_recordings(directory_path, min_threshold=10)
    print_frog_report(few_recordings)

Frog species with fewer than 10 recordings:
---------------------------------------------
Painted                       : 276 recordings
Weale                         : 390 recordings
De                            : 410 recordings
Peninsula                     : 425 recordings
Eastern                       : 725 recordings
Banded                        : 982 recordings
Rain                          : 1158 recordings
Clicking                      : 1248 recordings
Southern                      : 1716 recordings
none                          : 2441 recordings
Mountain                      : 2884 recordings
other                         : 5275 recordings
---------------------------------------------
Total under-represented species: 12


## Clip files to 3.05 seconds

In [None]:
import os
import numpy as np
from pathlib import Path
from scipy.io import wavfile

def process_audio_files(input_dir, output_dir="../Datasets/clipped-3.05s/"):
    """
    Process mono audio files:
    - For files >2 seconds: create 2-second clips centered at max amplitude points
    - For files <=2 seconds: skip (commented out but can be enabled)
    - All files saved to output directory
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    
    output_path.mkdir(exist_ok=True)
    
    processed_count = 0
    clipped_count = 0
    copied_count = 0
    clipped_duration = 3.05
    
    for filepath in input_path.glob('*.wav'):
        try:
            # Read WAV file (mono)
            sample_rate, data = wavfile.read(filepath)
            
            # Ensure mono audio
            if len(data.shape) > 1:
                print(f"Skipping {filepath.name} - stereo audio not supported")
                continue
            
            duration = len(data) / sample_rate
            output_file = output_path / filepath.name
            
            if duration <= clipped_duration:
                # For short files, skip (can uncomment to copy)
                # wavfile.write(output_file, sample_rate, data)
                # copied_count += 1
                # print(f"Copied: {filepath.name} ({duration:.2f}s)")
                continue
            else:
                abs_data = np.abs(data)
                sorted_indices = np.argsort(abs_data)
                max_idx = sorted_indices[-1]
                
                # Find secondary peak
                exclusion_radius = int((clipped_duration/2) * sample_rate)  # 1 second radius
                valid_mask = np.ones(len(data), dtype=bool)
                valid_mask[max(0, max_idx-exclusion_radius):min(len(data), max_idx+exclusion_radius)] = False
                
                if np.any(valid_mask):
                    modified_data = abs_data.copy()
                    modified_data[~valid_mask] = 0
                    second_max_idx = np.argmax(modified_data)
                    
                    # Only accept if secondary peak is at least 10% as loud as primary
                    if abs_data[second_max_idx] < 0.1 * abs_data[max_idx]:
                        second_max_idx = None
                else:
                    second_max_idx = None

                # Create 2-second clip around primary peak
                target_len = int(round(clipped_duration * sample_rate))
                half_window = target_len // 2  # 3.05s clip => half window in samples


                # --- primary clip around primary peak ---
                start = max(0, max_idx - half_window)
                end   = min(len(data), max_idx + half_window)
                primary_clip = data[start:end]

                # Pad to exact target_len (symmetric)
                if len(primary_clip) < target_len:
                    remaining = target_len - len(primary_clip)
                    pad_before = int(remaining // 2)
                    pad_after  = int(remaining - pad_before)
                    primary_clip = np.pad(primary_clip, (pad_before, pad_after), mode='constant')

                wavfile.write(output_path / f"{filepath.stem}_primary.wav", sample_rate, primary_clip)
                clipped_count += 1

                # --- secondary clip (if valid peak exists) ---
                if second_max_idx is not None:
                    start2 = max(0, second_max_idx - half_window)
                    end2   = min(len(data), second_max_idx + half_window)
                    secondary_clip = data[start2:end2]

                    if len(secondary_clip) < target_len:
                        remaining = target_len - len(secondary_clip)
                        pad_before = int(remaining // 2)
                        pad_after  = int(remaining - pad_before)
                        secondary_clip = np.pad(secondary_clip, (pad_before, pad_after), mode='constant')

                    wavfile.write(output_path / f"{filepath.stem}_secondary.wav", sample_rate, secondary_clip)
                    clipped_count += 1



                # half_window = int((clipped_duration/2) * sample_rate)  # 1 second = half of 2s clip
                # primary_clip = data[max(0, max_idx - half_window):min(len(data), max_idx + half_window)]
                
                # # Pad if needed
                # if len(primary_clip) < clipped_duration * sample_rate:
                #     pad_before = (clipped_duration * sample_rate - len(primary_clip)) // 4
                #     pad_after = (clipped_duration * sample_rate - len(primary_clip)) - pad_before
                #     primary_clip = np.pad(primary_clip, (pad_before, pad_after), mode='constant')
                
                # wavfile.write(output_path / f"{filepath.stem}_primary.wav", sample_rate, primary_clip)
                # clipped_count += 1

                # # Create secondary clip if valid peak exists
                # if second_max_idx is not None:
                #     secondary_clip = data[max(0, second_max_idx - half_window):min(len(data), second_max_idx + half_window)]
                    
                #     if len(secondary_clip) < clipped_duration * sample_rate:
                #         pad_before = (clipped_duration * sample_rate - len(secondary_clip)) // 2
                #         pad_after = (clipped_duration * sample_rate - len(secondary_clip)) - pad_before
                #         secondary_clip = np.pad(secondary_clip, (pad_before, pad_after), mode='constant')
                    
                #     wavfile.write(output_path / f"{filepath.stem}_secondary.wav", sample_rate, secondary_clip)
                #     clipped_count += 1
                
                processed_count += 1
                
        except Exception as e:
            print(f"Error processing {filepath.name}: {str(e)}")
            continue
    
    print("\nProcessing complete!")
    print(f"Processed {processed_count} files")
    print(f"- Created {clipped_count} {clipped_duration}-second clips")
    print(f"- {copied_count} files copied as-is (<=2 seconds)")
    print(f"Output directory: {output_path.resolve()}")

if __name__ == "__main__":
    input_directory = "../Datasets/Raw-audio/frog_wav/" 
    process_audio_files(input_directory)


Processing complete!
Processed 7570 files
- Created 15054 3.05-second clips
- 0 files copied as-is (<=2 seconds)
Output directory: /home/james/Documents/semester-2/skripsie/code/Datasets/clipped-3.05s


In [None]:
import os
import numpy as np
from pathlib import Path
from scipy.io import wavfile

def process_audio_files(input_dir, clips_per_file=2, output_dir="../Datasets/mega-clipped-2s/"):
    """
    Process mono audio files:
    - For files >2 seconds: create specified number of 2-second clips
    - For files <=2 seconds: create as many clips as possible with max 1/3 overlap
    - Clips are distributed evenly across the file with 1/3 overlap
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    
    output_path.mkdir(exist_ok=True)
    
    processed_count = 0
    total_clips = 0
    
    for filepath in input_path.glob('*.wav'):
        try:
            # Read WAV file (mono)
            sample_rate, data = wavfile.read(filepath)
            
            # Ensure mono audio
            if len(data.shape) > 1:
                print(f"Skipping {filepath.name} - stereo audio not supported")
                continue
            
            duration = len(data) / sample_rate
            clip_length = 2.0  # 2-second clips
            samples_per_clip = int(clip_length * sample_rate)
            
            if duration <= clip_length:
                # For short files, create as many clips as possible with max 1/3 overlap
                possible_clips = max(1, int(1 + (duration * sample_rate - samples_per_clip) / (samples_per_clip * (2/3))))
                step_size = max(1, (len(data) - samples_per_clip) // max(1, possible_clips - 1))
            else:
                possible_clips = clips_per_file
                step_size = max(1, (len(data) - samples_per_clip) // max(1, possible_clips - 1))
            
            # Ensure we don't create more clips than possible
            actual_clips = min(possible_clips, clips_per_file)
            
            # Calculate start positions with 1/3 overlap
            start_positions = []
            for i in range(actual_clips):
                if actual_clips == 1:
                    pos = max(0, (len(data) - samples_per_clip) // 2)  # center if only one clip
                else:
                    pos = min(len(data) - samples_per_clip, 
                             int(i * (len(data) - samples_per_clip) / max(1, actual_clips - 1)))
                start_positions.append(pos)
            
            # Create clips
            for i, start in enumerate(start_positions):
                end = start + samples_per_clip
                clip = data[start:end]
                
                # Pad if needed (shouldn't happen with proper positioning)
                if len(clip) < samples_per_clip:
                    pad_before = (samples_per_clip - len(clip)) // 2
                    pad_after = samples_per_clip - len(clip) - pad_before
                    clip = np.pad(clip, (pad_before, pad_after), mode='constant')
                
                # Save clip
                clip_name = f"{filepath.stem}_clip{i+1}.wav"
                wavfile.write(output_path / clip_name, sample_rate, clip)
                total_clips += 1
            
            processed_count += 1
            print(f"Processed {filepath.name} - created {actual_clips} clips")
            
        except Exception as e:
            print(f"Error processing {filepath.name}: {str(e)}")
            continue
    
    print("\nProcessing complete!")
    print(f"Processed {processed_count} files")
    print(f"- Created {total_clips} total clips")
    print(f"Output directory: {output_path.resolve()}")

if __name__ == "__main__":
    input_directory = "../Datasets/Raw-audio/frog_wav_nope/"
    process_audio_files(input_directory, clips_per_file=4)  # Example: 3 clips per file

## Rename files to first word_number

In [3]:
import os
import re
from collections import defaultdict

def rename_wav_files(directory):
    # Get all .wav files in the directory
    wav_files = [f for f in os.listdir(directory) if f.lower().endswith('.wav')]
    
    # Sort the files to ensure consistent ordering
    wav_files.sort()
    
    # Dictionary to keep track of counters for each prefix
    counters = defaultdict(int)
    
    for filename in wav_files:
        # Split the filename at the first underscore
        parts = re.split('_', filename, 1)
        
        # Get the first part (before first underscore) or whole filename if no underscore
        prefix = parts[0]
        
        # Increment the counter for this prefix
        counters[prefix] += 1
        
        # Create new filename
        new_name = f"{prefix}_{counters[prefix]}.wav"
        
        # Full paths for old and new names
        old_path = os.path.join(directory, filename)
        new_path = os.path.join(directory, new_name)
        
        # Rename the file
        try:
            os.rename(old_path, new_path)
            print(f"Renamed: {filename} -> {new_name}")
        except OSError as e:
            print(f"Error renaming {filename}: {e}")

if __name__ == "__main__":
    # Get the directory containing the .wav files
    # dir_path = input("Enter the directory path containing .wav files: ").strip()
    dir_path = "../Datasets/clipped-3.05s/"
    
    # Verify the directory exists
    if os.path.isdir(dir_path):
        rename_wav_files(dir_path)
        print("Renaming complete!")
    else:
        print("Error: The specified directory does not exist.")

Renamed: Banded_1_segment_1.wav -> Banded_1.wav
Renamed: Banded_2_segment_1.wav -> Banded_2.wav
Renamed: Banded_4_segment_1.wav -> Banded_3.wav
Renamed: Banded_5_segment_1.wav -> Banded_4.wav
Renamed: Banded_6_segment_1.wav -> Banded_5.wav
Renamed: Banded_Stream_Frog_100_primary.wav -> Banded_6.wav
Renamed: Banded_Stream_Frog_100_secondary.wav -> Banded_7.wav
Renamed: Banded_Stream_Frog_101_primary.wav -> Banded_8.wav
Renamed: Banded_Stream_Frog_101_secondary.wav -> Banded_9.wav
Renamed: Banded_Stream_Frog_102_primary.wav -> Banded_10.wav
Renamed: Banded_Stream_Frog_102_secondary.wav -> Banded_11.wav
Renamed: Banded_Stream_Frog_103_primary.wav -> Banded_12.wav
Renamed: Banded_Stream_Frog_103_secondary.wav -> Banded_13.wav
Renamed: Banded_Stream_Frog_104_primary.wav -> Banded_14.wav
Renamed: Banded_Stream_Frog_104_secondary.wav -> Banded_15.wav
Renamed: Banded_Stream_Frog_105_primary.wav -> Banded_16.wav
Renamed: Banded_Stream_Frog_105_secondary.wav -> Banded_17.wav
Renamed: Banded_Stre

## Renaming for southern african frog calls set

In [10]:
import os

# Target keywords (case-insensitive)
keywords = ['Banded', 'Clicking', 'De', 'Eastern', 'Mountain', 'Painted', 
            'Peninsula', 'Rain', 'Southern', 'Weale', 'none', 'other']

# Directory containing files
directory = '../Datasets/old/calls/'  # Current directory (change if needed)

# Counters for each keyword
counters = {keyword: 1 for keyword in keywords}

# Iterate through files
for filename in os.listdir(directory):
    if filename.endswith('.wav'):
        lower_filename = filename.lower()
        matched_keyword = None
        
        # Check for keyword matches
        for keyword in keywords:
            if keyword.lower() in lower_filename:
                matched_keyword = keyword
                break
        
        # If no keyword matched, default to 'other'
        if not matched_keyword:
            matched_keyword = 'other'
        
        # Generate new filename
        new_name = f"{matched_keyword}__{counters[matched_keyword]}.wav"
        counters[matched_keyword] += 1
        
        # Rename the file
        old_path = os.path.join(directory, filename)
        new_path = os.path.join(directory, new_name)
        
        os.rename(old_path, new_path)
        print(f"Renamed: {filename} → {new_name}")

Renamed: Eastern_1.wav → Eastern__1.wav
Renamed: other_19.wav → other__1.wav
Renamed: other_41.wav → other__2.wav
Renamed: Rain_3.wav → Rain__1.wav
Renamed: Rain_15.wav → Rain__2.wav
Renamed: Rain_8.wav → Rain__3.wav
Renamed: other_42.wav → other__3.wav
Renamed: other_33.wav → other__4.wav
Renamed: other_53.wav → other__5.wav
Renamed: other_99.wav → other__6.wav
Renamed: other_52.wav → other__7.wav
Renamed: other_37.wav → other__8.wav
Renamed: Rain_4.wav → Rain__4.wav
Renamed: other_4.wav → other__9.wav
Renamed: Southern_2.wav → Southern__1.wav
Renamed: other_100.wav → other__10.wav
Renamed: Southern_1.wav → Southern__2.wav
Renamed: other_45.wav → other__11.wav
Renamed: De_6.wav → De__1.wav
Renamed: other_32.wav → other__12.wav
Renamed: other_91.wav → other__13.wav
Renamed: Rain_19.wav → Rain__5.wav
Renamed: other_61.wav → other__14.wav
Renamed: other_71.wav → other__15.wav
Renamed: Peninsula_1.wav → Peninsula__1.wav
Renamed: other_79.wav → other__16.wav
Renamed: Rain_14.wav → Rain__6.

## Convert all of the audio files into Spectrograms and save as .h5

In [None]:
import os
import numpy as np
import librosa
from joblib import Parallel, delayed
import multiprocessing
import h5py
from tqdm import tqdm
import noisereduce as nr

# Configuration
AUDIO_DIR = "../Datasets/clipped-3.05s/"
OUTPUT_DIR = "../Datasets/"
os.makedirs(OUTPUT_DIR, exist_ok=True)

SAMPLE_RATE = 22050
N_FFT = 1028
HOP_LENGTH = 128
N_MELS = 256

SAMPLE_RATE = 22050
N_FFT = 2048  # FFT window size
HOP_LENGTH = 256  # Hop length
N_MELS = 128  # Number of Mel bands

# Desired spectrogram size
TARGET_HEIGHT = 128
TARGET_WIDTH = 128

def process_file(file, duration=3.05):
    try:
        if file.lower().endswith('.wav'):
            audio_path = os.path.join(AUDIO_DIR, file)
            y, sr = librosa.load(audio_path, sr=SAMPLE_RATE, res_type='kaiser_fast', mono=True)
            
            # Ensure length is an integer
            length = int(duration * sr)  # Cast length to integer
            
            # Clip the audio around the center
            y_clipped = y[len(y)//2 - length//2 : len(y)//2 + length//2]
            
            # Generate Mel spectrogram
            S = librosa.feature.melspectrogram(y=y_clipped, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS)
            S_dB = librosa.power_to_db(S, ref=np.max)
            
            # Resize to target size
            S_resized = librosa.util.fix_length(S_dB, size=TARGET_WIDTH, axis=-1)[:TARGET_HEIGHT, :]
            
            key = os.path.splitext(file)[0]
            return key, S_resized
    except Exception as e:
        print(f"\nSkipped {file}: {str(e)}")
        return None

# Get list of files first for accurate tqdm progress
files = [f for f in os.listdir(AUDIO_DIR) if f.lower().endswith('.wav')]
print(f"Processing {len(files)} audio files...")

batch_size = 500
for i in tqdm(range(0, len(files), batch_size)):
    batch = files[i:i+batch_size]
    results = Parallel(n_jobs=2)(delayed(process_file)(f) for f in batch)
    with h5py.File(os.path.join(OUTPUT_DIR, 'FD_5.1.h5'), 'a') as hf:  # 'a' for append
        for result in results:
            if result is not None:
                key, S_dB = result
                hf.create_dataset(key, data=S_dB)


print(f"\nDone! Spectrograms saved to {OUTPUT_DIR}")

Processing 17930 audio files...


  0%|          | 0/36 [00:00<?, ?it/s]

100%|██████████| 36/36 [01:31<00:00,  2.55s/it]


Done! Spectrograms saved to ../Datasets/





In [None]:
import h5py

# Path to your .h5 file
h5_file_path = 'path/to/your/file.h5'

# Open the HDF5 file
with h5py.File(h5_file_path, 'r') as file:
    # List all the keys (dataset names) in the HDF5 file
    print("Available datasets:", list(file.keys()))
    
    # Assuming you know the dataset name (e.g., the first key in the file)
    # You can print the shape of the dataset to see the matrix size
    dataset_name = list(file.keys())[0]  # or replace with the specific dataset name you want
    dataset = file[dataset_name]
    
    # Print the shape of the spectrogram matrix
    print(f"Shape of the spectrogram matrix: {dataset.shape}")


In [6]:
import os
import numpy as np
import librosa
from joblib import Parallel, delayed
from tqdm import tqdm
import noisereduce as nr

# Configuration
AUDIO_DIR = "../Datasets/clipped-3.05s/"
OUTPUT_DIR = "../Datasets/"
os.makedirs(OUTPUT_DIR, exist_ok=True)

SAMPLE_RATE = 22050
N_FFT = 1028
HOP_LENGTH = 128
N_MELS = 256

def process_file(file):
    try:
        if file.lower().endswith('.wav'):
            audio_path = os.path.join(AUDIO_DIR, file)
            y, sr = librosa.load(audio_path, sr=SAMPLE_RATE, res_type='kaiser_fast', mono=True)
            y = nr.reduce_noise(y=y, sr=sr)
            S = librosa.feature.melspectrogram(
                y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS
            )
            S_dB = librosa.power_to_db(S, ref=np.max)
            key = os.path.splitext(file)[0]
            return key, S_dB
    except Exception as e:
        print(f"\nSkipped {file}: {str(e)}")
        return None

# Get list of files
files = [f for f in os.listdir(AUDIO_DIR) if f.lower().endswith('.wav')]
print(f"Processing {len(files)} audio files...")

batch_size = 500
for i in tqdm(range(0, len(files), batch_size), desc="Batches"):
    batch = files[i:i+batch_size]
    results = Parallel(n_jobs=2)(delayed(process_file)(f) for f in batch)

    # Collect valid results into dict
    spectros = {key: S_dB for key, S_dB in results if result is not None for key, S_dB in [result]}

    if spectros:
        npz_path = os.path.join(OUTPUT_DIR, f"FD_5.0_batch{i//batch_size}.npz")
        np.savez_compressed(npz_path, **spectros)

print(f"\nDone! Spectrograms saved as NPZ files in {OUTPUT_DIR}")

Processing 17930 audio files...


  sig_mult_above_thresh = (abs_sig_stft - sig_stft_smooth) / sig_stft_smooth



Skipped other_264.wav: Audio buffer is not finite everywhere


Batches:   3%|▎         | 1/36 [00:47<27:49, 47.70s/it]


TypeError: cannot unpack non-iterable NoneType object

In [17]:
import os
import numpy as np
import librosa
from joblib import Parallel, delayed
import multiprocessing
from tqdm import tqdm
import random
import noisereduce as nr

# Configuration
AUDIO_DIR = "../Datasets/clipped-3.05s/"
OUTPUT_DIR = "../Datasets/folds_10_5.0/"
os.makedirs(OUTPUT_DIR, exist_ok=True)

SAMPLE_RATE = 22050
N_FFT = 1028
HOP_LENGTH = 512
N_MELS = 256

def process_file(file):
    try:
        if file.lower().endswith('.wav'):
            audio_path = os.path.join(AUDIO_DIR, file)
            y, sr = librosa.load(audio_path, sr=SAMPLE_RATE, res_type='kaiser_fast', mono=True)
            # y = nr.reduce_noise(y=y, sr=sr) 
            S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_MELS)
            S_dB = librosa.power_to_db(S, ref=np.max)
            key = os.path.splitext(file)[0]
            return key, S_dB
    except Exception as e:
        print(f"\nSkipped {file}: {str(e)}")
        return None

def create_batches(files, num_batches):
    # Shuffle all files randomly
    random.shuffle(files)
    
    # Calculate batch size
    batch_size = len(files) // num_batches
    remaining_files = len(files) % num_batches
    
    batches = []
    
    # Create batches of size 'batch_size'
    for i in range(num_batches):
        if i < remaining_files:
            # Add an extra file to the first 'remaining_files' batches
            batches.append(files[i * (batch_size + 1):(i + 1) * (batch_size + 1)])
        else:
            batches.append(files[i * batch_size + remaining_files:(i + 1) * batch_size + remaining_files])
    
    return batches

# Get list of files first for accurate tqdm progress
files = [f for f in os.listdir(AUDIO_DIR) if f.lower().endswith('.wav')]
print(f"Processing {len(files)} audio files...")

# Create exactly 11 batches
num_batches = 11
batches = create_batches(files, num_batches)

# Process and save spectrograms for each batch
for i, batch in tqdm(enumerate(batches), total=len(batches)):
    results = Parallel(n_jobs=multiprocessing.cpu_count() - 1)(delayed(process_file)(f) for f in batch)
    
    # Save the results into an .npz file
    npz_filename = os.path.join(OUTPUT_DIR, f'shard{i}.npz')
    spectrograms = {}
    
    for result in results:
        if result is not None:
            key, S_dB = result
            spectrograms[key] = S_dB

    # Save all spectrograms in a single .npz file
    if spectrograms:
        np.savez(npz_filename, **spectrograms)

print(f"\nDone! Spectrograms saved to {OUTPUT_DIR}")




Processing 17930 audio files...


100%|██████████| 11/11 [00:31<00:00,  2.84s/it]


Done! Spectrograms saved to ../Datasets/folds_10_5.0/



