In [None]:
import os
import shutil
import pandas as pd
from tqdm import tqdm
import sys

# ===============================
# USER SETTINGS
# ===============================
# Folder the noise recordings are copied from.
NOISE_SOURCE_FOLDER = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/Audio Wise V1.0"
# CSV that lists every recording together with its class name.
METADATA_PATH = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/Metadata V1.0 FSC22.csv"
# Root of the segmented dataset; it also holds the master manifest.
EXISTING_OUTPUT_FOLDER = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/3_SegmentedAudios"

# Upper bound on the number of files this cell adds.
TARGET_DEFICIT = 2969

# Only recordings whose class name appears here are eligible for copying.
ALLOWED_CLASSES = [
    'Rain',
    'Thunderstorm',
    'WaterDrops',
    'Wind',
    'Silence',
    'TreeFalling',
    'Whistling',
    'Insect',
    'Frog',
    'BirdChirping',
    'WingFlapping',
    'Squirrel',
    'Footsteps',
    'Clapping',
    'WolfHowl',
    'Speaking',
]

# ===============================
# STEP 1: LOAD EXISTING MANIFEST
# ===============================
# The manifest supplies the column layout that every appended row must follow.
manifest_path = os.path.join(EXISTING_OUTPUT_FOLDER, "master_manifest_NoEagle.csv")

print("Loading existing manifest structure...")

if not os.path.exists(manifest_path):
    # Report the path that was actually checked; the previous message named a
    # different file (master_manifest.csv) than the one this cell looks for.
    print(f"‚ùå No existing manifest found. Please ensure {manifest_path} exists.")
    sys.exit(1)

# Keep the try body down to the one call that can fail on a bad file.
try:
    existing_manifest = pd.read_csv(manifest_path)
except Exception as e:
    print(f"‚ùå Error reading existing manifest: {e}")
    sys.exit(1)

manifest_columns = existing_manifest.columns.tolist()
print(f"‚úÖ Found existing manifest with {len(manifest_columns)} columns")

# ===============================
# LOAD & FILTER METADATA
# ===============================
print("\nLoading and filtering metadata...")
try:
    meta_df = pd.read_csv(METADATA_PATH)

    # Keep the allowed classes only, then shuffle with a fixed seed so the
    # selection is a random mix that is the same on every run.
    filtered_df = (
        meta_df.loc[meta_df['Class Name'].isin(ALLOWED_CLASSES)]
        .copy()
        .sample(frac=1, random_state=42)
        .reset_index(drop=True)
    )

    print(f"‚úÖ Found {len(filtered_df)} valid noise files.")

except Exception as e:
    # Covers an unreadable CSV as well as a missing 'Class Name' column.
    print(f"‚ùå Error reading metadata: {e}")
    sys.exit(1)

# ===============================
# PREPARE FOR COPYING
# ===============================
# Continue numbering after the highest group_id already in the manifest so the
# new rows cannot collide with existing groups. 10000 is the fallback when the
# manifest is empty, has no group_id column, or holds no numeric group_id.
next_group_id = 10000
if len(existing_manifest) > 0 and 'group_id' in manifest_columns:
    # Coerce instead of wrapping int(...) in a bare `except:`: unparsable
    # cells become NaN, and a text-typed column is compared numerically
    # rather than lexicographically.
    _max_group_id = pd.to_numeric(existing_manifest['group_id'], errors='coerce').max()
    if pd.notna(_max_group_id):
        next_group_id = int(_max_group_id) + 1

print(f"Starting group_id from: {next_group_id}")

# ===============================
# COPY FILES
# ===============================
# Copies up to TARGET_DEFICIT recordings from NOISE_SOURCE_FOLDER into
# <output>/GeneralForest/NoEagleSound and collects one manifest row per copy.
created_count = 0
missing_count = 0  # metadata rows whose audio file is not on disk
segments_data = []

# Every copied file shares the same location/label, so the destination is
# resolved (and created) once instead of on every iteration.
location = "GeneralForest"
label = "NoEagleSound"
output_folder = os.path.join(location, label)
save_folder = os.path.join(EXISTING_OUTPUT_FOLDER, output_folder)
os.makedirs(save_folder, exist_ok=True)

print(f"\nüìâ Target to fill: {TARGET_DEFICIT} files")
print(f"üìÇ Copying from: {NOISE_SOURCE_FOLDER}")

# Iterate through the filtered (already shuffled) list
for index, row in tqdm(filtered_df.iterrows(), total=min(len(filtered_df), TARGET_DEFICIT)):
    if created_count >= TARGET_DEFICIT:
        break

    original_filename = str(row['Dataset File Name']).strip()
    noise_class = str(row['Class Name']).strip()

    source_path = os.path.join(NOISE_SOURCE_FOLDER, original_filename)

    if not os.path.exists(source_path):
        # Count instead of skipping silently so a wrong source folder is
        # visible in the summary below.
        missing_count += 1
        continue

    # Prefix with location and class so the origin is readable from the name.
    new_filename = f"GeneralForest_{noise_class}_{original_filename}"
    dest_path = os.path.join(save_folder, new_filename)

    try:
        shutil.copy2(source_path, dest_path)
    except OSError as e:
        # Best effort: report the failure and move on to the next file.
        print(f"‚ö†Ô∏è Error copying {original_filename}: {e}")
        continue

    # Values we actually know; any other manifest column is left blank.
    column_mapping = {
        'label': label,
        'label_base': label,
        'label_full': label,
        'group_id': next_group_id + created_count,  # one group per copied file
        'location_id': location,
        'segment_filename': new_filename,
        'output_folder': output_folder,
        'source_audio': original_filename,
        'start': '',
        'end': '',
        'quality': '',
        'selection_numbers': '',
    }

    # Build the row in the existing manifest's column layout.
    new_row = {col: column_mapping.get(col, '') for col in manifest_columns}

    segments_data.append(new_row)
    created_count += 1

print(f"\n‚ú® Finished! Copied {created_count} noise files.")
if missing_count:
    print(f"‚ö†Ô∏è Skipped {missing_count} metadata entries with no audio file in {NOISE_SOURCE_FOLDER}")

# ===============================
# UPDATE MASTER MANIFEST
# ===============================
# Appends the rows collected above to the manifest file. If that fails, the
# new rows are written to a separate backup CSV so they are not lost.
if not segments_data:
    print("\n‚ö†Ô∏è No files were copied. Manifest not updated.")
else:
    print("\nUpdating master_manifest.csv...")
    try:
        # Same column order as the existing manifest.
        new_df = pd.DataFrame(segments_data).loc[:, manifest_columns]

        # Existing rows first, new rows after, with a fresh index.
        combined_df = pd.concat([existing_manifest, new_df], ignore_index=True)

        combined_df.to_csv(manifest_path, index=False)
        print(f"‚úÖ Master manifest updated successfully. Added {created_count} new rows.")

    except Exception as e:
        print(f"‚ùå Error updating manifest: {e}")
        # Backup save
        pd.DataFrame(segments_data).to_csv(
            os.path.join(EXISTING_OUTPUT_FOLDER, "added_noise_manifest_backup.csv"),
            index=False,
        )
        print("   Saved new data to 'added_noise_manifest_backup.csv' instead.")

Loading existing manifest structure...
✅ Found existing manifest with 12 columns

Loading and filtering metadata...
✅ Found 1125 valid noise files.
Starting group_id from: 1813

📉 Target to fill: 2969 files
📂 Copying from: D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/Audio Wise V1.0


100%|██████████| 1125/1125 [00:01<00:00, 626.88it/s]


✨ Finished! Copied 1125 noise files.

Updating master_manifest.csv...
✅ Master manifest updated successfully. Added 1125 new rows.





In [None]:
import os
import shutil
import pandas as pd
from tqdm import tqdm
import sys
import numpy as np
import librosa
import soundfile as sf

# ===============================
# USER SETTINGS
# ===============================
# Folder holding the ESC-50 audio clips.
NOISE_SOURCE_FOLDER = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/archive2/audio/audio"
# ESC-50 metadata CSV (read below through its 'filename' / 'category' columns).
METADATA_PATH = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/archive2/esc50.csv"
# Root of the segmented dataset; it also holds the master manifest.
EXISTING_OUTPUT_FOLDER = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/3_SegmentedAudios"

# Number of files this cell should add (copies first, augmentations after).
TARGET_DEFICIT = 2229

# ESC-50 categories accepted as "no eagle" noise. Names must match the
# dataset's `category` values exactly: the door-creak class is spelled
# 'door_wood_creaks' there, so the previous 'door_wood_creaking' entry never
# matched a single row.
ALLOWED_CLASSES = [
    'chirping_birds', 'thunderstorm', 'crow', 'door_wood_knock', 'pouring_water',
    'clapping', 'church_bells', 'water_drops', 'wind', 'sheep', 'frog',
    'fireworks', 'cow', 'crackling_fire', 'hen', 'insects', 'hand_saw',
    'pig', 'rooster', 'sea_waves', 'dog', 'breathing', 'siren', 'snoring',
    'airplane', 'cat', 'door_wood_creaks', 'crickets', 'coughing',
    'chainsaw', 'drinking_sipping', 'laughing', 'glass_breaking',
    'engine', 'footsteps', 'crying_baby', 'can_opening',
]

# ===============================
# LOAD MANIFESTS
# ===============================
# The existing manifest provides the column layout for the rows added below.
manifest_path = os.path.join(EXISTING_OUTPUT_FOLDER, "master_manifest_NoEagle.csv")
if not os.path.exists(manifest_path):
    print("‚ùå No existing manifest found.")
    sys.exit(1)

existing_manifest = pd.read_csv(manifest_path)
manifest_columns = existing_manifest.columns.tolist()

# Determine next group ID: one past the highest numeric group_id, or 10000
# when the manifest is empty, lacks the column, or holds no numeric value.
# Coercing first avoids the ValueError that int(NaN) raised when the column
# contained blanks only, and makes a text-typed column compare numerically.
next_group_id = 10000
if 'group_id' in existing_manifest.columns and not existing_manifest.empty:
    _max_group_id = pd.to_numeric(existing_manifest['group_id'], errors='coerce').max()
    if pd.notna(_max_group_id):
        next_group_id = int(_max_group_id) + 1

# ===============================
# PREPARE SOURCE LIST
# ===============================
print("Loading metadata...")
meta_df = pd.read_csv(METADATA_PATH)

# Rows of the allowed categories, shuffled once with a fixed seed so the
# processing order is the same on every run.
filtered_df = (
    meta_df.loc[meta_df['category'].isin(ALLOWED_CLASSES)]
    .copy()
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

print(f"‚úÖ Found {len(filtered_df)} unique source files.")
print(f"üìâ Target needed: {TARGET_DEFICIT}")

# ===============================
# FILL LOOP (COPY -> AUGMENT)
# ===============================
# Pass 1 over the shuffled source list copies each clip unchanged; later
# passes write pitch-shifted variants until TARGET_DEFICIT files exist.
created_count = 0
copied_count = 0      # files written by direct copy
augmented_count = 0   # files written by pitch-shift augmentation
failed_count = 0      # copy/augment attempts that raised
segments_data = []
source_index = 0
total_sources = len(filtered_df)

# Destination is the same for every file, so set it up once.
location = "GeneralForest"
label = "NoEagleSound"
save_folder = os.path.join(EXISTING_OUTPUT_FOLDER, location, label)
os.makedirs(save_folder, exist_ok=True)

pbar = tqdm(total=TARGET_DEFICIT)

# created_count at the start of the current pass; used to detect a pass that
# produced nothing (all sources missing or failing), which would otherwise
# spin forever.
pass_start_count = 0

# `total_sources > 0` guards the modulo below against an empty source list.
while total_sources > 0 and created_count < TARGET_DEFICIT:
    position = source_index % total_sources
    if position == 0 and source_index > 0:
        if created_count == pass_start_count:
            tqdm.write("‚ö†Ô∏è A full pass over the source list created no files; stopping.")
            break
        pass_start_count = created_count

    # Cycle through the list repeatedly if needed
    row = filtered_df.iloc[position]
    source_index += 1

    orig_filename = str(row['filename']).strip()
    noise_class = str(row['category']).strip()
    source_path = os.path.join(NOISE_SOURCE_FOLDER, orig_filename)

    if not os.path.exists(source_path):
        continue

    # Copy during the first pass, augment on every later pass.
    is_augmentation = (source_index > total_sources)

    try:
        if not is_augmentation:
            # --- METHOD A: DIRECT COPY (Fast) ---
            new_filename = f"GeneralForest_{noise_class}_{orig_filename}"
            dest_path = os.path.join(save_folder, new_filename)
            shutil.copy2(source_path, dest_path)

        else:
            # --- METHOD B: AUGMENTATION (Fill the rest) ---
            y, sr = librosa.load(source_path, sr=None)  # keep native sample rate

            # Random pitch shift of up to 1.5 semitones in either direction
            steps = np.random.uniform(-1.5, 1.5)
            y_aug = librosa.effects.pitch_shift(y, sr=sr, n_steps=steps)

            # source_index in the name keeps repeated augmentations distinct
            new_filename = f"GeneralForest_{noise_class}_aug{source_index}_{orig_filename}"
            dest_path = os.path.join(save_folder, new_filename)
            sf.write(dest_path, y_aug, sr)

    except Exception as e:
        # Best effort per file, but make the failure visible instead of
        # swallowing it.
        failed_count += 1
        tqdm.write(f"‚ö†Ô∏è Error processing {orig_filename}: {e}")
        continue

    # Add to Manifest Data: start from blanks for every manifest column.
    new_row = {col: '' for col in manifest_columns}
    new_row.update({
        'label': label,
        'label_base': label,
        'label_full': label,
        'group_id': next_group_id + created_count,
        'location_id': location,
        'segment_filename': new_filename,
        'output_folder': os.path.join(location, label),
        'source_audio': orig_filename,
        'start': 0.0,
        'end': 5.0,
        'quality': 'N/A',
        'selection_numbers': 'None'
    })

    segments_data.append(new_row)
    created_count += 1
    if is_augmentation:
        augmented_count += 1
    else:
        copied_count += 1
    pbar.update(1)

pbar.close()
# Report what was really written rather than assuming every source was copied.
print(f"\n‚ú® DONE! Created {created_count} files ({copied_count} originals + {augmented_count} augmented).")
if failed_count:
    print(f"‚ö†Ô∏è {failed_count} files failed and were skipped.")
if created_count < TARGET_DEFICIT:
    print(f"‚ö†Ô∏è Target not reached: {created_count} of {TARGET_DEFICIT} files created.")

# ===============================
# SAVE MANIFEST
# ===============================
# Nothing is written when no file was created.
if segments_data:
    print("Updating manifest...")
    # New rows in the manifest's own column order.
    new_df = pd.DataFrame(segments_data).loc[:, manifest_columns]
    # Existing rows first, new rows appended, index rebuilt.
    combined = pd.concat(
        [existing_manifest, new_df],
        ignore_index=True,
    )
    combined.to_csv(manifest_path, index=False)
    print("‚úÖ Manifest updated.")

Loading metadata...
✅ Found 1440 unique source files.
📉 Target needed: 2229


100%|██████████| 2229/2229 [00:49<00:00, 44.95it/s] 


✨ DONE! Created 2229 files (1440 originals + 789 augmented).
Updating manifest...
✅ Manifest updated.





## **DEFICIT FILLER USING XENO CANTO DATASET**

In [None]:
import os
import shutil
import pandas as pd
import librosa
import soundfile as sf
import numpy as np
from tqdm import tqdm
import sys

# ===============================
# USER SETTINGS
# ===============================
# 1. Audio source: folder that is walked recursively for audio files
#    (per the earlier diagnostic run: D:/.../archive3/A-M).
NOISE_SOURCE_FOLDER = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/archive3/A-M"

# 2. Metadata CSV describing the recordings.
METADATA_PATH = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/archive3/train_extended.csv"

# 3. Output root and the manifest stored inside it.
EXISTING_OUTPUT_FOLDER = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/3_SegmentedAudios"
MANIFEST_PATH = os.path.join(EXISTING_OUTPUT_FOLDER, "master_manifest_NoEagle.csv")

# 4. Targets: desired manifest size, segment length in seconds, sample rate.
TARGET_DEFICIT = 14890
SEGMENT_LENGTH = 5.0
TARGET_SR = 16000  # YAMNet Standard

# 5. Priority list (hard negatives): these species are processed first.
PRIORITY_SPECIES = [
    "Bald Eagle",
    "Red-tailed Hawk",
    "Broad-winged Hawk",
    "Cooper's Hawk",
    "Sharp-shinned Hawk",
    "Osprey",
    "Peregrine Falcon",
    "Merlin",
    "American Kestrel",
    "Northern Harrier",
    "Red-shouldered Hawk",
    "Swainson's Hawk",
    "Barred Owl",
    "Great Horned Owl",
]

# 6. Safety exclusions: the target species must never end up as a negative.
EXCLUDED_SCIENTIFIC = ["Pithecophaga jefferyi"]
EXCLUDED_COMMON = ["Philippine Eagle"]

print("=" * 70)
print("ü¶Ö PROCESSING KAGGLE BIRD DATASET (DEFICIT FILLER)")
print("=" * 70)

# ===============================
# STEP 1: CALCULATE NEED
# ===============================
# Compares the manifest's current row count with TARGET_DEFICIT and stops the
# cell early when nothing is missing.
if not os.path.exists(MANIFEST_PATH):
    print("‚ùå No existing manifest found. Please create master_manifest_NoEagle.csv first.")
    sys.exit(1)

# utf-8-sig drops a BOM if present; header names are stripped so lookups by
# column name are not defeated by stray whitespace.
existing_manifest = pd.read_csv(MANIFEST_PATH, encoding='utf-8-sig')
existing_manifest.columns = [c.strip() for c in existing_manifest.columns]
manifest_columns = existing_manifest.columns.tolist()
current_count = len(existing_manifest)
files_needed = TARGET_DEFICIT - current_count

print(f"\nüìä Current Status:")
print(f"   ‚Ä¢ Current NoEagle: {current_count}")
print(f"   ‚Ä¢ Target Count:    {TARGET_DEFICIT}")
print(f"   ‚Ä¢ Files Needed:    {files_needed}")

if files_needed <= 0:
    print("\n‚úÖ Deficit already filled!")
    sys.exit(0)

# Get next group ID: one past the highest numeric group_id, else 60000.
# Coercion replaces the former bare `except:`, which would also have hidden
# KeyboardInterrupt and unrelated errors.
next_group_id = 60000
if 'group_id' in existing_manifest.columns:
    _max_group_id = pd.to_numeric(existing_manifest['group_id'], errors='coerce').max()
    if pd.notna(_max_group_id):
        next_group_id = int(_max_group_id) + 1

print(f"   ‚Ä¢ Starting group_id: {next_group_id}")

# ===============================
# STEP 2: LOAD & VALIDATE METADATA
# ===============================
# Loads the metadata CSV, locates the filename / scientific-name / common-name
# columns, removes the excluded species and orders priority species first.
print("\nüìã Loading metadata...")

if not os.path.exists(METADATA_PATH):
    print(f"‚ùå Metadata file not found: {METADATA_PATH}")
    sys.exit(1)


def _find_column(columns, exact_names, keywords):
    """Return the first column matching an exact name, else a keyword, else None.

    Exact (case-insensitive) names are tried in the order given; only if none
    matches are the substring keywords tried, also in order. The first hit
    wins, so a later loosely matching column cannot replace a better one.
    """
    by_lower = {}
    for col in columns:
        by_lower.setdefault(col.lower(), col)
    for name in exact_names:
        if name in by_lower:
            return by_lower[name]
    for keyword in keywords:
        for col in columns:
            if keyword in col.lower():
                return col
    return None


try:
    meta_df = pd.read_csv(METADATA_PATH, encoding='utf-8-sig')
    meta_df.columns = [c.strip() for c in meta_df.columns]

    # Detect column names. The earlier last-match-wins scan let 'file_type'
    # replace 'filename' (every lookup then failed and all rows were skipped)
    # and never recognised 'sci_name'.
    col_file = _find_column(meta_df.columns, ('filename', 'file_name'), ('filename', 'file'))
    col_sci = _find_column(
        meta_df.columns,
        ('sci_name', 'scientific_name'),
        ('scientific', 'sci_name', 'species'),  # 'species' only as a last resort
    )
    col_common = _find_column(meta_df.columns, ('common_name',), ())
    if col_common is None:
        col_common = next(
            (c for c in meta_df.columns if 'common' in c.lower() and 'name' in c.lower()),
            None,
        )

    if not col_file:
        print(f"‚ùå Could not find filename column in metadata.")
        sys.exit(1)

    # Without a dedicated common-name column, fall back to 'species'.
    if not col_common:
        col_common = 'species'

    # Exclude Philippine Eagle by scientific name ...
    if col_sci and col_sci in meta_df.columns:
        meta_df = meta_df[~meta_df[col_sci].isin(EXCLUDED_SCIENTIFIC)].copy()

    # ... and by common name (case-insensitive substring match), driven by
    # EXCLUDED_COMMON rather than a hard-coded literal.
    if col_common in meta_df.columns:
        common_lower = meta_df[col_common].astype(str).str.lower()
        excluded_mask = pd.Series(False, index=meta_df.index)
        for excluded_name in EXCLUDED_COMMON:
            excluded_mask |= common_lower.str.contains(excluded_name.lower(), regex=False)
        meta_df = meta_df[~excluded_mask].copy()

    # Mark priority species (case-insensitive substring match on common name)
    if col_common in meta_df.columns:
        priority_lower = [p.lower() for p in PRIORITY_SPECIES]
        meta_df['is_priority'] = meta_df[col_common].apply(
            lambda x: any(p in str(x).lower() for p in priority_lower)
        )
    else:
        meta_df['is_priority'] = False

    # Sort: priority first. A stable sort keeps the CSV order inside each
    # group, so the files picked are the same on every run.
    meta_df = meta_df.sort_values(by=['is_priority'], ascending=False, kind='mergesort')

    print(f"   ‚Ä¢ Total candidate files: {len(meta_df)}")
    print(f"   ‚Ä¢ Priority raptors: {meta_df['is_priority'].sum()}")

except Exception as e:
    print(f"‚ùå Error reading metadata: {e}")
    sys.exit(1)

# ===============================
# STEP 3: INDEX AUDIO FILES
# ===============================
# Maps bare file name -> full path for every .mp3/.wav/.ogg found below
# NOISE_SOURCE_FOLDER. If the same name occurs in several subfolders, the one
# visited last by os.walk wins.
print(f"\nüîç Indexing audio files in subfolders...")
file_map = {
    f: os.path.join(root, f)
    for root, dirs, files in os.walk(NOISE_SOURCE_FOLDER)
    for f in files
    if f.endswith(('.mp3', '.wav', '.ogg'))
}

print(f"   ‚úÖ Indexed {len(file_map)} audio files.")

# ===============================
# STEP 4: PROCESS AUDIO
# ===============================
# Walks the metadata (priority species first), cuts each recording into
# consecutive SEGMENT_LENGTH-second chunks at TARGET_SR and writes them as WAV
# files until `files_needed` segments exist. One manifest row per segment.
segments_data = []
created_count = 0
error_count = 0
skipped_count = 0

# Same extensions the file index accepts; the previous lookup knew only
# .mp3/.wav, so an indexed .ogg file could never be matched.
_AUDIO_EXTENSIONS = ('.mp3', '.wav', '.ogg')
# Cap per-file error messages so a systematic failure does not flood the output.
_MAX_REPORTED_ERRORS = 10

label = "NoEagleSound"

print(f"\nüéµ Processing audio files (resampling to {TARGET_SR}Hz)...")
pbar = tqdm(total=files_needed, desc="Creating segments")

for index, row in meta_df.iterrows():
    if created_count >= files_needed:
        break

    filename_csv = str(row[col_file]).strip()

    # Handle extensions: use the name as-is when it already carries a known
    # extension, otherwise try each known extension.
    if filename_csv.endswith(_AUDIO_EXTENSIONS):
        candidates = [filename_csv]
    else:
        candidates = [filename_csv + ext for ext in _AUDIO_EXTENSIONS]

    # Find file
    real_filename = next((cand for cand in candidates if cand in file_map), None)
    if real_filename is None:
        skipped_count += 1
        continue
    source_path = file_map[real_filename]

    common_name = str(row.get(col_common, 'Unknown'))
    is_raptor = row.get('is_priority', False)

    # Everything below is constant for the whole recording, so it is computed
    # once per file instead of once per chunk.
    loc_id = "ConfusingRaptors" if is_raptor else "GeneralBirds"
    output_folder = os.path.join(loc_id, label)
    save_folder = os.path.join(EXISTING_OUTPUT_FOLDER, output_folder)
    # Keep letters, digits, spaces and underscores; spaces become underscores.
    safe_common = "".join([c for c in common_name if c.isalnum() or c in (' ', '_')]).strip().replace(' ', '_')
    safe_fname = os.path.splitext(real_filename)[0]

    segments_from_file = 0

    try:
        # Load & Resample
        y, sr = librosa.load(source_path, sr=TARGET_SR, mono=True)
        duration = len(y) / sr
        num_chunks = int(duration // SEGMENT_LENGTH)

        if num_chunks < 1:
            # Shorter than one segment
            skipped_count += 1
            continue

        os.makedirs(save_folder, exist_ok=True)

        for i in range(num_chunks):
            if created_count >= files_needed:
                break

            start_sample = int(i * SEGMENT_LENGTH * sr)
            end_sample = int((i + 1) * SEGMENT_LENGTH * sr)

            if end_sample > len(y):
                break

            chunk = y[start_sample:end_sample]

            new_filename = f"Kaggle_{safe_common}_{safe_fname}_seg{i:02d}.wav"
            save_path = os.path.join(save_folder, new_filename)
            sf.write(save_path, chunk, TARGET_SR)

            # Add to Data
            # NOTE(review): start/end are 0..SEGMENT_LENGTH for every chunk,
            # not the chunk's offset in the source recording. Confirm whether
            # the manifest expects segment-relative or source-relative times.
            new_row = {col: '' for col in manifest_columns}
            new_row.update({
                'label': label,
                'label_base': label,
                'label_full': label,
                'location_id': loc_id,
                'segment_filename': new_filename,
                'output_folder': output_folder,
                'source_audio': real_filename,
                'start': 0.0,
                'end': SEGMENT_LENGTH,
                'segment_start_time': 0.0,
                'segment_end_time': SEGMENT_LENGTH,
                'segment_duration': SEGMENT_LENGTH,
                'group_id': next_group_id,  # all segments of one recording share a group
                'quality': 'High',
                'label_category': 'NoEagleSound'
            })

            segments_data.append(new_row)
            segments_from_file += 1
            created_count += 1
            pbar.update(1)

    except Exception as e:
        error_count += 1
        if error_count <= _MAX_REPORTED_ERRORS:
            tqdm.write(f"‚ö†Ô∏è Error processing {real_filename}: {e}")
        continue

    finally:
        # Advance the group whenever this recording contributed segments,
        # including when a later chunk failed. Otherwise the next recording
        # would reuse the same group_id.
        if segments_from_file:
            next_group_id += 1

pbar.close()

if error_count:
    print(f"‚ö†Ô∏è {error_count} files raised an error while being processed.")

# ===============================
# STEP 5: SAVE MANIFEST
# ===============================
# Appends the new segment rows to the manifest and prints a short breakdown;
# when nothing was produced it reports how many files were skipped instead.
if not segments_data:
    print("\n‚ö†Ô∏è No files added. Check paths.")
    print(f"   Skipped {skipped_count} files (missing or too short)")
else:
    print(f"\nüíæ Saving Manifest...")
    # New rows in the manifest's own column order.
    new_df = pd.DataFrame(segments_data).loc[:, manifest_columns]

    combined = pd.concat([existing_manifest, new_df], ignore_index=True)
    combined.to_csv(MANIFEST_PATH, index=False)

    # Split the additions by destination folder.
    raptors = len([s for s in segments_data if 'ConfusingRaptors' in s['location_id']])
    general_count = len(new_df) - raptors

    print(f"‚úÖ SUCCESS!")
    print(f"   ‚Ä¢ Added: {len(new_df)} files")
    print(f"   ‚Ä¢ Raptors: {raptors}")
    print(f"   ‚Ä¢ General Birds: {general_count}")
    print(f"   ‚Ä¢ Total in Manifest: {len(combined)}")

print("\n" + "=" * 70)
print("üéâ Processing Complete!")
print("=" * 70)

🦅 PROCESSING KAGGLE BIRD DATASET (DEFICIT FILLER)

📊 Current Status:
   • Current NoEagle: 4458
   • Target Count:    14890
   • Files Needed:    10432
   • Starting group_id: 5167

📋 Loading metadata...
   • Total candidate files: 23784
   • Priority raptors: 505

🔍 Indexing audio files in subfolders...
   ✅ Indexed 14685 audio files.

🎵 Processing audio files (resampling to 16000Hz)...


Creating segments:   0%|          | 0/10432 [00:00<?, ?it/s]


⚠️ No files added. Check paths.
   Skipped 23784 files (missing or too short)

🎉 Processing Complete!





In [None]:
import os
import pandas as pd

# ===============================
# DIAGNOSTIC TOOL
# ===============================
# Read-only checks: does the audio folder exist and contain mp3/wav files, and
# can the metadata CSV be read (columns, row count, rows mentioning two of the
# priority species)?
NOISE_SOURCE_FOLDER = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/birds_songs/songs"
METADATA_PATH = r"D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/archive3/train_extended.csv"

print("--- DIAGNOSTIC START ---")

# 1. Check Folder
if not os.path.exists(NOISE_SOURCE_FOLDER):
    print(f"‚ùå Folder NOT found: {NOISE_SOURCE_FOLDER}")
else:
    print(f"‚úÖ Folder exists: {NOISE_SOURCE_FOLDER}")
    # Count audio files recursively and remember up to five example paths.
    file_count = 0
    sample_files = []
    for root, dirs, files in os.walk(NOISE_SOURCE_FOLDER):
        audio_names = [f for f in files if f.endswith(('.mp3', '.wav'))]
        file_count += len(audio_names)
        for f in audio_names:
            if len(sample_files) >= 5:
                break
            sample_files.append(os.path.join(root, f))

    print(f"   Found {file_count} audio files.")
    if file_count == 0:
        print("   ‚ùå FOLDER IS EMPTY or contains no mp3/wav!")
    else:
        print(f"   Sample path: {sample_files[0]}")

# 2. Check CSV
if not os.path.exists(METADATA_PATH):
    print(f"‚ùå CSV NOT found: {METADATA_PATH}")
else:
    print(f"\n‚úÖ CSV exists: {METADATA_PATH}")
    try:
        df = pd.read_csv(METADATA_PATH)
        print(f"   Columns: {list(df.columns)}")
        print(f"   Rows: {len(df)}")

        # Check Priority Species match: a row counts when any of its cells,
        # rendered as text, contains one of the names (case-insensitive).
        PRIORITY = ["Bald Eagle", "Red-tailed Hawk"]
        priority_pattern = '|'.join(PRIORITY)
        mask = df.apply(
            lambda row: row.astype(str).str.contains(priority_pattern, case=False).any(),
            axis=1,
        )
        print(f"   Found {mask.sum()} rows matching 'Bald Eagle' or 'Red-tailed Hawk'")

    except Exception as e:
        print(f"   ‚ùå CSV Read Error: {e}")

print("--- DIAGNOSTIC END ---")

--- DIAGNOSTIC START ---
✅ Folder exists: D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/archive3/A-M
   Found 14685 audio files.
   Sample path: D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/archive3/A-M\aldfly\XC133197.mp3

✅ CSV exists: D:/_3rd Year Class/1st Sem/Machine Learning/_ForLE/For DataSet/archive3/train_extended.csv
   Columns: ['rating', 'playback_used', 'ebird_code', 'channels', 'date', 'duration', 'filename', 'species', 'title', 'secondary_labels', 'bird_seen', 'sci_name', 'location', 'latitude', 'sampling_rate', 'type', 'elevation', 'bitrate_of_mp3', 'file_type', 'background', 'xc_id', 'url', 'country', 'author', 'primary_label', 'longitude', 'time', 'recordist', 'license']
   Rows: 23784
   Found 61 rows matching 'Bald Eagle' or 'Red-tailed Hawk'
--- DIAGNOSTIC END ---
