In [None]:
import os
import shutil
import pandas as pd
from tqdm import tqdm

# ===============================
# USER SETTINGS
# ===============================
# Source: directory that holds the FSC22 audio clips to copy from.
NOISE_SOURCE_FOLDER = r"C:\Users\danie\Downloads\Audio Wise V1.0"

# FSC22 metadata CSV; the script reads its 'Class Name' and
# 'Dataset File' columns.
METADATA_PATH = r"C:\Users\danie\Downloads\Metadata V1.0 FSC22.csv"

# Destination: the already-existing output tree, which also holds
# master_manifest.csv.
EXISTING_OUTPUT_FOLDER = r"C:\Users\danie\Downloads\LE\SegmentationAudios"

# Upper bound on the number of noise clips copied in one run.
TARGET_DEFICIT = 385

# Whitelist of FSC22 classes; metadata rows of any other class are ignored.
ALLOWED_CLASSES = [
    'Rain',
    'Thunderstorm',
    'WaterDrops',
    'Wind',
    'Silence',
    'TreeFalling',
    'Whistling',
    'Insect',
    'Frog',
    'BirdChirping',
    'WingFlapping',
    'Squirrel',
]

# ===============================
# STEP 1: LOAD & FILTER METADATA
# ===============================
# Reads the metadata CSV, keeps only rows whose class is in ALLOWED_CLASSES,
# and shuffles them with a fixed seed. Produces `meta_df` (raw metadata) and
# `filtered_df` (shuffled candidates). Any failure stops the run with a
# non-zero exit status.
print("Loading and filtering metadata...")
try:
    meta_df = pd.read_csv(METADATA_PATH)

    # Fail early, with a readable message, if a column this script relies on
    # is absent from the CSV.
    missing_columns = [
        column
        for column in ('Class Name', 'Dataset File')
        if column not in meta_df.columns
    ]
    if missing_columns:
        raise ValueError(f"metadata is missing column(s): {missing_columns}")

    # Filter by class. Names are compared after stripping whitespace because
    # the copy step strips them as well; a padded name must not be dropped.
    class_names = meta_df['Class Name'].astype(str).str.strip()
    filtered_df = meta_df[class_names.isin(ALLOWED_CLASSES)].copy()

    # Shuffle to get a random mix (fixed seed keeps the order reproducible)
    filtered_df = filtered_df.sample(frac=1, random_state=42).reset_index(drop=True)

    print(f"✅ Found {len(filtered_df)} valid noise files.")

except Exception as e:
    print(f"❌ Error reading metadata: {e}")
    # SystemExit instead of exit(): exit() is an interactive helper that is
    # not always defined and shuts down the kernel when run in Jupyter; this
    # stops the run and reports a failure status.
    raise SystemExit(1) from e

# ===============================
# STEP 2: COPY FILES
# ===============================
# Copies up to TARGET_DEFICIT clips listed in `filtered_df` from
# NOISE_SOURCE_FOLDER into <output>/GeneralForest/NoEagleSound and collects
# one manifest row per copied file in `segments_data`.
# `created_count` and `segments_data` are consumed by the manifest step.
created_count = 0
segments_data = []
missing_count = 0          # metadata rows whose audio file is not in the source folder
already_present_count = 0  # clips that an earlier run already copied

# Every clip goes to the same place, so resolve the destination once.
# Save Folder: Output / GeneralForest / NoEagleSound
location = "GeneralForest"
label = "NoEagleSound"
save_folder = os.path.join(EXISTING_OUTPUT_FOLDER, location, label)

print(f"\n📉 Target to fill: {TARGET_DEFICIT} files")
print(f"📂 Copying from: {NOISE_SOURCE_FOLDER}")

# The bar counts successful copies (not rows visited), so skipped rows do not
# push it past its total.
with tqdm(total=min(len(filtered_df), TARGET_DEFICIT)) as progress:
    for raw_filename, raw_class in zip(filtered_df['Dataset File'], filtered_df['Class Name']):
        if created_count >= TARGET_DEFICIT:
            break

        # Get info
        original_filename = str(raw_filename).strip()
        noise_class = str(raw_class).strip()

        source_path = os.path.join(NOISE_SOURCE_FOLDER, original_filename)

        # isfile (not exists): an empty filename would otherwise resolve to
        # the source folder itself.
        if not os.path.isfile(source_path):
            missing_count += 1
            continue

        # We rename it to keep the dataset clean and organized
        # New Name: GeneralForest_Rain_OriginalName.wav
        new_filename = f"GeneralForest_{noise_class}_{original_filename}"
        dest_path = os.path.join(save_folder, new_filename)

        # The shuffle seed is fixed, so a re-run visits the same files first.
        # Skip clips that are already in place instead of recopying them and
        # adding a second manifest row for the same file.
        if os.path.exists(dest_path):
            already_present_count += 1
            continue

        try:
            os.makedirs(save_folder, exist_ok=True)
            # COPY THE FILE (Preserves metadata)
            shutil.copy2(source_path, dest_path)
        except OSError as e:
            # tqdm.write keeps the message from breaking the progress bar.
            tqdm.write(f"⚠️ Error copying {original_filename}: {e}")
            continue

        # Add to manifest data
        # NOTE(review): group ids restart at 9999 on every run, so ids from
        # separate runs can coincide — confirm whether that matters downstream.
        segments_data.append({
            'source_audio': original_filename,
            'segment_filename': new_filename,
            'label': label,
            'label_category': label,
            'quality': 'N/A',
            'output_folder': os.path.join(location, label),
            # Since we didn't check duration, we assume it's 5.0s (standard for this dataset)
            'segment_start_time': 0.0,
            'segment_end_time': 5.0,
            'segment_duration': 5.0,
            'trigger_annotation_start': 'N/A',
            'trigger_annotation_end': 'N/A',
            'num_annotations': 0,
            'selection_numbers': 'None',
            'annotation_types': 'None',
            'annotation_times': 'None',
            'individual_call_details': f'External_{noise_class}',
            'group_id': 9999 + created_count,
            'overlap_group_id': 9999 + created_count,
            'location_id': location
        })

        created_count += 1
        progress.update(1)

print(f"\n✨ Finished! Copied {created_count} noise files.")
if missing_count:
    print(f"   Skipped {missing_count} metadata rows whose audio file was not found in the source folder.")
if already_present_count:
    print(f"   Skipped {already_present_count} files that were already present in the output folder.")
if created_count < TARGET_DEFICIT:
    print(f"⚠️ Only {created_count} of the {TARGET_DEFICIT} requested files could be copied.")

# ===============================
# STEP 3: UPDATE MASTER MANIFEST
# ===============================
# Appends the rows collected in `segments_data` to master_manifest.csv in the
# output folder (creating it when absent). If that fails, the new rows are
# written to added_noise_manifest.csv so they are not lost.
manifest_path = os.path.join(EXISTING_OUTPUT_FOLDER, "master_manifest.csv")

if segments_data:
    print("Updating master_manifest.csv...")
    new_df = pd.DataFrame(segments_data)
    # The combined manifest is written next to the real one and swapped in
    # afterwards, so an interrupted write cannot truncate the master manifest.
    temp_manifest_path = manifest_path + ".tmp"
    try:
        # Load existing manifest
        if os.path.exists(manifest_path):
            existing_df = pd.read_csv(manifest_path)

            # Drop rows that describe a file being registered again in this
            # run (same folder + filename), so a re-run replaces its earlier
            # rows instead of duplicating them. Other rows are untouched.
            key_columns = ['output_folder', 'segment_filename']
            if all(column in existing_df.columns for column in key_columns):
                new_keys = list(zip(new_df['output_folder'], new_df['segment_filename']))
                existing_keys = pd.MultiIndex.from_frame(existing_df[key_columns])
                re_added = existing_keys.isin(new_keys)
                existing_df = existing_df.loc[~re_added]

            combined_df = pd.concat([existing_df, new_df], ignore_index=True)
        else:
            combined_df = new_df

        combined_df.to_csv(temp_manifest_path, index=False)
        os.replace(temp_manifest_path, manifest_path)
        print("✅ Master manifest updated successfully.")

    except Exception as e:
        print(f"❌ Error updating manifest: {e}")

        # Do not leave a half-written temporary file behind.
        if os.path.exists(temp_manifest_path):
            try:
                os.remove(temp_manifest_path)
            except OSError:
                pass  # best-effort cleanup; the original error was already reported

        # Backup save
        backup_path = os.path.join(EXISTING_OUTPUT_FOLDER, "added_noise_manifest.csv")
        try:
            new_df.to_csv(backup_path, index=False)
            print("   Saved new data to 'added_noise_manifest.csv' instead.")
        except OSError as backup_error:
            print(f"❌ Could not save 'added_noise_manifest.csv' either: {backup_error}")