In [4]:
from pathlib import Path
import pandas as pd
import shutil

In [2]:
# Define base directory
AUDIO_DIR = Path("../audio_data/gun_sound_v2")


def is_0m_center(stem: str) -> bool:
    """
    Tell whether a clip stem is tagged with distance "0m" and direction "center".

    The stem is split on "_" and its first four fields are read as
    gun type, distance, direction and clip id. Stems with fewer than four
    fields never match; any fields after the fourth are ignored.

    Args:
        stem: File name without its extension, e.g. "ak_0m_center_0008"

    Returns:
        True when the second field is "0m" and the third field is "center"
    """
    parts = stem.split("_")
    if len(parts) < 4:
        return False
    # Slice before unpacking: the length guard admits stems with more than
    # four fields, and unpacking all of them into four names raises ValueError.
    _gun_type, distance, direction, _clip_id = parts[:4]
    return distance == "0m" and direction == "center"


# Filtered list: every .mp3 directly inside AUDIO_DIR whose stem matches
filtered_files = [file for file in AUDIO_DIR.glob("*.mp3") if is_0m_center(file.stem)]

# Result
print(f"✅ Found {len(filtered_files)} files matching 0m/center")
print("🔫 Example files:")
for f in filtered_files[:5]:
    print("-", f.name)

✅ Found 1013 files matching 0m/center
🔫 Example files:
- s12k_0m_center_1986.mp3
- s12k_0m_center_1992.mp3
- s12k_0m_center_1979.mp3
- pp_0m_center_1894.mp3
- tomy_0m_center_0812.mp3


In [11]:
# Underscore-separated fields of every selected file name,
# e.g. "ak_0m_center_0008.mp3" -> ["ak", "0m", "center", "0008.mp3"]
file_components = [f.name.split("_") for f in filtered_files]

# One (file_name, label, id) row per file: label is the first field and
# id is the fourth field cut at its first "."
metadata_rows = [
    (path.name, fields[0], fields[3].split(".")[0])
    for path, fields in zip(filtered_files, file_components)
]
df = pd.DataFrame(metadata_rows, columns=["file_name", "label", "id"])
# df.to_csv("filtered_bgg_0m_center.csv", index=False)

In [12]:
# Show the metadata table as this cell's output (bare last expression)
df

Unnamed: 0,file_name,label,id
0,s12k_0m_center_1986.mp3,s12k,1986
1,s12k_0m_center_1992.mp3,s12k,1992
2,s12k_0m_center_1979.mp3,s12k,1979
3,pp_0m_center_1894.mp3,pp,1894
4,tomy_0m_center_0812.mp3,tomy,0812
...,...,...,...
1008,ak_0m_center_0008.mp3,ak,0008
1009,kar_0m_center_0552.mp3,kar,0552
1010,m249_0m_center_0588.mp3,m249,0588
1011,m24_0m_center_1822.mp3,m24,1822


In [13]:
# Write the metadata table to a gzip-compressed Parquet file via pyarrow.
# The extension was previously misspelled ".paquet"; anything that still reads
# the old file name must be pointed at the corrected one.
metadata_path = Path("../metadata/gun_sound_final_metadata.parquet")
# Create the output folder if needed so the write does not fail on a fresh checkout
metadata_path.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(metadata_path, index=False, engine="pyarrow", compression="gzip")

In [15]:
from typing import List, Tuple
import logging
from tqdm.notebook import tqdm

# Configure logging: INFO and above, each record prefixed with timestamp and level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Logger named after the current module; the functions below report through it
logger = logging.getLogger(__name__)

def filter_audio_files(
    source_dir: Path,
    distance_filter: str = "0m",
    direction_filter: str = "center"
) -> List[Path]:
    """
    Select the .mp3 files in a directory whose names carry the requested
    distance and direction tags.

    Each file stem is split on "_"; the second field is compared with
    ``distance_filter`` and the third with ``direction_filter``. Stems with
    fewer than four fields are ignored.

    Args:
        source_dir: Directory searched (non-recursively) for "*.mp3" files
        distance_filter: Required value of the distance field
        direction_filter: Required value of the direction field

    Returns:
        Paths of the matching files; an empty list when ``source_dir`` does
        not exist (an error is logged in that case)
    """
    matches: List[Path] = []

    # Nothing to scan: report the problem and hand back the empty result
    if not source_dir.exists():
        logger.error(f"Source directory does not exist: {source_dir}")
        return matches

    # Materialise the listing up front so the progress bar knows its total
    candidates = list(source_dir.glob("*.mp3"))

    for candidate in tqdm(candidates, desc="Filtering audio files", unit="file"):
        try:
            fields = candidate.stem.split("_")
            if len(fields) < 4:
                continue
            # Field layout: gun type, distance, direction, clip id
            if fields[1] == distance_filter and fields[2] == direction_filter:
                matches.append(candidate)
        except Exception as e:
            logger.warning(f"Error processing file {candidate.name}: {e}")

    logger.info(f"Found {len(matches)} files matching {distance_filter}/{direction_filter}")
    return matches

def copy_files_with_progress(
    files: List[Path],
    target_dir: Path,
    use_move: bool = False
) -> Tuple[List[str], List[str]]:
    """
    Copy or move files into a target directory, collecting per-file outcomes.

    The target directory is created if it is missing. A file whose name
    already exists in the target directory is left untouched and appears in
    neither returned list; the number of such skips is logged once at the end.
    A failure on one file is logged and recorded, and processing continues
    with the next file.

    Args:
        files: List of source files to copy/move
        target_dir: Destination directory
        use_move: If True, move files instead of copying

    Returns:
        Tuple of (successful_files, failed_files), each a list of file names
    """
    target_dir.mkdir(parents=True, exist_ok=True)

    successful_files = []
    failed_files = []
    skipped_count = 0

    # Progress wording uses the gerund ("Copying"); failure messages need the
    # bare verb ("Failed to copy ..."), so both forms are kept.
    operation = "Moving" if use_move else "Copying"
    verb = "move" if use_move else "copy"
    logger.info(f"{operation} {len(files)} files to {target_dir}")

    for file in tqdm(files, desc=f"{operation} files", unit="file"):
        try:
            dest_path = target_dir / file.name

            # Never overwrite: an existing destination is skipped, not replaced
            if dest_path.exists():
                logger.warning(f"File already exists, skipping: {file.name}")
                skipped_count += 1
                continue

            if use_move:
                shutil.move(str(file), str(dest_path))
            else:
                shutil.copy2(file, dest_path)

            successful_files.append(file.name)

        except Exception as e:
            logger.error(f"Failed to {verb} {file.name}: {e}")
            failed_files.append(file.name)

    # Skips are in neither returned list, so state their count explicitly;
    # otherwise a re-run over an already populated target looks like nothing happened.
    if skipped_count:
        logger.info(f"Skipped {skipped_count} files already present in {target_dir}")

    return successful_files, failed_files

# Configuration
CONFIG = {
    "source_dir": Path("../audio_data/gun_sound_v2"),
    "target_dir": Path("../audio_data/gun_sound_final"),
    "distance_filter": "0m",
    "direction_filter": "center",
    "use_move": False  # Set to True to move instead of copy
}

# Run the pipeline: select the matching clips, transfer them, then report.
# Any unexpected error is logged and re-raised so the cell still fails visibly.
try:
    # Step 1: select files by the configured distance/direction tags
    filtered_files = filter_audio_files(
        source_dir=CONFIG["source_dir"],
        distance_filter=CONFIG["distance_filter"],
        direction_filter=CONFIG["direction_filter"],
    )

    if filtered_files:
        # Step 2: copy (or move, per CONFIG["use_move"]) into the target directory
        successful_files, failed_files = copy_files_with_progress(
            files=filtered_files,
            target_dir=CONFIG["target_dir"],
            use_move=CONFIG["use_move"],
        )

        ok_count = len(successful_files)
        failed_count = len(failed_files)

        # Step 3: summary, first through the logger and then on stdout
        logger.info(f"✅ Successfully processed {ok_count} files")
        if failed_count:
            logger.error(f"❌ Failed to process {failed_count} files")

        print(f"\n✅ Processed {ok_count} files to {CONFIG['target_dir']}")
        if failed_count:
            print(f"❌ Failed to process {failed_count} files")

        # A short sample of what was transferred
        print("\n🔫 Example files:")
        for name in successful_files[:5]:
            print(f"   - {name}")
    else:
        logger.warning("No files found matching the criteria")

except Exception as e:
    logger.error(f"Critical error during processing: {e}")
    raise

Filtering audio files:   0%|          | 0/2195 [00:00<?, ?file/s]

2025-07-11 13:46:04,933 - INFO - Found 1013 files matching 0m/center
2025-07-11 13:46:04,937 - INFO - Copying 1013 files to ../audio_data/gun_sound_final


Copying files:   0%|          | 0/1013 [00:00<?, ?file/s]

2025-07-11 13:46:06,401 - INFO - ✅ Successfully processed 1013 files



✅ Processed 1013 files to ../audio_data/gun_sound_final

🔫 Example files:
   - s12k_0m_center_1986.mp3
   - s12k_0m_center_1992.mp3
   - s12k_0m_center_1979.mp3
   - pp_0m_center_1894.mp3
   - tomy_0m_center_0812.mp3
