In [57]:
import os

def rename_files_in_directory(directory):
    """
    Renames files in the specified directory to follow the format: song_001.wav, song_002.wav, etc.

    Args:
        directory (str): Path to the directory containing files to rename.

    Returns:
        None
    """
    if not os.path.exists(directory):
        print(f"The directory '{directory}' does not exist.")
        return

    files = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    files.sort()  # Sort files to ensure consistent numbering

    for index, filename in enumerate(files, start=1):
        old_file_path = os.path.join(directory, filename)
        
        # Define new file name
        new_name = f"Tamangs_train{index:03d}.wav"
        new_file_path = os.path.join(directory, new_name)

        # Rename file
        try:
            os.rename(old_file_path, new_file_path)
            print(f"Renamed: {filename} -> {new_name}")
        except Exception as e:
            print(f"Failed to rename '{filename}': {e}")



In [59]:
if __name__ == "__main__":
    # User-defined settings
    directory_path = input("Enter the directory path: ").strip()
    rename_files_in_directory(directory_path)


KeyboardInterrupt: Interrupted by user

In [60]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from pathlib import Path

# Configuration
dataset_root = "Audio_dataset/Train"  # Root directory containing class folders
output_csv = "nepali_music_metadata.csv"
num_folds = 10
random_seed = 42

# Collect file information
file_paths = []
class_names = []

# Walk through class directories
for class_dir in Path(dataset_root).iterdir():
    if class_dir.is_dir():
        class_name = class_dir.name  # e.g. "sakela", "deuda"
        for audio_file in class_dir.glob("*.wav"):
            file_paths.append(str(audio_file))
            class_names.append(class_name)

# Create DataFrame
df = pd.DataFrame({
    "file_path": file_paths,
    "class_name": class_names
})

# Add numeric class IDs
df["class_id"] = df["class_name"].astype("category").cat.codes

# Create stratified folds
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=random_seed)
df["fold"] = -1  # Initialize fold column

for fold_idx, (_, test_idx) in enumerate(skf.split(df, df["class_id"])):
    df.iloc[test_idx, df.columns.get_loc("fold")] = fold_idx + 1  # Folds 1-10

# Save metadata
df.to_csv(output_csv, index=False)

In [61]:
# Check class distribution across folds
print("Overall class distribution:")
print(df["class_name"].value_counts())

Overall class distribution:
class_name
jhyaure                    818
asare                      810
tharu                      807
Deuda                      804
Maruni                     800
Kumari                     800
sakela                     800
Salaijo                    800
Sarangi Gandharva songs    800
Tamangs                    798
Name: count, dtype: int64


In [62]:
import os
import librosa
import resampy
import h5py
import numpy as np
import pandas as pd
import tensorflow_hub as hub
from tqdm import tqdm



In [63]:
def extract_features_aligned(wav_root_dir, metadata_csv_path, output_h5_path):
    """Extract YamNet features in EXACT metadata CSV order"""
    # Load metadata and sort by file_path to ensure alignment
    metadata = pd.read_csv(metadata_csv_path).sort_values('file_path')
    
    # Load YamNet model
    model = hub.load('https://tfhub.dev/google/yamnet/1')
    
    # Initialize storage in METADATA ORDER
    features = []
    labels = []
    failed_files = []

    # Process files in EXACT CSV ORDER
    for idx, row in tqdm(metadata.iterrows(), total=len(metadata), desc='Processing audio'):
        file_path = row['file_path']
        class_id = row['class_id']
        
        try:
            # Load and resample with librosa (handles various formats)
            audio, sr = librosa.load(file_path, sr=None, mono=True)
            if sr != 16000:
                audio = resampy.resample(audio, sr, 16000)
            
            # Extract YamNet embeddings
            _, embeddings, _ = model(audio)
            
            # Temporal average pooling (maintain 1024D)
            features.append(np.mean(embeddings.numpy(), axis=0).astype(np.float32))
            labels.append(int(class_id))
            
        except Exception as e:
            print(f"Error processing {file_path}: {str(e)}")
            failed_files.append(file_path)
            continue

    # Convert to aligned numpy arrays
    features_array = np.array(features, dtype=np.float32)
    labels_array = np.array(labels, dtype=np.int64)

    # Save with h5py in METADATA ORDER
    with h5py.File(output_h5_path, 'w') as hf:
        hf.create_dataset('features', data=features_array)
        hf.create_dataset('labels', data=labels_array)
        
    # Save list of failed files
    if failed_files:
        with open('failed_files.txt', 'w') as f:
            f.write('\n'.join(failed_files))
        print(f"{len(failed_files)} files failed processing. See failed_files.txt")

    return features_array.shape, labels_array.shape

In [64]:
extract_features_aligned(
    wav_root_dir="Audio_dataset/Train",
    metadata_csv_path="nepali_music_metadata.csv", 
    output_h5_path="nepali_features.hdf5"
)

Processing audio: 100%|███████████████████| 8037/8037 [1:02:46<00:00,  2.13it/s]


((8037, 1024), (8037,))

In [67]:
# Load data
df = pd.read_csv(output_csv).sort_values("file_path")
with h5py.File('nepali_features.hdf5', 'r') as hf:
    hdf5_labels = hf["labels"][:]
    hdf5_features = hf["features"][:]

# Check random samples
for _ in range(8038):
    idx = np.random.randint(0, len(df))
    csv_class = df.iloc[idx]["class_id"]
    hdf5_class = hdf5_labels[idx]
    
    assert csv_class == hdf5_class, \
        f"Mismatch at index {idx}: CSV={csv_class}, HDF5={hdf5_class}"

print("All files aligned correctly!")

All files aligned correctly!


In [68]:
import h5py

# Replace 'your_file.hdf5' with the path to your HDF5 file
file_path = 'nepali_features.hdf5'

# Open the HDF5 file
with h5py.File(file_path, 'r') as f:
    # Function to recursively print the structure
    def print_structure(name, obj):
        print(name)
        if isinstance(obj, h5py.Group):
            for key in obj.keys():
                print_structure(f"{name}/{key}", obj[key])
        elif isinstance(obj, h5py.Dataset):
            print(f"    Dataset shape: {obj.shape}, dtype: {obj.dtype}")

    # Start printing the structure from the root
    print_structure('/', f)

/
//features
    Dataset shape: (8037, 1024), dtype: float32
//labels
    Dataset shape: (8037,), dtype: int64
