In [2]:
import os
import numpy as np
import pandas as pd
import librosa

# Define the function to create MFCC coefficients for a single audio file
def create_MFCC_coefficients(file_name, sr_value=44100, n_mfcc_count=20):
    """Compute the MFCCs of a single audio file and return them as a DataFrame.

    Args:
        file_name: Path of the audio file; passed straight to ``librosa.load``.
        sr_value: Sampling rate requested from ``librosa.load``. Defaults to
            44100, the value this function previously hard-coded.
        n_mfcc_count: Number of coefficients requested from
            ``librosa.feature.mfcc``. Defaults to 20, the previous
            hard-coded value.

    Returns:
        A ``pandas.DataFrame`` wrapping the array returned by
        ``librosa.feature.mfcc``, or ``None`` when loading the file or
        computing the features raised an exception.
    """
    try:
        # Load the audio at the requested sampling rate.
        y, sr = librosa.load(file_name, sr=sr_value)

        # Compute the MFCC matrix and wrap it unchanged in a DataFrame.
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc_count)
        return pd.DataFrame(mfccs)

    except Exception as e:
        # Deliberately broad: this is a best-effort step in a batch job, and
        # callers treat a None result as "skip this file and carry on".
        print(f"Error creating MFCC coefficients for {file_name}: {str(e)}")
        return None

# Function to process all audio files in the given folder and its subfolders
def process_audio_folders(root_folder, output_folder):
    """Write one MFCC CSV per ``.mp3`` file found one level below *root_folder*.

    Every immediate subfolder of *root_folder* is scanned (non-recursively)
    for files whose name ends in ``.mp3`` (case-insensitive). Each file is
    passed to ``create_MFCC_coefficients`` and the resulting DataFrame is
    saved to ``<output_folder>/<subfolder>-song<N>.csv`` without header or
    index. ``N`` is the 1-based position of the file in the sorted listing of
    its subfolder; a file that fails to process still consumes its number.

    Args:
        root_folder: Directory whose immediate subfolders hold the audio files.
        output_folder: Directory receiving the CSV files; created if missing.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # exist_ok=True replaces the racy "check, then create" sequence.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps both the
    # traversal and the "songN" numbering reproducible from run to run.
    for subfolder in sorted(os.listdir(root_folder)):
        subfolder_path = os.path.join(root_folder, subfolder)

        # Only folders are scanned; files directly under the root are ignored.
        if not os.path.isdir(subfolder_path):
            continue

        # Case-insensitive match so files named e.g. "TRACK.MP3" are not
        # silently skipped.
        audio_files = sorted(
            f for f in os.listdir(subfolder_path) if f.lower().endswith('.mp3')
        )

        for index, file_name in enumerate(audio_files, start=1):
            file_path = os.path.join(subfolder_path, file_name)

            # None signals that the file could not be loaded or analysed.
            coeff_df = create_MFCC_coefficients(file_path)
            if coeff_df is None:
                print(f"Failed to process {file_path}")
                continue

            output_file_name = f"{subfolder}-song{index}.csv"
            output_file_path = os.path.join(output_folder, output_file_name)

            # Raw matrix only: no header row and no index column.
            coeff_df.to_csv(output_file_path, header=False, index=False)
            print(f"Saved MFCCs to {output_file_path}")

# Input tree (subfolders holding the audio files) and the flat folder that
# receives the generated MFCC CSV files.
root_folder, output_folder = 'TRAIN_DATA_Expanded', 'TRAIN_MFCC_Final'

# Run the conversion over every audio file found under the input tree.
process_audio_folders(root_folder=root_folder, output_folder=output_folder)


Saved MFCCs to TRAIN_MFCC_Final\AB-song1.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song2.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song3.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song4.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song5.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song6.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song7.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song8.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song9.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song10.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song11.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song12.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song13.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song14.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song15.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song16.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song17.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song18.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song19.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song20.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song21.csv
Saved MFCCs to TRAIN_MFCC_Final\AB-song22.c