In [1]:
import os
import librosa
import soundfile as sf
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
import pandas as pd

In [3]:
def segment_music_files(parent_folder, output_folder, supported_extensions=('.aac', '.au', '.flac', '.m4a', '.mp3', 'ogg', '.wav')):
    for genre_folder in os.listdir(parent_folder):
        genre_path = os.path.join(parent_folder, genre_folder)
        if os.path.isdir(genre_path):
            output_genre_path = os.path.join(output_folder, genre_folder)
            os.makedirs(output_genre_path, exist_ok=True)

            for filename in os.listdir(genre_path):
                if filename.lower().endswith(supported_extensions):
                    file_path = os.path.join(genre_path, filename)
                    base_filename, ext = os.path.splitext(filename)

                    try:
                        audio_file, sr = librosa.load(file_path)

                        # Loop through audio data in 29-second chunks
                        segment_index = 0
                        for i in range(0, len(audio_file), sr * 29):
                            segment = audio_file[i:i + sr * 29]

                            # Check for non-empty segment before saving
                            if len(segment) >= sr * 29 and len(segment) > 0:
                                new_filename = f"{base_filename}{ext}"
                                new_file_path = os.path.join(output_genre_path, new_filename)

                                # Check if the file already exists, and append a number if it does
                                if os.path.exists(new_file_path):
                                    new_filename = f"{base_filename}_{segment_index}{ext}"
                                    new_file_path = os.path.join(output_genre_path, new_filename)
                                    segment_index += 1

                                sf.write(new_file_path, segment, sr)

                    except (IOError, sf.SoundFileError) as e:
                        print(f"Error processing file '{filename}': {e}")

segment_music_files('genres_original', 'Segmented_files')

In [2]:
def extract_audio_features(input_dir, output_file_X, output_file_y):
    audio_data = []
    genre_labels = []

    for genre_dir in os.listdir(input_dir):
        genre_label = genre_dir
        subdirectory_path = os.path.join(input_dir, genre_dir)

        for filename in tqdm(os.listdir(subdirectory_path), desc=f"Processing files in {genre_dir}"):
            filepath = os.path.join(subdirectory_path, filename)
            try:
                audio_features = load_audio_and_preprocess(filepath)
                audio_data.append(audio_features.flatten())  # Flatten the mel spectrogram
                genre_labels.append(genre_label)
            except UserWarning as e:
                if "Trying to estimate tuning from empty frequency set" in str(e):
                    print(f"Error processing file {filename} in {genre_dir}: {e}")
                    print(filepath)

    encoder = LabelEncoder()
    genre_labels_encoded = encoder.fit_transform(genre_labels)

    # Create pandas DataFrames
    audio_data_df = pd.DataFrame(audio_data)
    genre_labels_df = pd.DataFrame({'genre': encoder.inverse_transform(genre_labels_encoded)})

    # Create the 'extracted_csv' directory if it doesn't exist
    os.makedirs('extracted_csv', exist_ok=True)

    # Save audio features to CSV
    audio_data_df.to_csv(output_file_X, index=False, header=False)

    # Save genre labels to CSV
    genre_labels_df.to_csv(output_file_y, index=False)

    return audio_data_df, genre_labels_df

In [None]:
def load_audio_and_preprocess(filepath):
    # Load the audio file
    y, sr = librosa.load(filepath)

    # Compute the mel spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)

    return mel_spectrogram

# Set the input directory containing the genre subdirectories
input_dir = 'Segmented_files'

# Set the output file paths for audio features and genre labels
output_file_X = 'extracted_csv/mel_spectrogram_X.csv'
output_file_y = 'extracted_csv/mel_spectrogram_y.csv'

# Call the extract_audio_features function
audio_data_df, genre_labels_df = extract_audio_features(input_dir, output_file_X, output_file_y)

Processing files in hiphop: 100%|█████████████████████████████████████████████████████| 100/100 [00:12<00:00,  7.81it/s]
Processing files in classical: 100%|██████████████████████████████████████████████████| 100/100 [00:10<00:00,  9.31it/s]
Processing files in blues: 100%|██████████████████████████████████████████████████████| 100/100 [00:10<00:00,  9.55it/s]
Processing files in metal: 100%|██████████████████████████████████████████████████████| 100/100 [00:11<00:00,  9.07it/s]
Processing files in jazz: 100%|█████████████████████████████████████████████████████████| 99/99 [00:10<00:00,  9.61it/s]
Processing files in country: 100%|████████████████████████████████████████████████████| 100/100 [00:10<00:00,  9.10it/s]
Processing files in pop: 100%|████████████████████████████████████████████████████████| 100/100 [00:11<00:00,  8.85it/s]
Processing files in rock: 100%|███████████████████████████████████████████████████████| 100/100 [00:11<00:00,  9.04it/s]
Processing files in disco:  70%|