# Data Generation for the GTZan Music Genre Classification

## Import Statements

Start by importing necessary libraries.

In [15]:
import os
import librosa
import pandas as pd
from pydub import AudioSegment
import tempfile
import numpy as np
import soundfile as sf

## Helper Functions

Helper that converts an MP3 file to a temporary WAV file.

In [16]:
# Function to convert MP3 to WAV
def convert_mp3_to_wav(mp3_file):
    """Decode an MP3 file into a newly created temporary WAV file.

    Args:
        mp3_file: Path to the MP3 file to convert.

    Returns:
        Path of the temporary WAV file, or None if the conversion failed
        (the error is printed). The caller owns the returned file and is
        responsible for deleting it.
    """
    wav_file = None
    try:
        sound = AudioSegment.from_mp3(mp3_file)
        # mkstemp creates the file atomically. tempfile.mktemp is deprecated
        # because another process can claim the name before it is used.
        fd, wav_file = tempfile.mkstemp(suffix='.wav')
        os.close(fd)  # export() is given the path, not the descriptor
        sound.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
        print(f"Error converting {mp3_file} to WAV: {e}")
        # Do not leave a partial temp file behind after a failed export.
        if wav_file is not None:
            try:
                os.remove(wav_file)
            except OSError:
                pass
        return None

Feature extraction from a segment. 

In [17]:
# Function to extract features from an audio segment
def extract_features_from_segment(y, sr, start_time, end_time):
    """Compute summary audio features for one slice of a signal.

    Args:
        y: Audio samples; the slice ``y[start_time:end_time]`` is analysed.
        sr: Sampling rate passed to the librosa feature functions.
        start_time: Start of the slice. Despite the name this is a sample
            index, not seconds, because it is used directly to slice ``y``.
        end_time: End of the slice (exclusive sample index).

    Returns:
        dict of feature name -> value, in this order: ``<name>_mean`` and
        ``<name>_var`` for each frame-level feature and for the
        harmonic/percussive components, then ``tempo`` (a plain float),
        then ``mfcc{i}_mean`` / ``mfcc{i}_var`` for i = 1..20.
    """
    def _mean_var(values):
        # An empty array reports 0 rather than NaN.
        if values.size:
            return values.mean(), values.var()
        return 0, 0

    segment = y[start_time:end_time]

    # (name, array) pairs; list order fixes the column order of the output.
    named_arrays = [
        ('chroma_stft', librosa.feature.chroma_stft(y=segment, sr=sr)),
        ('rms', librosa.feature.rms(y=segment)),
        ('spectral_centroid', librosa.feature.spectral_centroid(y=segment, sr=sr)),
        ('spectral_bandwidth', librosa.feature.spectral_bandwidth(y=segment, sr=sr)),
        ('rolloff', librosa.feature.spectral_rolloff(y=segment, sr=sr)),
        ('zero_crossing_rate', librosa.feature.zero_crossing_rate(y=segment)),
    ]
    harmony, perceptr = librosa.effects.hpss(segment)
    named_arrays.append(('harmony', harmony))
    named_arrays.append(('perceptr', perceptr))

    features = {}
    for name, values in named_arrays:
        features[f'{name}_mean'], features[f'{name}_var'] = _mean_var(values)

    tempo, _ = librosa.beat.beat_track(y=segment, sr=sr)
    # beat_track may return tempo as an ndarray instead of a scalar; storing
    # an array here would put "[123.0]"-style values in the DataFrame/CSV.
    features['tempo'] = float(np.atleast_1d(tempo).ravel()[0])

    mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=20)
    for i in range(1, 21):
        if mfcc.shape[0] >= i:
            mean, var = _mean_var(mfcc[i - 1])
        else:
            mean, var = 0, 0
        features[f'mfcc{i}_mean'] = mean
        features[f'mfcc{i}_var'] = var

    return features

Load audio helper function.

In [18]:
# Function to load audio file
def load_audio(file_path):
    """Read an audio file with librosa at its native sampling rate.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        ``(y, sr)`` as returned by ``librosa.load(file_path, sr=None)``, or
        ``(None, None)`` if loading raised an exception (a message is
        printed in that case).
    """
    try:
        loaded = librosa.load(file_path, sr=None)
    except sf.LibsndfileError:
        # Reported separately from other failures, without the error text.
        print(f"LibsndfileError: {file_path}")
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
    else:
        samples, sample_rate = loaded
        return samples, sample_rate
    return None, None

Segment data and call feature extraction.

In [19]:
# Function to extract features from an audio file
def extract_features(audio_file, segment_duration=15):
    """Split an audio file into fixed-length segments and featurise each one.

    Args:
        audio_file: Path of the audio file to process.
        segment_duration: Segment length in seconds. Only complete segments
            are used; a shorter trailing remainder is dropped.

    Returns:
        List with one feature dict per complete segment. Each dict holds the
        values from ``extract_features_from_segment`` plus ``filename``
        (basename of ``audio_file``) and ``start`` / ``end`` in seconds.
        Returns an empty list if the file cannot be loaded or an error occurs
        (the error is printed).
    """
    try:
        y, sr = load_audio(audio_file)
        if y is None:
            return []

        segment_length = int(sr * segment_duration)
        if segment_length <= 0:
            # Fail with a clear message instead of range()'s zero-step error.
            raise ValueError(
                f"segment_duration must cover at least one sample, got {segment_duration}"
            )

        filename = os.path.basename(audio_file)
        features_list = []

        # Stop early enough that every segment fits entirely inside y.
        for start in range(0, len(y) - segment_length + 1, segment_length):
            end = start + segment_length
            features = extract_features_from_segment(y, sr, start, end)
            features['filename'] = filename
            features['start'] = start / sr
            features['end'] = end / sr
            features_list.append(features)

        return features_list

    except Exception as e:
        print(f"Error extracting features from {audio_file}: {e}")
        return []

## Define Data Paths

Specify the genre labels and the base folder that contains one subfolder of audio files per genre.

In [20]:
# List of genres; each name is used both as the subfolder name and as the label
genres = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

# Base folder containing genre subfolders.
# Set the GTZAN_GENRES_DIR environment variable to point at a local copy of
# the dataset; the original author's path is kept as the default.
base_folder_path = os.environ.get(
    'GTZAN_GENRES_DIR',
    '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/GTZan/genres_original',
)

## Process Input Audio Files

Process input files and label the data. 

In [21]:
# Function to process a folder of audio files
def process_audio_folder(folder_path, genre_label):
    """Extract labelled segment features from every .wav/.mp3 file in a folder.

    Args:
        folder_path: Directory whose audio files are processed (not recursive).
        genre_label: Value stored under the ``genre`` key of every feature dict.

    Returns:
        List of feature dicts, one per extracted segment, each tagged with
        ``genre`` and with ``filename`` set to the name of the source file.
    """
    results = []
    # Sorted so the row order is reproducible; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(('.wav', '.mp3')):
            continue

        file_path = os.path.join(folder_path, filename)
        temp_wav = None
        if filename.endswith('.mp3'):
            temp_wav = convert_mp3_to_wav(file_path)
            if temp_wav is None:
                # Conversion already reported the error; skip this file.
                continue
            file_path = temp_wav

        try:
            features_list = extract_features(file_path)
        finally:
            # The converted WAV is only needed while features are extracted.
            if temp_wav is not None and os.path.exists(temp_wav):
                os.remove(temp_wav)

        for features in features_list:
            # For MP3 input, record the source name, not the temp WAV name.
            features['filename'] = filename
            features['genre'] = genre_label
            results.append(features)
    return results

## Create Singular CSV

Aggregate results of the feature extraction into a singular CSV.

In [22]:
# Collect the per-segment feature dicts of every genre folder in one list.
all_results = []

for genre in genres:
    print(f"Processing genre: {genre}")
    folder_path = os.path.join(base_folder_path, genre)
    genre_results = process_audio_folder(folder_path, genre)
    all_results.extend(genre_results)
    print(f"Completed processing genre: {genre}")

print("Writing results to CSV file...")
df = pd.DataFrame(all_results)
# Output location. Set the GTZAN_FEATURES_CSV environment variable to write
# somewhere else; the original author's path is kept as the default.
csv_file_path = os.environ.get(
    'GTZAN_FEATURES_CSV',
    '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/all_genres_audio_features.csv',
)
df.to_csv(csv_file_path, index=False)
print("CSV file generation completed.")

df.head()

Processing genre: blues


## Examine Data

Gain insights into data. 

In [None]:
# Get summary statistics for every column. include='all' also summarises the
# non-numeric columns (count / unique / top / freq), not just the numeric ones.
df.describe(include='all')

Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,filename,start,end,genre
count,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,...,9981.0,9981.0,9981.0,9981.0,9981.0,9981.0,9981,9981.0,9981.0,9981
unique,,,,,,,,,,,...,,,,,,,999,,,10
top,,,,,,,,,,,...,,,,,,,blues.00093.wav,,,blues
freq,,,,,,,,,,,...,,,,,,,10,,,1000
mean,0.379964,0.084882,0.130039,0.002672434,2201.910957,415925.5,2244.56246,118312.1,4571.568401,1623468.0,...,0.724376,52.343689,-2.497094,54.811691,-0.929246,57.142101,,13.487827,16.487827,
std,0.090624,0.009675,0.068168,0.003561532,750.540439,433967.5,541.420376,100250.1,1639.481644,1482634.0,...,5.175797,38.067791,5.107192,41.505894,5.247202,46.342796,,8.611614,8.611614,
min,0.108073,0.015217,0.000947,4.055916e-08,479.905803,2161.498,499.577102,1295.35,673.906438,1130.834,...,-20.749748,3.445752,-27.359076,3.147765,-35.614895,0.253587,,0.0,3.0,
25%,0.316037,0.07982,0.083223,0.000628582,1634.097152,122833.6,1890.204723,49414.1,3389.905912,556238.5,...,-2.524088,29.405123,-5.734853,30.384859,-4.01272,29.925747,,6.0,9.0,
50%,0.385163,0.085137,0.120488,0.001500287,2211.777107,264393.1,2233.071916,90371.22,4634.773513,1155826.0,...,0.730935,41.686157,-2.700388,43.264107,-1.045194,44.173588,,12.0,15.0,
75%,0.442906,0.091154,0.175334,0.00311376,2713.457812,561220.0,2590.295338,157892.9,5597.307692,2251497.0,...,3.871771,61.854343,0.521315,65.165123,2.192562,68.00071,,21.0,24.0,


## Process single mp3 for validation

In [None]:
def process_single_mp3(mp3_file_path):
    """Extract segment features from one MP3 and save them as a CSV.

    The MP3 is converted to a temporary WAV, featurised with
    ``extract_features`` and written to ``<stem>_features.csv`` in the parent
    of the MP3's directory. Progress and failures are reported with print.

    Args:
        mp3_file_path: Path of the MP3 file to process.

    Returns:
        None.
    """
    wav_file_path = convert_mp3_to_wav(mp3_file_path)
    if not wav_file_path:
        print("Failed to convert MP3 to WAV.")
        return

    try:
        features_list = extract_features(wav_file_path)
    finally:
        # The temporary WAV is only needed while features are extracted.
        if os.path.exists(wav_file_path):
            os.remove(wav_file_path)

    if not features_list:
        print("No features extracted.")
        return

    source_name = os.path.basename(mp3_file_path)
    for features in features_list:
        # Record the source MP3, not the temp WAV name.
        features['filename'] = source_name

    df_new = pd.DataFrame(features_list)
    # splitext strips only the real extension, whatever its case;
    # str.replace('.mp3', ...) would also rewrite '.mp3' inside the stem.
    stem, _ = os.path.splitext(source_name)
    output_csv_filename = f"{stem}_features.csv"
    output_csv_path = os.path.join(os.path.dirname(mp3_file_path), '..', output_csv_filename)
    df_new.to_csv(output_csv_path, index=False)
    print(f"Features extracted and saved to {output_csv_path}")

Usage of validation data generator.

In [None]:
# Example usage: run the single-file pipeline on one MP3.
# The path below is specific to the original author's machine; replace it
# with an MP3 file that exists locally before running this cell.
mp3_file_path = '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/inputs/file.mp3'
process_single_mp3(mp3_file_path)

Features extracted and saved to /Users/isaiah/Desktop/Career/Projects/music-genre-detector/inputs/../file_features.csv
