# Data Generation for the GTZan Music Genre Classification

## Import Statements

Start by importing necessary libraries.

In [1]:
import os
import librosa
import pandas as pd
from pydub import AudioSegment
import tempfile
import numpy as np
import soundfile as sf



## Helper Functions

Convert to MP3 function.

In [2]:
# Function to convert MP3 to WAV
def convert_mp3_to_wav(mp3_file):
    try:
        sound = AudioSegment.from_mp3(mp3_file)
        wav_file = tempfile.mktemp(suffix='.wav')
        sound.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
        print(f"Error converting {mp3_file} to WAV: {e}")
        return None

Feature extraction from a segment. 

In [3]:
# Function to extract features from an audio segment
def extract_features_from_segment(y, sr, start_time, end_time):
    segment = y[start_time:end_time]

    chroma_stft = librosa.feature.chroma_stft(y=segment, sr=sr)
    rms = librosa.feature.rms(y=segment)
    spectral_centroid = librosa.feature.spectral_centroid(y=segment, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=segment, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=segment, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y=segment)
    harmony, perceptr = librosa.effects.hpss(segment)
    tempo, _ = librosa.beat.beat_track(y=segment, sr=sr)
    mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=20)

    features = {
        'chroma_stft_mean': chroma_stft.mean() if chroma_stft.size else 0,
        'chroma_stft_var': chroma_stft.var() if chroma_stft.size else 0,
        'rms_mean': rms.mean() if rms.size else 0,
        'rms_var': rms.var() if rms.size else 0,
        'spectral_centroid_mean': spectral_centroid.mean() if spectral_centroid.size else 0,
        'spectral_centroid_var': spectral_centroid.var() if spectral_centroid.size else 0,
        'spectral_bandwidth_mean': spectral_bandwidth.mean() if spectral_bandwidth.size else 0,
        'spectral_bandwidth_var': spectral_bandwidth.var() if spectral_bandwidth.size else 0,
        'rolloff_mean': rolloff.mean() if rolloff.size else 0,
        'rolloff_var': rolloff.var() if rolloff.size else 0,
        'zero_crossing_rate_mean': zero_crossing_rate.mean() if zero_crossing_rate.size else 0,
        'zero_crossing_rate_var': zero_crossing_rate.var() if zero_crossing_rate.size else 0,
        'harmony_mean': harmony.mean() if harmony.size else 0,
        'harmony_var': harmony.var() if harmony.size else 0,
        'perceptr_mean': perceptr.mean() if perceptr.size else 0,
        'perceptr_var': perceptr.var() if perceptr.size else 0,
        'tempo': tempo,
    }

    for i in range(1, 21):
        features[f'mfcc{i}_mean'] = mfcc[i-1].mean() if mfcc.shape[0] >= i else 0
        features[f'mfcc{i}_var'] = mfcc[i-1].var() if mfcc.shape[0] >= i else 0

    return features

Load audio helpfer function.

In [4]:
# Function to load audio file
def load_audio(file_path):
    try:
        y, sr = librosa.load(file_path, sr=None)
    except sf.LibsndfileError:
        print(f"LibsndfileError: {file_path}")
        return None, None
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None, None
    return y, sr

Segment data and call feature extraction.

In [5]:
# Function to extract features from an audio file
def extract_features(audio_file, segment_duration=10):
    try:
        y, sr = load_audio(audio_file)
        if y is None:
            return []

        total_duration = len(y) / sr
        segment_length = int(sr * segment_duration)

        features_list = []

        for start in range(0, len(y), segment_length):
            end = start + segment_length
            if end <= len(y):
                segment_features = extract_features_from_segment(y, sr, start, end)
                all_features = segment_features
                all_features['filename'] = os.path.basename(audio_file)
                all_features['start'] = start / sr
                all_features['end'] = end / sr
                features_list.append(all_features)

        return features_list

    except Exception as e:
        print(f"Error extracting features from {audio_file}: {e}")
        return []

## Define Data Paths

Specify the paths to the CSV files containing data.

In [6]:
# List of genres
genres = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

# Base folder containing genre subfolders
base_folder_path = '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/GTZan/genres_original'

## Process Input Audio Files

Process input files and label the data. 

In [7]:
# Function to process a folder of audio files
def process_audio_folder(folder_path, genre_label):
    results = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.wav') or filename.endswith('.mp3'):
            file_path = os.path.join(folder_path, filename)
            if filename.endswith('.mp3'):
                file_path = convert_mp3_to_wav(file_path)
            features_list = extract_features(file_path)
            for features in features_list:
                features['genre'] = genre_label
                results.append(features)
    return results

## Create Singular CSV

Aggregate results of the feature extraction into a singular CSV.

In [8]:
all_results = []

for genre in genres:
    print(f"Processing genre: {genre}")
    folder_path = os.path.join(base_folder_path, genre)
    genre_results = process_audio_folder(folder_path, genre)
    all_results.extend(genre_results)
    print(f"Completed processing genre: {genre}")

print("Writing results to CSV file...")
df = pd.DataFrame(all_results)
csv_file_path = '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/all_genres_audio_features.csv'
df.to_csv(csv_file_path, index=False)
print("CSV file generation completed.")

df.head()

Processing genre: blues
Completed processing genre: blues
Processing genre: classical
Completed processing genre: classical
Processing genre: country
Completed processing genre: country
Processing genre: disco
Completed processing genre: disco
Processing genre: hiphop
Completed processing genre: hiphop
Processing genre: jazz


  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error loading /Users/isaiah/Desktop/Career/Projects/music-genre-detector/GTZan/genres_original/jazz/jazz.00054.wav: 
Completed processing genre: jazz
Processing genre: metal
Completed processing genre: metal
Processing genre: pop
Completed processing genre: pop
Processing genre: reggae
Completed processing genre: reggae
Processing genre: rock
Completed processing genre: rock
Writing results to CSV file...
CSV file generation completed.


Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,filename,start,end,genre
0,0.399429,0.092919,0.061375,0.001746,574.917649,126695.32513,968.464834,87971.594698,982.858326,588787.2,...,0.060722,46.816097,4.530915,42.765888,1.700291,35.36692,blues.00093.wav,0.0,10.0,blues
1,0.382725,0.096942,0.059265,0.001327,571.488368,65396.649681,1001.270756,64480.456959,914.161912,305725.3,...,1.393839,25.18078,1.342265,48.229031,-5.838692,68.770432,blues.00093.wav,10.0,20.0,blues
2,0.355013,0.099065,0.076845,0.002158,567.393583,125956.638174,1022.309635,105444.049669,898.848857,631589.7,...,-1.191103,29.066435,0.475051,32.250404,-4.556223,48.543552,blues.00093.wav,20.0,30.0,blues
3,0.319351,0.094813,0.155237,0.002587,1379.320718,541760.685298,1829.221092,173513.186918,2890.545244,2591938.0,...,-1.237206,53.630169,2.549892,49.117638,-5.811831,82.158409,blues.00087.wav,0.0,10.0,blues
4,0.345845,0.09406,0.171401,0.003239,1425.317034,276693.251662,1844.78863,116117.705916,3012.799879,1621920.0,...,2.447671,72.564873,3.575914,55.017838,-5.471438,67.686737,blues.00087.wav,10.0,20.0,blues


## Examine Data

Gain insights into data. 

In [9]:
# Get summary statistics
df.describe(include='all')

Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,filename,start,end,genre
count,2988.0,2988.0,2988.0,2988.0,2988.0,2988.0,2988.0,2988.0,2988.0,2988.0,...,2988.0,2988.0,2988.0,2988.0,2988.0,2988.0,2988,2988.0,2988.0,2988
unique,,,,,,,,,,,...,,,,,,,999,,,10
top,,,,,,,,,,,...,,,,,,,blues.00093.wav,,,blues
freq,,,,,,,,,,,...,,,,,,,3,,,300
mean,0.378823,0.085846,0.13057,0.002870646,2200.838173,449632.8,2241.99465,129183.5,4569.365666,1759079.0,...,0.603902,60.373924,-2.408998,62.671871,-1.031245,65.967857,,9.96988,19.96988,
std,0.084776,0.008636,0.06696,0.003593415,729.637169,414671.5,533.45198,99923.76,1600.754328,1456471.0,...,4.325673,39.036507,4.259204,40.9207,4.362487,47.34573,,8.160125,8.160125,
min,0.126113,0.014554,0.001251,1.780778e-07,518.94968,2389.426,695.900448,1960.306,713.768228,1260.12,...,-15.889191,10.180762,-20.310465,11.670246,-21.439188,0.528655,,0.0,10.0,
25%,0.318897,0.081522,0.085172,0.0008013145,1641.892948,158653.9,1892.997981,59846.62,3386.701969,704848.7,...,-2.127181,36.199359,-5.087884,37.353881,-3.614868,37.319559,,0.0,10.0,
50%,0.384743,0.08631,0.12144,0.001714807,2219.993089,316581.7,2225.534119,104149.5,4660.726922,1335184.0,...,0.550131,49.636463,-2.512369,51.855022,-1.112768,53.529154,,10.0,20.0,
75%,0.438246,0.091423,0.175636,0.003326403,2701.593712,595725.9,2586.507099,170941.8,5570.54847,2421513.0,...,3.445446,71.917171,0.162183,75.648439,1.648025,78.92655,,20.0,30.0,


## Process single mp3 for validation

In [10]:
def process_single_mp3(mp3_file_path):
    wav_file_path = convert_mp3_to_wav(mp3_file_path)
    if wav_file_path:
        features_list = extract_features(wav_file_path)
        if features_list:
            df_new = pd.DataFrame(features_list)
            output_csv_filename = os.path.basename(mp3_file_path).replace('.mp3', '_features.csv')
            output_csv_path = os.path.join(os.path.dirname(mp3_file_path), '..', output_csv_filename)
            df_new.to_csv(output_csv_path, index=False)
            print(f"Features extracted and saved to {output_csv_path}")
        else:
            print("No features extracted.")
    else:
        print("Failed to convert MP3 to WAV.")

Usage of validation data generator.

In [11]:
# Example usage
mp3_file_path = '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/inputs/file.mp3'
process_single_mp3(mp3_file_path)

Features extracted and saved to /Users/isaiah/Desktop/Career/Projects/music-genre-detector/inputs/../file_features.csv
