# Data Generation for the GTZan Music Genre Classification

## Import Statements

Start by importing necessary libraries.

In [23]:
import os
import librosa
import pandas as pd
from pydub import AudioSegment
import tempfile
import numpy as np
import soundfile as sf

## Helper Functions

Convert to MP3 function.

In [24]:
# Function to convert MP3 to WAV
def convert_mp3_to_wav(mp3_file):
    try:
        sound = AudioSegment.from_mp3(mp3_file)
        wav_file = tempfile.mktemp(suffix='.wav')
        sound.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
        print(f"Error converting {mp3_file} to WAV: {e}")
        return None

Feature extraction from a segment. 

In [25]:
# Function to extract features from an audio segment
def extract_features_from_segment(y, sr, start_time, end_time):
    segment = y[start_time:end_time]

    chroma_stft = librosa.feature.chroma_stft(y=segment, sr=sr)
    rms = librosa.feature.rms(y=segment)
    spectral_centroid = librosa.feature.spectral_centroid(y=segment, sr=sr)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=segment, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=segment, sr=sr)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y=segment)
    harmony, perceptr = librosa.effects.hpss(segment)
    tempo, _ = librosa.beat.beat_track(y=segment, sr=sr)
    mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=20)

    features = {
        'chroma_stft_mean': chroma_stft.mean() if chroma_stft.size else 0,
        'chroma_stft_var': chroma_stft.var() if chroma_stft.size else 0,
        'rms_mean': rms.mean() if rms.size else 0,
        'rms_var': rms.var() if rms.size else 0,
        'spectral_centroid_mean': spectral_centroid.mean() if spectral_centroid.size else 0,
        'spectral_centroid_var': spectral_centroid.var() if spectral_centroid.size else 0,
        'spectral_bandwidth_mean': spectral_bandwidth.mean() if spectral_bandwidth.size else 0,
        'spectral_bandwidth_var': spectral_bandwidth.var() if spectral_bandwidth.size else 0,
        'rolloff_mean': rolloff.mean() if rolloff.size else 0,
        'rolloff_var': rolloff.var() if rolloff.size else 0,
        'zero_crossing_rate_mean': zero_crossing_rate.mean() if zero_crossing_rate.size else 0,
        'zero_crossing_rate_var': zero_crossing_rate.var() if zero_crossing_rate.size else 0,
        'harmony_mean': harmony.mean() if harmony.size else 0,
        'harmony_var': harmony.var() if harmony.size else 0,
        'perceptr_mean': perceptr.mean() if perceptr.size else 0,
        'perceptr_var': perceptr.var() if perceptr.size else 0,
        'tempo': tempo,
    }

    for i in range(1, 21):
        features[f'mfcc{i}_mean'] = mfcc[i-1].mean() if mfcc.shape[0] >= i else 0
        features[f'mfcc{i}_var'] = mfcc[i-1].var() if mfcc.shape[0] >= i else 0

    return features

Load audio helpfer function.

In [26]:
# Function to load audio file
def load_audio(file_path):
    try:
        y, sr = librosa.load(file_path, sr=None)
    except sf.LibsndfileError:
        print(f"LibsndfileError: {file_path}")
        return None, None
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None, None
    return y, sr

Segment data and call feature extraction.

In [27]:
# Function to extract features from an audio file
def extract_features(audio_file, segment_duration=30):
    try:
        y, sr = load_audio(audio_file)
        if y is None:
            return []

        total_duration = len(y) / sr
        segment_length = int(sr * segment_duration)

        features_list = []

        for start in range(0, len(y), segment_length):
            end = start + segment_length
            if end <= len(y):
                segment_features = extract_features_from_segment(y, sr, start, end)
                all_features = segment_features
                all_features['filename'] = os.path.basename(audio_file)
                all_features['start'] = start / sr
                all_features['end'] = end / sr
                features_list.append(all_features)

        return features_list

    except Exception as e:
        print(f"Error extracting features from {audio_file}: {e}")
        return []

## Define Data Paths

Specify the paths to the CSV files containing data.

In [28]:
# List of genres
genres = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

# Base folder containing genre subfolders
base_folder_path = '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/GTZan/genres_original'

## Process Input Audio Files

Process input files and label the data. 

In [29]:
# Function to process a folder of audio files
def process_audio_folder(folder_path, genre_label):
    results = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.wav') or filename.endswith('.mp3'):
            file_path = os.path.join(folder_path, filename)
            if filename.endswith('.mp3'):
                file_path = convert_mp3_to_wav(file_path)
            features_list = extract_features(file_path)
            for features in features_list:
                features['genre'] = genre_label
                results.append(features)
    return results

## Create Singular CSV

Aggregate results of the feature extraction into a singular CSV.

In [30]:
all_results = []

for genre in genres:
    print(f"Processing genre: {genre}")
    folder_path = os.path.join(base_folder_path, genre)
    genre_results = process_audio_folder(folder_path, genre)
    all_results.extend(genre_results)
    print(f"Completed processing genre: {genre}")

print("Writing results to CSV file...")
df = pd.DataFrame(all_results)
csv_file_path = '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/all_genres_audio_features.csv'
df.to_csv(csv_file_path, index=False)
print("CSV file generation completed.")

df.head()

Processing genre: blues
Completed processing genre: blues
Processing genre: classical
Completed processing genre: classical
Processing genre: country
Completed processing genre: country
Processing genre: disco
Completed processing genre: disco
Processing genre: hiphop
Completed processing genre: hiphop
Processing genre: jazz


  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error loading /Users/isaiah/Desktop/Career/Projects/music-genre-detector/GTZan/genres_original/jazz/jazz.00054.wav: 
Completed processing genre: jazz
Processing genre: metal
Completed processing genre: metal
Processing genre: pop
Completed processing genre: pop
Processing genre: reggae
Completed processing genre: reggae
Processing genre: rock
Completed processing genre: rock
Writing results to CSV file...
CSV file generation completed.


Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,filename,start,end,genre
0,0.377736,0.096391,0.065891,0.001804,570.150688,104951.648462,995.093473,84457.870101,928.852717,500030.4,...,0.175169,34.543179,2.076678,44.244167,-2.892004,61.476616,blues.00093.wav,0.0,30.0,blues
1,0.336742,0.094685,0.158012,0.002718,1441.999254,387323.611215,1870.239191,146888.711178,3083.264689,2196577.0,...,0.951334,64.032364,2.006461,68.354385,-5.735763,84.034637,blues.00087.wav,0.0,30.0,blues
2,0.400909,0.089653,0.182339,0.003975,1945.620794,147369.551227,2082.176477,73369.006677,4175.766416,806804.4,...,-0.325246,56.951794,-2.984734,42.425682,0.714617,55.56815,blues.00050.wav,0.0,30.0,blues
3,0.39016,0.086286,0.136178,0.000467,2279.265543,170943.704795,2375.084795,53001.032308,5198.476899,570163.0,...,-0.739238,56.313297,-5.130451,96.007965,3.307132,92.614929,blues.00044.wav,0.0,30.0,blues
4,0.414258,0.082975,0.257949,0.00273,2333.71639,89830.069266,2227.415627,30643.873748,4942.928444,481762.2,...,3.168496,30.374691,-4.571702,46.855358,-4.033881,35.250755,blues.00078.wav,0.0,30.0,blues


## Examine Data

Gain insights into data. 

In [31]:
# Get summary statistics
df.describe(include='all')

Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,filename,start,end,genre
count,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0,990.0,...,990.0,990.0,990.0,990.0,990.0,990.0,990,990.0,990.0,990
unique,,,,,,,,,,,...,,,,,,,990,,,10
top,,,,,,,,,,,...,,,,,,,blues.00093.wav,,,blues
freq,,,,,,,,,,,...,,,,,,,1,,,100
mean,0.378526,0.08638,0.130414,0.003054,2199.327355,470209.5,2239.681058,137032.649395,4565.584859,1843171.0,...,0.510834,63.87534,-2.332108,66.380997,-1.107885,70.208977,,0.0,30.0,
std,0.081866,0.007767,0.065648,0.00365,716.918309,402149.4,527.173141,96744.108318,1576.215424,1429330.0,...,3.883969,34.514214,3.769515,37.32135,3.849626,45.418789,,0.0,0.0,
min,0.17177,0.044578,0.00527,4e-06,570.150688,7957.773,899.979779,11160.012397,749.528801,14807.54,...,-11.985029,15.395742,-18.505476,13.382755,-19.933725,7.877424,,0.0,30.0,
25%,0.319397,0.082381,0.086429,0.00094,1623.214609,184084.0,1903.010596,67181.287301,3377.483743,769023.7,...,-2.011947,41.861622,-4.696787,41.768799,-3.377763,42.257663,,0.0,30.0,
50%,0.383055,0.086644,0.120666,0.001817,2208.737431,339212.8,2218.084461,112497.079752,4658.484331,1467297.0,...,0.668484,55.16939,-2.39383,57.55711,-1.185065,59.217588,,0.0,30.0,
75%,0.43547,0.091295,0.175362,0.003575,2685.204362,615131.9,2575.64777,181960.326723,5525.670754,2560563.0,...,3.126128,76.165087,0.156752,78.93309,1.287499,85.776144,,0.0,30.0,


## Process single mp3 for validation

In [32]:
def process_single_mp3(mp3_file_path):
    wav_file_path = convert_mp3_to_wav(mp3_file_path)
    if wav_file_path:
        features_list = extract_features(wav_file_path)
        if features_list:
            df_new = pd.DataFrame(features_list)
            output_csv_filename = os.path.basename(mp3_file_path).replace('.mp3', '_features.csv')
            output_csv_path = os.path.join(os.path.dirname(mp3_file_path), '..', output_csv_filename)
            df_new.to_csv(output_csv_path, index=False)
            print(f"Features extracted and saved to {output_csv_path}")
        else:
            print("No features extracted.")
    else:
        print("Failed to convert MP3 to WAV.")

Usage of validation data generator.

In [33]:
# Example usage
mp3_file_path = '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/inputs/file.mp3'
process_single_mp3(mp3_file_path)

Features extracted and saved to /Users/isaiah/Desktop/Career/Projects/music-genre-detector/inputs/../file_features.csv
