# Supervised Learning Music Genre Classification

## Import Statements

Start by importing necessary libraries.

In [9]:
import os
import librosa
import pandas as pd
from pydub import AudioSegment
import tempfile
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Helper Functions

Convert an MP3 file to a temporary WAV file.

In [10]:
def convert_mp3_to_wav(mp3_file):
    """Decode an MP3 file and write it out as a temporary WAV file.

    Args:
        mp3_file: Path of the MP3 file to convert.

    Returns:
        Path of the newly created temporary ``.wav`` file, or ``None`` if the
        conversion failed (the error is printed). The file is not deleted
        here; the caller owns it and should remove it when done.
    """
    wav_file = None
    try:
        sound = AudioSegment.from_mp3(mp3_file)
        # mkstemp creates the file atomically. tempfile.mktemp is deprecated
        # because another process can claim the returned name before it is used.
        fd, wav_file = tempfile.mkstemp(suffix='.wav')
        # export() is given the path below, so the open descriptor is not needed.
        os.close(fd)
        sound.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
        print(f"Error converting {mp3_file} to WAV: {e}")
        # Do not leave an empty or partially written temp file behind.
        if wav_file is not None and os.path.exists(wav_file):
            os.remove(wav_file)
        return None

Feature extraction from a segment. 

In [11]:
# Function to extract features from an audio segment
def extract_features_from_segment(y, sr, start_time, end_time):
    """Compute summary audio features for one slice of a waveform.

    Args:
        y: Audio samples; the analysed slice is ``y[start_time:end_time]``.
        sr: Sample rate, forwarded to the librosa extractors that need it.
        start_time: Start of the slice. Despite the name, this is used as an
            index into ``y`` (samples), not as a time in seconds.
        end_time: End of the slice (exclusive), likewise an index into ``y``.

    Returns:
        dict of feature name -> value: ``<feature>_mean`` / ``<feature>_var``
        for each frame-level feature, ``tempo``, and ``mfcc{i}_mean`` /
        ``mfcc{i}_var`` for i in 1..20, in that insertion order.
    """
    segment = y[start_time:end_time]

    # Compute chroma feature from the waveform and sample rate
    chroma_stft = librosa.feature.chroma_stft(y=segment, sr=sr)
    # Compute Root Mean Square (RMS) energy for each frame
    rms = librosa.feature.rms(y=segment)
    # Compute spectral centroid, which indicates where the center of mass for a sound is located
    spectral_centroid = librosa.feature.spectral_centroid(y=segment, sr=sr)
    # Compute spectral bandwidth, which is a measure of the width of the band of frequencies
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=segment, sr=sr)
    # Compute spectral rolloff point, which is the frequency below which a specified percentage of the total spectral energy lies
    rolloff = librosa.feature.spectral_rolloff(y=segment, sr=sr)
    # Compute zero crossing rate, which is the rate at which the signal changes sign
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y=segment)
    # Compute harmony and perceptr (percussive) components of the audio
    harmony, perceptr = librosa.effects.hpss(segment)
    # Compute tempo (beats per minute)
    tempo, _ = librosa.beat.beat_track(y=segment, sr=sr)
    # Compute Mel-frequency cepstral coefficients (MFCCs)
    mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=20)

    # Listed in output order: the dict's insertion order becomes the column
    # order of the DataFrame built from these dicts.
    frame_level = [
        ('chroma_stft', chroma_stft),
        ('rms', rms),
        ('spectral_centroid', spectral_centroid),
        ('spectral_bandwidth', spectral_bandwidth),
        ('rolloff', rolloff),
        ('zero_crossing_rate', zero_crossing_rate),
        ('harmony', harmony),
        ('perceptr', perceptr),
    ]

    features = {}
    for name, values in frame_level:
        # mean()/var() of an empty array is NaN; report 0 instead.
        has_data = values.size > 0
        features[f'{name}_mean'] = values.mean() if has_data else 0
        features[f'{name}_var'] = values.var() if has_data else 0

    # Depending on the librosa version, tempo may come back as a NumPy array
    # rather than a plain number. Storing an array would put an array object in
    # the DataFrame cell, so always reduce it to a single float.
    features['tempo'] = float(np.atleast_1d(tempo)[0])

    for i in range(1, 21):
        # Guard against fewer than 20 coefficient rows being returned.
        row_present = mfcc.shape[0] >= i
        features[f'mfcc{i}_mean'] = mfcc[i-1].mean() if row_present else 0
        features[f'mfcc{i}_var'] = mfcc[i-1].var() if row_present else 0

    return features


Segment data and call feature extraction.

In [12]:
# Function to extract features from an audio file
def extract_features(audio_file, segment_duration=3):
    """Split an audio file into fixed-length segments and extract features from each.

    Only complete segments are processed; a trailing remainder shorter than
    ``segment_duration`` is skipped.

    Args:
        audio_file: Path of the audio file to load.
        segment_duration: Segment length in seconds (default 3).

    Returns:
        A list with one feature dict per complete segment. Each dict holds the
        values from ``extract_features_from_segment`` plus ``filename`` (base
        name of ``audio_file``) and ``start`` / ``end`` in seconds. Returns an
        empty list if anything fails; the error is printed.
    """
    try:
        # Load the audio file at its native sample rate
        y, sr = librosa.load(audio_file, sr=None)
        segment_length = int(sr * segment_duration)
        if segment_length <= 0:
            # Fail with a clear message instead of an opaque range() error.
            raise ValueError(
                f"segment_duration must yield at least one sample, got {segment_duration}"
            )

        file_name = os.path.basename(audio_file)
        features_list = []

        # Stop at the last start index that still leaves room for a full segment.
        last_start = len(y) - segment_length
        for start in range(0, last_start + 1, segment_length):
            end = start + segment_length
            segment_features = extract_features_from_segment(y, sr, start, end)
            segment_features['filename'] = file_name
            segment_features['start'] = start / sr
            segment_features['end'] = end / sr
            features_list.append(segment_features)

        return features_list

    except Exception as e:
        print(f"Error extracting features from {audio_file}: {e}")
        return []

## Define Data Paths

Define the genre labels and the base folder that contains one subfolder of audio files per genre.

In [13]:
# List of genres; each name is also the name of a subfolder under base_folder_path
genres = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

# Base folder containing genre subfolders.
# Set the GTZAN_GENRES_DIR environment variable to run the notebook on another
# machine without editing this cell; when unset, the original location is used.
base_folder_path = os.environ.get(
    'GTZAN_GENRES_DIR',
    '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/GTZan/genres_original',
)

## Process Input Audio Files

Process input files and label the data. 

In [14]:
# Main function to process all audio files in a folder
def process_audio_folder(folder_path, genre_label):
    """Extract labelled segment features from every .wav/.mp3 file in a folder.

    MP3 files are first converted to a temporary WAV file, which is removed
    once its features have been extracted.

    Args:
        folder_path: Directory whose files are scanned (not recursive).
        genre_label: Value stored under ``'genre'`` in every feature dict.

    Returns:
        A flat list of feature dicts, one per segment, across all files.
        Files that fail to convert or yield no features contribute nothing.
    """
    results = []
    # os.listdir returns entries in arbitrary order; sort so the output rows
    # come out in the same order on every run and platform.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(('.wav', '.mp3')):
            continue

        file_path = os.path.join(folder_path, filename)
        temp_wav = None
        if filename.endswith('.mp3'):
            # convert_mp3_to_wav reports its own errors and signals failure by
            # returning None, so check the result rather than catching here.
            temp_wav = convert_mp3_to_wav(file_path)
            if temp_wav is None:
                continue
            file_path = temp_wav

        try:
            features_list = extract_features(file_path)
        finally:
            # The converted WAV is only needed for extraction; don't leak it.
            if temp_wav is not None and os.path.exists(temp_wav):
                os.remove(temp_wav)

        for features in features_list:
            if temp_wav is not None:
                # extract_features saw the temp file's name; record the real one.
                features['filename'] = filename
            features['genre'] = genre_label
            results.append(features)
    return results

## Create Singular CSV

Aggregate results of the feature extraction into a singular CSV.

In [15]:
# Collect results from all genres
all_results = []

# Process each genre
for genre in genres:
    print(f"Processing genre: {genre}")
    folder_path = os.path.join(base_folder_path, genre)
    # The CSV is only written after the loop, so an exception from a missing
    # genre folder would discard everything extracted so far. Skip it instead.
    if not os.path.isdir(folder_path):
        print(f"Skipping genre {genre}: folder not found at {folder_path}")
        continue
    genre_results = process_audio_folder(folder_path, genre)
    all_results.extend(genre_results)
    print(f"Completed processing genre: {genre}")

# Write all results to a single CSV file
print("Writing results to CSV file...")
df = pd.DataFrame(all_results)
csv_file_path = '/Users/isaiah/Desktop/Career/Projects/music-genre-detector/all_genres_audio_features.csv'
df.to_csv(csv_file_path, index=False)
print("CSV file generation completed.")

# Final marker so it is obvious in the cell output when the whole run has finished
print("Feature extraction and CSV generation completed for all genres.")

# Preview the first rows of the extracted feature table
df.head()

Processing genre: blues


## Begin preprocessing

Gain insights into data. 

In [None]:
# Get summary statistics.
# A notebook cell only renders its last expression, so the table is kept in a
# variable here and emitted at the end of the cell, after the plots.
summary_stats = df.describe()

# Visualize the distribution of a few features
plt.figure(figsize=(10, 6))
sns.histplot(df['chroma_stft_mean'], kde=True)
plt.title('Distribution of Chroma STFT Mean')
plt.show()

plt.figure(figsize=(10, 6))
sns.histplot(df['tempo'], kde=True)
plt.title('Distribution of Tempo')
plt.show()

# Display the summary statistics computed above
summary_stats

Check for duplicate rows and null values.

In [None]:
def check_duplicates_or_nulls(data):
    """Report whether a DataFrame has any duplicated rows or any null values.

    Args:
        data: DataFrame to inspect.

    Returns:
        A truthy value if at least one row is a duplicate of an earlier row or
        at least one cell is null; a falsy value otherwise.
    """
    # Duplicated rows: count of rows flagged as repeats of an earlier row
    duplicate_row_count = data.duplicated().sum()
    if duplicate_row_count > 0:
        return duplicate_row_count > 0

    # Null cells: per-column counts summed into a single total
    null_cell_count = data.isnull().sum().sum()
    return null_cell_count > 0

# csv_file_path is a single path string, so looping over it would yield one
# character at a time and the check would fail on a str. Validate the feature
# DataFrame that was written to that CSV instead.
if check_duplicates_or_nulls(df):
    print('Duplicates or null values found in data')
else:
    print('No duplicates or null values found in data')

## Shuffle and Split data

Split data into training and testing sets (90/10).

Apply the low pass filter through use of a moving average with a window size of 5.

(162, 497, 3) (18, 497, 3)


## Z Score normalization is done to the extracted features 

StandardScaler is used to normalize the individual features of the train and test sets; the results are converted back to data frames afterwards.

## Run model

The model is initialized, trained, and tested, and its accuracy scores are displayed.

## Accuracy Testing

Check the model to ensure it is doing well in terms of classifications.

## Cross-validation 

To ensure that the model is functioning well, cross-validation is done with an average result shown. 