# Supervised Learning Music Genre Classification

## Import Statements

Start by importing necessary libraries.

In [842]:
import os
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import tempfile
from pydub import AudioSegment
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, roc_curve, RocCurveDisplay, roc_auc_score, f1_score
from sklearn.model_selection import learning_curve

## Define Data Paths

Specify the paths to the CSV files containing data.

## Load Input CSV Data Function

A function to load data from CSV and assign labels.

Gain insights into data. 

## Convert MP3 to WAV Function

In [None]:
def convert_mp3_to_wav(mp3_file):
    """Convert an MP3 file into a temporary WAV file.

    Args:
        mp3_file: Path of the MP3 file to decode.

    Returns:
        Path of the newly written WAV file, or None when decoding or
        exporting fails (the error is printed instead of raised). The caller
        owns the returned file and is responsible for deleting it.
    """
    wav_file = None
    try:
        # Decode first so that no temporary file is created for unreadable input.
        sound = AudioSegment.from_mp3(mp3_file)
        # mkstemp creates the file atomically; tempfile.mktemp only invents a
        # name, is deprecated, and lets another process claim the path first.
        fd, wav_file = tempfile.mkstemp(suffix='.wav')
        os.close(fd)  # export() is given the path, so the descriptor is not needed
        sound.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
        print(f"Error converting {mp3_file} to WAV: {e}")
        if wav_file is not None:
            # Do not leave a partially written temp file behind on failure.
            try:
                os.remove(wav_file)
            except OSError:
                pass
        return None

## Load, Label and Segment

Load the data for each activity, segment it into windows, and combine the segments into a single dataset.

In [847]:
# Build the windowed dataset together with a parallel list of class ids
labels = {'Walking': 0, 'Jumping': 1}  # Activity name -> integer class id
segmented_data = []
labels_list = []
# csv_paths and activity_labels are walked in lockstep: one activity per file
for path, activity in zip(csv_paths, activity_labels):
    data = load_csv_data(path)
    segments = segment_into_windows(data)  # presumably a list of DataFrames, one per window
    segmented_data += segments
    # Every window cut from this file gets the file's activity label
    labels_list += [labels[activity]] * len(segments)

## Shuffle and Split data

Split data into training and testing sets (90/10).

In [848]:
# Keep only the three linear-acceleration axes of each window (time and
# absolute acceleration are dropped) and stack the windows into one array
X = np.array([
    segment[[
        'Linear Acceleration x (m/s^2)',
        'Linear Acceleration y (m/s^2)',
        'Linear Acceleration z (m/s^2)',
    ]].values
    for segment in segmented_data
])
y = np.array(labels_list)  # One class id per window, aligned with X

# Shuffle features and labels together, then hold out 10% for testing.
# The fixed random_state keeps both steps reproducible; features and labels
# stay in separate arrays because the rest of the code expects that.
X, y = shuffle(X, y, random_state=11)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=11,
)

## Begin preprocessing

Apply a low-pass filter using a moving average with a window size of 5.

In [857]:
def moving_average_filter(data, window_size=5):
    """Smooth every feature of every segment with a centered moving average.

    Args:
        data: 3-D array indexed as (segment, time step, feature).
        window_size: Number of samples in the rolling window. The window is
            centered, and near the edges the mean is taken over however many
            samples are available (min_periods=1).

    Returns:
        A new array with the same shape as ``data``. Floating-point inputs
        keep their dtype; any other dtype yields float64 so that averages of
        integer samples are not truncated.
    """
    data = np.asarray(data)
    # np.zeros_like(data) would inherit an integer dtype and silently round
    # the rolling means down when they are stored.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64
    filtered_data = np.zeros(data.shape, dtype=out_dtype)
    for segment in range(data.shape[0]):
        # One rolling call per segment: pandas averages each feature column
        # independently along the time axis.
        frame = pd.DataFrame(data[segment])
        rolling_mean = frame.rolling(window_size, center=True, min_periods=1).mean()
        filtered_data[segment] = rolling_mean.to_numpy()
    return filtered_data

# Smooth the training and test splits separately, using the same 5-sample window for both
X_train_filtered = moving_average_filter(X_train, window_size=5)
X_test_filtered = moving_average_filter(X_test, window_size=5)

(162, 497, 3) (18, 497, 3)


## Create function for feature extraction

The feature extraction function focuses on 10 core data metrics, applying them across all 3 axes to capture patterns.

In [859]:
def _summary_stats(values):
    """Return (mean, variance) of an array, or (0, 0) when it is empty."""
    if values.size:
        return values.mean(), values.var()
    return 0, 0


# Function to extract features from an audio file
def extract_features(audio_file):
    """Summarize one audio file as a flat dictionary of features.

    Args:
        audio_file: Path of the audio file to analyze.

    Returns:
        A dict holding the file's base name, its length in seconds, the mean
        and variance of each spectral/temporal descriptor, the estimated
        tempo as a plain float, and the mean and variance of 20 MFCCs.
        Returns None when loading or analysis fails (the error is printed
        instead of raised).
    """
    n_mfcc = 20
    try:
        # sr=None asks librosa to keep the file's own sampling rate
        y, sr = librosa.load(audio_file, sr=None)

        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rms = librosa.feature.rms(y=y)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y)
        harmony, perceptr = librosa.effects.hpss(y)
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

        # Depending on the librosa version, beat_track may hand back the tempo
        # as a scalar or as a one-element array; store a plain float so the
        # 'tempo' column stays numeric when the dicts become a DataFrame.
        tempo_values = np.atleast_1d(tempo)
        tempo = float(tempo_values[0]) if tempo_values.size else 0.0

        features = {
            'filename': os.path.basename(audio_file),
            'length': len(y) / sr,
        }

        # Insertion order is kept identical to the historical column order.
        summarized = (
            ('chroma_stft', chroma_stft),
            ('rms', rms),
            ('spectral_centroid', spectral_centroid),
            ('spectral_bandwidth', spectral_bandwidth),
            ('rolloff', rolloff),
            ('zero_crossing_rate', zero_crossing_rate),
            ('harmony', harmony),
            ('perceptr', perceptr),
        )
        for name, values in summarized:
            features[f'{name}_mean'], features[f'{name}_var'] = _summary_stats(values)

        features['tempo'] = tempo

        for i in range(1, n_mfcc + 1):
            # Fall back to 0 for any coefficient row that is missing
            has_row = mfcc.shape[0] >= i
            features[f'mfcc{i}_mean'] = mfcc[i - 1].mean() if has_row else 0
            features[f'mfcc{i}_var'] = mfcc[i - 1].var() if has_row else 0

        return features
    except Exception as e:
        print(f"Error extracting features from {audio_file}: {e}")
        return None

The feature extraction function is applied to the filtered data.

In [860]:
# Compute one feature record per filtered window, keeping train and test apart
train_features_list = [
    extract_features_from_segment(window) for window in X_train_filtered
]
test_features_list = [
    extract_features_from_segment(window) for window in X_test_filtered
]

# Tabulate the per-window records as pandas DataFrames
train_features_df = pd.DataFrame(train_features_list)
test_features_df = pd.DataFrame(test_features_list)

## Z-score normalization of the extracted features

Using StandardScaler to properly normalize the individual features for the train and test set, converting back to data frames upon completion. 

## Run model

The model is initialized, trained, and tested, with accuracy scores displayed.

## Accuracy Testing

Check the model to ensure it is doing well in terms of classifications.

## Cross-validation 

To ensure that the model is functioning well, cross-validation is done with an average result shown. 