# ***Muziek classificeren***

|Teamleden|Kaggle Username|GitHub Username|
|--|--|--|
|Busse Heemskerk|bussejheemskerk|BJHeemskerk|
|Declan van den Hoek|declanvdh|DeclanvandenHoek|
|Isa Dijkstra|isadijkstra|IsaD01|

In dit notebook gaan we korte muzieksamples classificeren met behulp van unsupervised learning. Een deel van deze bestanden heeft een genrelabel, terwijl de meeste dit niet hebben. Aan ons is de taak om zo accuraat mogelijk te bepalen welke genres de ongelabelde samples hebben, door middel van unsupervised learning.

Voor het project hebben we gewerkt in [GitHub](https://github.com/BJHeemskerk/MachineLearning/tree/main/Muziek), om makkelijk de bestanden te delen. Van elk model zijn de voorspellingen ook geüpload naar [Kaggle](https://www.kaggle.com/competitions/muziek-genre-clustering/overview).

## **Libraries en data inladen** <a name='h1'></a>

In [4]:
import os
import librosa as lr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from librosa.core import stft
from librosa.core import amplitude_to_db
from librosa.display import specshow

# Om audio af te kunnen spelen
from IPython.display import Audio

In de onderstaande cel, die gebaseerd is op de code van het Machine Learning notebook uit week 11, wordt de data ingelezen en in een dataframe gezet.

In [5]:
def mfccs(data, sfreq):
    """
    Summarise an audio signal by the mean of each MFCC coefficient.

    Parameters:
    - data: The audio signal, passed to librosa as `y`.
    - sfreq: The sampling rate of the signal, passed to librosa as `sr`.

    Returns:
    - A dict with one entry per row of the MFCC matrix, keyed
      'mfcc1_mean', 'mfcc2_mean', ... (numbering starts at 1).
    """
    coefficients = lr.feature.mfcc(y=data, sr=sfreq)

    # Each row holds one coefficient; average it over the remaining axis.
    return {
        f'mfcc{number}_mean': np.mean(row)
        for number, row in enumerate(coefficients, start=1)
    }

def calculate_spectrograms(audio_clips, n_fft=2048, hop_length=512, win_length=None):
    """
    Compute a magnitude spectrogram and its decibel version for every clip.

    Parameters:
    - audio_clips: An iterable of audio clips (numpy arrays).
    - n_fft: FFT size forwarded to the STFT (default 2048).
    - hop_length: Hop between successive frames, in samples (default 512).
    - win_length: Window size forwarded to the STFT (default None).

    Returns:
    - spectrograms: A list with the absolute value of each clip's STFT.
    - spectrograms_db: A list with the same spectrograms converted by
      `amplitude_to_db` with `ref=np.max`, intended for plotting.
    """
    # Magnitude of the short-time Fourier transform, one entry per clip.
    magnitudes = [
        np.abs(stft(y=signal,
                    n_fft=n_fft,
                    hop_length=hop_length,
                    win_length=win_length))
        for signal in audio_clips
    ]

    # Decibel-scaled copies of the magnitudes.
    decibels = [amplitude_to_db(S=magnitude, ref=np.max)
                for magnitude in magnitudes]

    return magnitudes, decibels

from librosa.feature import spectral_bandwidth, spectral_centroid

def calculate_spectral_features(spectrograms):
    """
    Compute the spectral bandwidth and spectral centroid of each spectrogram.

    Parameters:
    - spectrograms: An iterable of spectrograms.

    Returns:
    - bandwidths: A list with the `spectral_bandwidth` result per spectrogram.
    - centroids: A list with the `spectral_centroid` result per spectrogram.
    """
    # Evaluate both features in a single pass so one-shot iterables work too.
    feature_pairs = [
        (spectral_bandwidth(S=spec), spectral_centroid(S=spec))
        for spec in spectrograms
    ]

    bandwidths = [bandwidth for bandwidth, _ in feature_pairs]
    centroids = [centroid for _, centroid in feature_pairs]

    return bandwidths, centroids

In [6]:
# Load the genre labels and order them by filename
labeled_data = pd.read_csv("labels_new.csv", sep=',')
labeled_data = labeled_data.sort_values('filename')

# Directory holding the labeled samples
base_dir = "labeled"

# Accumulators; all of them are filled in the same (sorted) file order
filenames = []
audio_data = []
sample_freqs = []
mfcc_data = {}

# Target length: 30 seconds at 22050 Hz.
# NOTE(review): files are loaded with sr=None (native rate), so this equals
# 30 s only if every file is sampled at 22050 Hz — confirm.
lengte = 30 * 22050

# os.listdir returns entries in arbitrary order; sort so the row order is
# deterministic and matches the filename-sorted labels.
for file in sorted(os.listdir(base_dir)):
    if not file.endswith(".wav"):
        continue

    file_path = os.path.join(base_dir, file)
    data, sfreq = lr.load(file_path, sr=None)

    # Force every clip to exactly `lengte` samples
    if len(data) > lengte:
        # Truncate the data
        data = data[:lengte]
    elif len(data) < lengte:
        # Pad with zeros at the end
        padding = lengte - len(data)
        data = np.pad(data, (0, padding), mode='constant')

    # Record the filename together with its data, so non-.wav entries in the
    # directory can never shift or lengthen the 'filename' column.
    filenames.append(file)
    audio_data.append(data)
    sample_freqs.append(sfreq)

    # Collect the per-coefficient MFCC means column-wise
    for key, value in mfccs(data, sfreq).items():
        mfcc_data.setdefault(key, []).append(value)

# Convert to numpy arrays
audio_data = np.stack(audio_data, axis=0)
sample_freqs = np.array(sample_freqs)

# One row per loaded file; every column below is in `audio_data` order
audio_df = pd.DataFrame({
    'filename': filenames,
    'data': audio_data.tolist(),
    'Hz': sample_freqs.tolist(),
})

# Add a column for each MFCC coefficient
for key, values in mfcc_data.items():
    audio_df[key] = values

# Bandwidths and centroids, computed in `audio_data` order
spectrograms, spectrograms_db = calculate_spectrograms(audio_data, n_fft=2048, hop_length=512, win_length=None)
bandwidths, centroids = calculate_spectral_features(spectrograms)

# Attach the spectral means to audio_df BEFORE merging: the merge then matches
# them to the labels by filename instead of relying on row position.
audio_df['mean_bandwidth'] = [np.mean(arr) for arr in bandwidths]
audio_df['mean_centroids'] = [np.mean(arr) for arr in centroids]

# Join labels and audio features on 'filename'; labels without a matching
# audio file keep NaN in the feature columns (left join).
df = labeled_data.merge(audio_df, how='left', on='filename')

# Display the merged DataFrame
display(df.head())

Unnamed: 0,filename,genre,data,Hz,mfcc1_mean,mfcc2_mean,mfcc3_mean,mfcc4_mean,mfcc5_mean,mfcc6_mean,...,mfcc13_mean,mfcc14_mean,mfcc15_mean,mfcc16_mean,mfcc17_mean,mfcc18_mean,mfcc19_mean,mfcc20_mean,mean_bandwidth,mean_centroids
0,m00002.wav,jazz,"[-0.016357421875, -0.0228271484375, -0.0146789...",22050,-298.807953,112.078209,6.48577,28.386517,-6.764679,16.651894,...,-4.326794,1.407904,0.714065,13.479482,9.419415,6.914652,7.877785,-1.78274,1919.91765,1451.498371
1,m00039.wav,reggae,"[-0.09478759765625, -0.15338134765625, -0.1439...",22050,-169.243668,110.447716,-8.553957,43.898693,0.266454,26.646509,...,-8.172387,2.804638,-6.414233,1.102364,-4.261436,4.327076,-3.458247,1.208493,2019.252686,1811.358216
2,m00041.wav,pop,"[0.078033447265625, -0.03765869140625, 0.12664...",22050,-18.854591,71.328522,-3.743232,-1.396592,0.710347,-1.049137,...,2.415428,1.247249,0.971811,-3.58823,0.891752,-0.496283,0.708363,1.672521,2992.192112,3111.061099
3,m00072.wav,disco,"[0.1060791015625, 0.0849609375, 0.062103271484...",22050,-69.599335,83.05957,-16.599524,0.119469,7.415704,0.769619,...,-5.765528,-0.266381,-3.210432,-1.885092,4.043784,3.654379,1.913874,3.608692,2709.990169,2625.095044
4,m00096.wav,disco,"[-0.03607177734375, -0.105682373046875, -0.201...",22050,-91.886307,87.604057,-2.058175,34.285538,-18.15337,19.344702,...,-9.81162,9.113958,-11.469915,-1.051909,-11.64962,0.293441,-1.279737,0.098982,2486.02065,2550.135384


Nu de data is ingeladen kunnen we met behulp van de Audio functie een bestand afspelen in het notebook.

In [33]:
# Draw one row position at random
random_index = np.random.choice(len(df), size=1, replace=False).item()

# Look up the metadata of that row, then rebuild the waveform array
# (the 'data' column stores each clip as a plain list)
sfreq, file, genre = (
    df.at[random_index, column] for column in ('Hz', 'filename', 'genre')
)
clip = np.array(df.at[random_index, 'data'])

# Report which file was picked and its genre
print(f"Bestand en genre: {file}, {genre}")

# Last expression of the cell: renders the audio player in the notebook
Audio(data=clip, rate=sfreq)


Bestand en genre: m00553.wav, disco


## **Feature Engineering** <a name='h2'></a>

In [36]:
from librosa.core import stft
from librosa.core import amplitude_to_db
from librosa.display import specshow

def calculate_spectrograms(audio_clips, n_fft=2048, hop_length=512, win_length=None):
    """
    Compute a magnitude spectrogram and its decibel version for every clip.

    Parameters:
    - audio_clips: An iterable of audio clips (numpy arrays).
    - n_fft: FFT size forwarded to the STFT (default 2048).
    - hop_length: Hop between successive frames, in samples (default 512).
    - win_length: Window size forwarded to the STFT (default None).

    Returns:
    - spectrograms: A list with the absolute value of each clip's STFT.
    - spectrograms_db: A list with the same spectrograms converted by
      `amplitude_to_db` with `ref=np.max`, intended for plotting.
    """
    # Magnitude of the short-time Fourier transform, one entry per clip.
    magnitudes = [
        np.abs(stft(y=signal,
                    n_fft=n_fft,
                    hop_length=hop_length,
                    win_length=win_length))
        for signal in audio_clips
    ]

    # Decibel-scaled copies of the magnitudes.
    decibels = [amplitude_to_db(S=magnitude, ref=np.max)
                for magnitude in magnitudes]

    return magnitudes, decibels

from librosa.feature import spectral_bandwidth, spectral_centroid

def calculate_spectral_features(spectrograms):
    """
    Compute the spectral bandwidth and spectral centroid of each spectrogram.

    Parameters:
    - spectrograms: An iterable of spectrograms.

    Returns:
    - bandwidths: A list with the `spectral_bandwidth` result per spectrogram.
    - centroids: A list with the `spectral_centroid` result per spectrogram.
    """
    # Evaluate both features in a single pass so one-shot iterables work too.
    feature_pairs = [
        (spectral_bandwidth(S=spec), spectral_centroid(S=spec))
        for spec in spectrograms
    ]

    bandwidths = [bandwidth for bandwidth, _ in feature_pairs]
    centroids = [centroid for _, centroid in feature_pairs]

    return bandwidths, centroids

# Compute the spectral features for every loaded clip (rows of `audio_data`)
spectrograms, spectrograms_db = calculate_spectrograms(audio_data, n_fft=2048, hop_length=512, win_length=None)
bandwidths, centroids = calculate_spectral_features(spectrograms)

# Collapse each per-frame feature array to a single mean per clip
mean_bw = [np.mean(arr) for arr in bandwidths]
mean_cent = [np.mean(arr) for arr in centroids]

# The means are in `audio_data` order, whereas `df` follows the label order.
# Key them by the filenames of `audio_df` (built row-for-row from
# `audio_data`) and join on 'filename', so each value ends up with its own
# file rather than with whatever row shares its position.
spectral_means = pd.DataFrame({
    'filename': audio_df['filename'].to_numpy(),
    'mean_bandwidth': mean_bw,
    'mean_centroids': mean_cent,
})

features = df[['filename', 'genre']].merge(spectral_means, how='left', on='filename')
features

Unnamed: 0,filename,genre,mean_bandwidth,mean_centroids
0,m00002.wav,jazz,1919.91765,1451.498371
1,m00039.wav,reggae,2019.252686,1811.358216
2,m00041.wav,pop,2992.192112,3111.061099
3,m00072.wav,disco,2709.990169,2625.095044
4,m00096.wav,disco,2486.02065,2550.135384
5,m00102.wav,classical,1517.85616,1173.931066
6,m00112.wav,reggae,2595.217468,2113.391954
7,m00138.wav,reggae,2295.46624,1995.600514
8,m00192.wav,classical,1965.028715,1878.77403
9,m00206.wav,hiphop,2295.456105,2735.795802


## MFCC functie

In [9]:
def mfccs(data, sfreq):
    """
    Summarise an audio signal by the mean of each MFCC coefficient.

    Parameters:
    - data: The audio signal, passed to librosa as `y`.
    - sfreq: The sampling rate of the signal, passed to librosa as `sr`.

    Returns:
    - A dict with one entry per row of the MFCC matrix, keyed
      'mfcc1_mean', 'mfcc2_mean', ... (numbering starts at 1).
    """
    mfcc = lr.feature.mfcc(y=data, sr=sfreq)
    datadict = {}

    # Average each coefficient row of the local `mfcc` array. The previous
    # code indexed `mfccs` (this function itself), which raises a TypeError.
    for number, row in enumerate(mfcc, start=1):
        datadict[f'mfcc{number}_mean'] = np.mean(row)

    return datadict