In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from glob import glob # list out all files in a directory

import soundfile as sf
import librosa
import librosa.display

from pathlib import Path

from sklearn.model_selection import train_test_split

## 1. Setting directory

In [2]:
# List every .wav file across all genre sub-folders.
# glob() makes no ordering guarantee (the order depends on the file system), so the
# result is sorted: the seeded train/test split further down only reproduces the same
# partition if its input order is the same on every machine and every run.
audio_files = sorted(glob("/Users/mayawiegand/Documents/ECS 171/Project/music-genre-classification/Raw Audio Data/*/*.wav"))

# need to train and test split on just the paths and genre list

Copy some of the structure below: create the genre and paths lists, then split them into the training/testing sets.

then segment

then make the spectrograms

## 2. Grabbing genre labels and file paths

In [3]:
# One entry per audio file, kept in the same order as audio_files:
#   paths_list -> the full file path as a Path object
#   genre_list -> the name of the file's parent folder, which is the genre label
paths_list = [Path(audio) for audio in audio_files]
genre_list = [audio_path.parent.name for audio_path in paths_list]

## 3. Splitting data into training and testing set
- Need to split into training and testing before audio clips are split into smaller segments (to give us more training instances and hopefully improve model) to prevent data leakage
- Audio code conventionally uses `y` for the raw audio signal - `create_spectro` below stays consistent with this
- Now that training and testing datasets are being built, stayed consistent with ML:
    - y = genre label
    - X = path to each of the audio files

In [4]:
# Split on the file paths and genre labels only, so no audio has to be loaded
# before the split; the audio is read later when the spectrograms are built.
X_train, X_test, y_train, y_test = train_test_split(
    paths_list,
    genre_list,
    test_size=0.2,
    random_state=42,
    stratify=genre_list,
)

# stratify=genre_list performs the split within each genre, so every genre keeps
# the same proportion in the training data and in the testing data.

## 4. Segmenting Audio Files, Re-Sampling, and Creating Spectrograms
- Segmenting to give us more training observations
- Needs to be done after training/testing split to prevent data leakage

In [5]:
def create_spectro(audio_list, label_list, target_sr=22050, clip_seconds=3, n_mels=128):
    """Cut each audio file into fixed-length clips and build one log-mel spectrogram per clip.

    Parameters
    ----------
    audio_list : iterable
        Audio files to read with ``soundfile``.
    label_list : iterable
        Genre label for each file, paired positionally with ``audio_list``.
    target_sr : int, default 22050
        Sampling rate every file is brought to before clipping. Files read at a
        different rate are resampled to this rate.
    clip_seconds : int or float, default 3
        Length of each clip in seconds.
    n_mels : int, default 128
        Number of mel frequency bands in each spectrogram.

    Returns
    -------
    spectro_list : list
        One decibel-scaled mel spectrogram per clip.
    clip_labels : list
        The label of the source file, repeated once per clip, aligned with
        ``spectro_list``.

    Raises
    ------
    ValueError
        If ``clip_seconds * target_sr`` is less than one sample.

    Notes
    -----
    Only whole clips are kept: samples left over after the last full clip are
    dropped, and a file shorter than one clip contributes no clips at all.
    """
    spectro_list = []
    clip_labels = []

    samples_per_clip = int(clip_seconds * target_sr)
    if samples_per_clip <= 0:
        raise ValueError("clip_seconds * target_sr must be at least one sample")

    for audio, label in zip(audio_list, label_list):

        # y = raw audio data, sr = sampling rate the file was stored at
        y, sr = sf.read(audio)

        # converting to one audio channel (need one-dimensional to create the spectrogram)
        if y.ndim == 2:
            y = y.mean(axis=1)

        y = y.astype(float)  # need y to be a float when computing the spectrogram

        # Resample against target_sr itself (not a repeated literal) so the clip
        # length and the resampling rate can never drift apart.
        if sr != target_sr:
            y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
            sr = target_sr

        # number of whole clips that fit in this file
        num_clips = len(y) // samples_per_clip

        for i in range(num_clips):  # looping over the range of clips to create each clip
            start = i * samples_per_clip
            end = start + samples_per_clip
            clip = y[start:end]  # slice of this song for clip i

            # mel spectrogram with n_mels frequency bands, then converted to
            # decibels relative to the clip's own maximum
            S = librosa.feature.melspectrogram(y=clip, sr=sr, n_mels=n_mels)
            S_db_mel = librosa.power_to_db(S, ref=np.max)

            spectro_list.append(S_db_mel)
            clip_labels.append(label)

    return spectro_list, clip_labels

# Build clip-level spectrograms and labels separately for each split, so clips
# from one file never end up on both sides of the train/test boundary.
X_train_spec, y_train_labels = create_spectro(audio_list=X_train, label_list=y_train)
X_test_spec, y_test_labels = create_spectro(audio_list=X_test, label_list=y_test)

## 4. Standardization

In [6]:
# Standardize both splits as (x - mean) / sd, where the mean and sd are computed
# from the training spectrograms only and then reused for the testing spectrograms.
X_train, X_test = np.array(X_train_spec), np.array(X_test_spec)

mu = X_train.mean()
# The 1e-8 keeps the denominator away from 0 (a zero or tiny spread can result
# from numerical precision or from near-constant spectrogram values).
sigma = X_train.std() + 1e-8

X_train, X_test = (X_train - mu) / sigma, (X_test - mu) / sigma


The final `X_train` and `X_test` arrays and their corresponding genre labels, `y_train_labels` and `y_test_labels`, are ready for the model.

## 5. Saving Spectrograms

In [7]:
# X_train / X_test are saved exactly as the standardization cell left them.
# They are deliberately NOT rebuilt from X_train_spec / X_test_spec here: doing so
# would throw away the standardization and write the raw dB spectrograms to disk.

# converting the clip-level labels to numpy arrays
y_train = np.array(y_train_labels)
y_test = np.array(y_test_labels)

# Every spectrogram needs exactly one label. A mismatch means X_train / X_test do not
# hold the clip-level arrays (for example, the standardization cell was not run).
assert len(X_train) == len(y_train), "X_train and y_train have different lengths"
assert len(X_test) == len(y_test), "X_test and y_test have different lengths"

save_dir = Path("processed_data")
save_dir.mkdir(exist_ok=True)

np.save(save_dir / "X_train.npy", X_train)
np.save(save_dir / "X_test.npy", X_test)
np.save(save_dir / "y_train.npy", y_train)
np.save(save_dir / "y_test.npy", y_test)

# list what was written as a quick confirmation
print(list(save_dir.glob("*")))

[PosixPath('processed_data/y_train.npy'), PosixPath('processed_data/y_test.npy'), PosixPath('processed_data/X_test.npy'), PosixPath('processed_data/X_train.npy')]
