In [2]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

from glob import glob # list out all files in a directory

import soundfile as sf
import librosa
import librosa.display

from pathlib import Path

from sklearn.model_selection import train_test_split

## 1. Setting directory

In [3]:
# Root folder of the raw dataset: one sub-folder per genre, each holding that genre's .wav files.
# Kept in a single constant so the location only has to be changed in one place on another machine.
RAW_AUDIO_DIR = Path("/Users/mayawiegand/Documents/ECS 171/Project/music-genre-classification/Raw Audio Data")

# creating a list of all of the audio files for all of the genres
# glob returns matches in arbitrary (filesystem-dependent) order, so the list is sorted to keep the file order,
# and therefore the seeded train/test split further down, reproducible from run to run
audio_files = sorted(glob(str(RAW_AUDIO_DIR / "*" / "*.wav")))

## 2. Reading in raw audio data and converting to mel-spectrograms

In [4]:
# audio settings shared by every file so that all spectrograms are directly comparable
TARGET_SR = 22050  # sampling rate (Hz) that every clip is brought to
N_MELS = 128       # number of mel frequency bands per spectrogram

# initializing containers so we can keep track of how many audio files fail to load and therefore don't get processed
ok = 0
bad = []

# initializing lists to store all of the spectrograms that are created in the loop and their corresponding genre labels and original file paths
# the three lists are parallel: index i in each one refers to the same audio file
spectro_list = []
genre_list = []
paths_list = []

for audio in audio_files:
    try:
        y, sr = sf.read(audio) # using sound file to read in audio, y = raw audio data and sr = sampling rate (how often the audio is sampled by the computer since it isn't continuous like human ears hear it)
        if y.ndim > 1:
            # soundfile returns multi-channel audio as (frames, channels), while librosa works along the last axis
            # averaging over the channel axis gives a 1-D mono signal so resampling and the spectrogram run over time
            y = y.mean(axis=1)
        if sr != TARGET_SR:
            y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SR) # resampling to make sure all files have consistent sampling rate
            sr = TARGET_SR
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS) # creating mel spectrogram, n_mels = how many perceptual frequency bands do you want (how finely to slice frequency axis to best represent how humans hear it)
        S_db_mel = librosa.power_to_db(S, ref=np.max) # converting to log decibels (so this can be understood as volume)

        current_path = Path(audio) # grabbing the full file path (just in case we need later)
        current_genre = current_path.parent.name # grabbing the folder name of the parent folder which is the genre label
    except Exception as e:
        # best-effort loading: remember which file failed and why, then move on to the next one
        bad.append((audio, repr(e)))
    else:
        # only record a file once every step succeeded, and add to all three lists together so they stay aligned
        spectro_list.append(S_db_mel)
        genre_list.append(current_genre)
        paths_list.append(current_path)
        ok += 1

# printing out the number of audio files that could be successfully loaded and processed, and the number that failed
# keeping track of the audio files that failed

print("Loaded:", ok)
print("Failed:", len(bad))
if bad:
    print("First failure:", bad[0])

Loaded: 999
Failed: 1
First failure: ('/Users/mayawiegand/Documents/ECS 171/Project/music-genre-classification/Raw Audio Data/jazz/jazz.00054.wav', 'LibsndfileError(1, "Error opening \'/Users/mayawiegand/Documents/ECS 171/Project/music-genre-classification/Raw Audio Data/jazz/jazz.00054.wav\': ")')


## 3. Splitting data into training and testing set
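- The train/test split has to happen before the audio clips are cut into smaller segments (segmenting gives us more training instances and hopefully improves the model). Otherwise, segments from the same recording could end up in both the training and the testing set, which would be data leakage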
- Audio code typically uses y to represent the raw audio signal - the loading code above follows that convention
- From here on, now that the training and testing datasets are being built, the standard ML convention is used instead:
    - y = genre label
    - X = mel-spectrogram

In [None]:
# train_test_split returns a train/test pair for EVERY array passed in, in the same order as the inputs
# three arrays go in (spectrograms, genres, paths), so six values come out and all six must be unpacked
# paths_train / paths_test keep the original file path of each example in the matching split
X_train, X_test, y_train, y_test, paths_train, paths_test = train_test_split(
    spectro_list,
    genre_list,
    paths_list,
    test_size=0.2,
    random_state=42,
    stratify=genre_list,
)
# using stratify here so that the genre proportions are consistent
# train/test split is done separately within each class to make sure each genre is represented proportionately in training and testing data

## 4. Cleaning
- Segment into smaller clips (3-5 seconds) and ensure all clips are the same length
- Standardize