### Main notebook for testing the project


In [1]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
#Keras
import keras
import tensorflow as tf
from keras.utils import to_categorical


2023-04-24 22:05:32.472898: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Testing code from another strategy

In [2]:
# Load the spectrogram images from disk as a training and a validation dataset
img_height = 288
img_width = 432
batch_size = 10
data_dir = "../data/Images_Original"

# Arguments shared by both calls: the two subsets are requested with the same
# split fraction, seed, image size and batch size.
_dataset_kwargs = dict(
    labels="inferred",
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    shuffle=True,
    batch_size=batch_size,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **_dataset_kwargs)

val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **_dataset_kwargs)


Found 999 files belonging to 10 classes.
Using 800 files for training.
Found 999 files belonging to 10 classes.
Using 199 files for validation.


2023-04-23 15:52:56.766861: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Recent Strategy

# Let's add some functions to generate the full spectrogram and generate segments of the spectrogram so we have more data to train on.

In [3]:
def get_melspectrogram(wav_file_path, length=30, duration_of_segments=5, overlap=False, duration_of_step=1):
    """
    Get mel spectrogram for a given wav file and save mel spectrograms of fixed-length segments of it.

    Every segment is written with np.save to
    ``../data/mel_spec_samples/<overlap|distinct>/<genre>/npy/<track>_<i>.npy`` where ``<genre>`` is the name
    of the directory that contains the wav file, ``<track>`` is the wav file name without its extension and
    ``<i>`` is the zero-based segment index. The output directory is created if it does not exist.

    Note: the number of segments is derived from ``length``, not from the amount of audio that was actually
    loaded. If the file is shorter than ``length`` seconds, the trailing segments are shorter (or empty).

    :param wav_file_path: Path to the source wav file, e.g. ``../data/genres_original/blues/blues.00000.wav``
    :param length: number of seconds of audio to load from the source file. Defaults to 30.
    :param duration_of_segments: duration of each segment in seconds. Defaults to 5.
    :param overlap: boolean determining whether slices of audio file will be overlapped or distinct. Defaults to False
    :param duration_of_step: step size in seconds from the beginning of one segment to the beginning of the next. Defaults to 1 second. Unused if overlap is False.
    :return: Mel spectrogram of the loaded audio. The segments are only saved to disk.
    """
    y, sr = librosa.load(wav_file_path, sr=None, duration=length)
    melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)

    # Segment length and distance between segment starts, in samples.
    # int() keeps the slice indices valid when a duration is passed as a float.
    samples_per_segment = int(sr * duration_of_segments)
    if overlap:
        samples_per_step = int(sr * duration_of_step)
        # Count the start positions 0, step, 2*step, ... whose window still ends within `length` seconds.
        num_segments = int((length - duration_of_segments) // duration_of_step) + 1
        directory = "overlap"
    else:
        # Distinct segments are laid back to back, so the step equals the segment length.
        samples_per_step = samples_per_segment
        num_segments = int(length // duration_of_segments)
        directory = "distinct"

    # Derive the genre and track name from the path itself instead of stripping a hard-coded prefix.
    genre = os.path.basename(os.path.dirname(wav_file_path))
    track = os.path.splitext(os.path.basename(wav_file_path))[0]

    # Directory that receives the segments; created here so the function does not rely on a setup cell.
    save_dir = f'../data/mel_spec_samples/{directory}/{genre}/npy'
    os.makedirs(save_dir, exist_ok=True)

    # Loop through the audio signal, extract each segment and save its mel spectrogram
    for i in range(num_segments):
        start = i * samples_per_step
        segment = y[start:start + samples_per_segment]
        mel_spec_segment = librosa.feature.melspectrogram(y=segment, sr=sr)
        np.save(f'{save_dir}/{track}_{i}.npy', mel_spec_segment)

    return melspectrogram


def plot_melspectrogram(melspectrogram):
    """
    Plot mel spectrogram using pyplot to visualize the data. Works for both full spectrogram and segments.

    The spectrogram is converted to decibels relative to its own maximum before it is drawn.

    :param melspectrogram: np.array of mel spectrogram generated using librosa.feature.melspectrogram()
    :return: void
    """
    # `librosa.display` is a submodule. Import it explicitly so the lookup below does not depend on the
    # installed librosa version having loaded it as a side effect of `import librosa`.
    import librosa.display

    fig, ax = plt.subplots(figsize=(10, 4))
    img = librosa.display.specshow(
        librosa.power_to_db(melspectrogram, ref=np.max),
        y_axis='mel', fmax=8000, x_axis='time', ax=ax)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    ax.set_title('Mel spectrogram')
    fig.tight_layout()
    plt.show()


In [4]:
# Try the segmentation on the first 10 tracks of every genre (distinct segments)
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
for test_genre in genres:
    # Make the directories to avoid file not found errors
    for suffix in ('', '/npy', '/png'):
        for mode in ('distinct', 'overlap'):
            os.makedirs(f'../data/mel_spec_samples/{mode}/{test_genre}{suffix}', exist_ok=True)

    for i in range(10):
        # Two-digit, zero-padded track number; the file name pattern supplies the leading "000"
        sample_number = f'{i:02d}'
        test_filename = f'../data/genres_original/{test_genre}/{test_genre}.000{sample_number}.wav'

        # Compute the full spectrogram of the track; its 5-second segments are saved to files as a side effect
        mel_spectrogram = get_melspectrogram(test_filename, length=30, duration_of_segments=5)





# Run this if you are getting 'Directory Not Found' errors

In [5]:
# Run this if you are getting 'Directory Not Found' errors

genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
for genre in genres:
    for mode in ('distinct', 'overlap'):
        for kind in ('npy', 'png'):
            # os.makedirs also creates the intermediate <mode>/<genre> directory
            os.makedirs(f'../data/mel_spec_samples/{mode}/{genre}/{kind}', exist_ok=True)

print('done')

done


# Now let's generate the images from the mel spectrogram segments we generated above.

In [None]:
# Function to generate images from mel spectrograms for a given genre
def generate_images_for_genre(genre, num_samples=6, overlap=False, num_files=100):
    """
    Generate images from mel spectrogram segments for a given genre.

    Segments are read from ``../data/mel_spec_samples/<overlap|distinct>/<genre>/npy`` and the images are
    written to the sibling ``png`` directory under the same file name with a ``.png`` extension.

    :param genre: string of genre to generate images for
    :param num_samples: number of segments to render per track. Defaults to 6.
    :param overlap: boolean determining whether slices of audio file will be overlapped or distinct. Defaults to False. Needed here for directory purposes.
    :param num_files: number of tracks to process, starting at track 0. Defaults to 100.
    :return: False if the genre is unknown or a segment file is missing, True if successful
    """
    genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
    if genre not in genres:
        print(f'Genre \"{genre}\" not in list of genres')
        return False

    # `librosa.display` is a submodule; import it explicitly rather than relying on `import librosa`
    # having loaded it.
    import librosa.display

    # splitting files which contain overlapped vs distinct segments
    directory = "overlap" if overlap else "distinct"
    base_dir = f'../data/mel_spec_samples/{directory}/{genre}'
    os.makedirs(f'{base_dir}/png', exist_ok=True)

    for j in range(num_files):
        for i in range(num_samples):
            # Track numbers are zero-padded to five digits, e.g. blues.00007_3
            segment_name = f'{genre}.{j:05d}_{i}'
            npy_path = f'{base_dir}/npy/{segment_name}.npy'
            if not os.path.isfile(npy_path):
                # Report the problem through the documented return value instead of raising
                print(f'Missing segment file: {npy_path}')
                return False

            mel_segment = np.load(npy_path)
            fig, ax = plt.subplots(figsize=(10, 4))
            img = librosa.display.specshow(
                librosa.power_to_db(mel_segment, ref=np.max),
                y_axis='mel', fmax=8000, x_axis='time', ax=ax)
            fig.colorbar(img, ax=ax, format='%+2.0f dB')
            # Title names the segment actually plotted (it used to be hard-coded to track 00000)
            ax.set_title(f'{segment_name}.npy')
            fig.tight_layout()
            save_name = f'{base_dir}/png/{segment_name}.png'
            fig.savefig(save_name, bbox_inches='tight', pad_inches=0)
            # Close the figure so that hundreds of figures do not accumulate in memory
            plt.close(fig)
            print(f'Saved image: {save_name}')

    return True

### Test that bad boy

#### This will take a minute or two to run

In [None]:
# Render PNG images for the first 5 distinct (non-overlapping) segments of each 'classical' track.
generate_images_for_genre('classical', num_samples=5)

## Now let's generate overlapping mel spectrogram segments for all genre samples (image generation is currently commented out)

#### This will take a while to run

In [None]:
# Generate overlapped segments (5-second windows, 1-second step) for all 100 tracks of every genre
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

for test_genre in genres:
    for i in range(100):
        # Two-digit, zero-padded track number; the file name pattern supplies the leading "000"
        sample_number = f'{i:02d}'
        test_filename = f'../data/genres_original/{test_genre}/{test_genre}.000{sample_number}.wav'

        # Compute the full spectrogram of the track; its overlapped segments are saved to files as a side effect
        mel_spectrogram = get_melspectrogram(
            test_filename, length=30, duration_of_segments=5, overlap=True, duration_of_step=1)

        # Report every track whose full spectrogram is not 128 x 1292
        if not mel_spectrogram.shape == (128, 1292):
            print ("bug")
            print(mel_spectrogram.shape)
            print(test_filename)

        # Image generation for the genre segments is switched off for now:
        # generate_images_for_genre(test_genre, num_samples=6)


In [3]:
"""
:param overlap: boolean determining whether slices of audio file will be overlapped or distinct. Defaults to False. Needed here for directory purposes.
"""
def get_data_and_labels(overlap=False):
    """
    Load every saved mel spectrogram segment together with its integer genre label.

    Files are read from ``../data/mel_spec_samples/<overlap|distinct>/<genre>/npy`` in sorted file-name
    order, genre by genre. The label of a segment is the index of its genre in the genre list
    (blues=0 ... rock=9). Segments whose shape is not (128, 216) are reported on stdout but still returned.

    :param overlap: boolean determining whether slices of audio file will be overlapped or distinct. Defaults to False. Needed here for directory purposes.
    :return: tuple (data, labels) of two parallel lists: the loaded arrays and their integer labels
    """
    genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
    expected_shape = (128, 216)

    data = []
    labels = []

    # splitting files which contain overlapped vs distinct segments
    directory = "overlap" if overlap else "distinct"

    print(f'Extracting...')

    for label, genre in enumerate(genres):
        print(f'Extracting {genre}')
        npy_dir = f'../data/mel_spec_samples/{directory}/{genre}/npy'

        # os.listdir returns entries in arbitrary order; sorting makes the result (and therefore any
        # later seeded shuffle) reproducible across machines and runs.
        for file in sorted(os.listdir(npy_dir)):
            # Skip anything that is not a saved segment (e.g. hidden files created by the OS)
            if not file.endswith('.npy'):
                continue

            melspectrogram = np.load(f'{npy_dir}/{file}')

            if melspectrogram.shape != expected_shape:
                print("Bug")
                print(file)
                print(melspectrogram.shape)

            # Appending features and labels
            data.append(melspectrogram)
            labels.append(label)

    print('Finished extracting features and labels for all genres')
    return data, labels

Extracting...
Extracting blues
Extracting classical
Extracting country
Extracting disco
Extracting hiphop
Extracting jazz
Extracting metal
Extracting pop
Extracting reggae
Extracting rock
Finished extracting features and labels for all genres


**RUN CODE FROM HERE ONCE DATA CREATED**

In [26]:
from sklearn.utils import shuffle

# Load the overlapped mel spectrogram segments and their integer genre labels from disk.
x_data, y_data = get_data_and_labels(overlap=True)

def preprocess_mel_spectrogram(mel_spectrogram, normalize=True):
    """
    Preprocesses a mel spectrogram for use in a neural network.

    The spectrogram is converted to decibels relative to its own maximum, cast to float32, optionally
    min-max scaled to [0, 1] and finally given a trailing channel axis.

    Args:
        mel_spectrogram (numpy array): Mel spectrogram to be preprocessed.
        normalize (bool, optional): Whether to normalize the mel spectrogram. Defaults to True.

    Returns:
        numpy array: Preprocessed float32 mel spectrogram with one extra trailing axis of size 1.
        When normalizing a spectrogram whose decibel values are all equal, an all-zero array is returned.
    """
    # Convert to decibels
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    mel_spectrogram = np.asarray(mel_spectrogram, dtype=np.float32)

    # Normalize if specified
    if normalize:
        lowest = np.min(mel_spectrogram)
        value_range = np.max(mel_spectrogram) - lowest
        if value_range > 0:
            # Normalize to [0, 1]
            mel_spectrogram = (mel_spectrogram - lowest) / value_range
        else:
            # A constant spectrogram would otherwise be 0 / 0 = NaN everywhere, which would poison
            # training; map it to zeros instead.
            mel_spectrogram = np.zeros_like(mel_spectrogram)

    # Convert to 3D array with an additional channel dimension
    mel_spectrogram = np.expand_dims(mel_spectrogram, axis=-1)

    return mel_spectrogram


def train_test_split(x_data, y_data, test_percentage=0.2, random_state=0):
    """
    Preprocess the spectrograms, shuffle them together with their labels and split off a test set.

    NOTE: this definition shadows ``sklearn.model_selection.train_test_split`` imported in the first cell.

    Args:
        x_data (list of numpy arrays): raw mel spectrograms. The list is not modified.
        y_data (list): labels, parallel to ``x_data``.
        test_percentage (float, optional): fraction of the samples that goes to the test set. Defaults to 0.2.
        random_state (int, optional): seed passed to ``shuffle``. Defaults to 0.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)``
    """
    # Value range of the raw data, computed per spectrogram so the whole list is never stacked
    # into one large array just to be inspected.
    print(max(np.max(spectrogram) for spectrogram in x_data))
    print(min(np.min(spectrogram) for spectrogram in x_data))

    # Build a new list instead of overwriting x_data in place: running this a second time must not
    # preprocess data that has already been preprocessed.
    x_processed = [preprocess_mel_spectrogram(spectrogram) for spectrogram in x_data]

    x_shuffled, y_shuffled = shuffle(x_processed, y_data, random_state=random_state)

    # The last `num_test` shuffled samples form the test set. (The previous expression
    # int(1 - test_percentage * n) evaluated the product first and produced a negative index.)
    num_test = int(len(x_shuffled) * test_percentage)
    test_split = len(x_shuffled) - num_test

    x_train = x_shuffled[:test_split]
    y_train = y_shuffled[:test_split]
    x_test = x_shuffled[test_split:]
    y_test = y_shuffled[test_split:]

    return x_train, y_train, x_test, y_test

Extracting...
Extracting blues
Extracting classical
Extracting country
Extracting disco
Extracting hiphop
Extracting jazz
Extracting metal
Extracting pop
Extracting reggae
Extracting rock
Finished extracting features and labels for all genres


# See how it went broski

In [27]:
# Preprocess, shuffle and split the loaded data
x_train, y_train, x_test, y_test = train_test_split(x_data, y_data)

# Largest, then smallest value in the training set (1.0 and 0.0 after min-max normalization)
for reducer in (np.max, np.min):
    print(reducer(x_train))

10813.572
0.0
1.0
0.0


**Takes ~ 6 minutes to run on my machine**

In [28]:
# Build tf.data datasets from the (features, labels) pairs of the training split...
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
print("train dataset created")
# ...and of the test split.
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
print("test dataset created")
# No separate validation dataset is built; the validation split is disabled in train_test_split.
# val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))

train dataset created
test dataset created


**RUN CODE FROM HERE FOR TRAINING**

In [29]:
# NOTE: this rebinds the name `keras` (imported as the standalone package in the first cell)
# to `tensorflow.keras`.
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.regularizers import L2

# Number of samples per batch for both training and evaluation
BATCH_SIZE = 10

# Batched views of the datasets; these are what the model is trained and evaluated on.
b_train_dataset = train_dataset.batch(BATCH_SIZE)
b_test_dataset = test_dataset.batch(BATCH_SIZE)


In [30]:
# Small CNN: one regularized convolution followed directly by a 10-way softmax classifier.
# The input is one normalized mel spectrogram segment of shape (128, 216, 1).
net = Sequential([
    keras.Input(shape=(128, 216, 1)),
    Dropout(0.1),
    Conv2D(16, kernel_size=(5, 5), activation="relu", kernel_regularizer=L2(0.1)),
    Flatten(),
    Dropout(0.5),
    Dense(10, activation="softmax")])

# Labels are integer class indices, hence the sparse variant of categorical cross-entropy.
net.compile(loss="sparse_categorical_crossentropy", optimizer="adam",
            metrics=["accuracy"])

# Stop once val_loss has not improved for 3 epochs and roll back to the best weights seen.
earlyStopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, mode='min', restore_best_weights=True)

# The datasets are already batched with BATCH_SIZE, so `batch_size` is not passed to fit():
# Keras documents that it must not be specified when the input is a dataset.
# NOTE(review): the test set doubles as validation data here, so early stopping selects weights
# using the same data that is later used for the final evaluation; the reported test score is
# therefore optimistic. Consider holding out a separate validation split.
history = net.fit(b_train_dataset, verbose=1, epochs=10, callbacks=[earlyStopping], validation_data=b_test_dataset)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [33]:
# List the names of the metrics that were recorded for every epoch during training.
print(history.history.keys())

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])


# Normalize the data so that it has zero mean and unit variance

In [35]:
# Loss and accuracy on the batched test set. This is the same dataset that was passed to fit() as
# validation data.
net.evaluate(b_test_dataset)



[0.8215222358703613, 0.7357184290885925]

In [None]:
# Load one saved spectrogram segment from its .npy file and compare it before and after preprocessing.
# Segments live under <distinct|overlap>/<genre>/npy/, so the path has to include those directories.
test_filename = '../data/mel_spec_samples/distinct/blues/npy/blues.00000_0.npy'
var = np.load(test_filename)

plot_melspectrogram(var)

# Generate the normalized mel spectrogram with the same preprocessing the model input goes through
# (decibel conversion followed by min-max scaling to [0, 1]).
mel_spectrograms_normalized = preprocess_mel_spectrogram(var)

print(f'shape: {mel_spectrograms_normalized.shape}')
# Plot the normalized mel spectrogram as a line plot
plt.plot(mel_spectrograms_normalized.flatten())
plt.title('Normalized Mel Spectrogram')
plt.xlabel('Mel spectrogram bin')
plt.ylabel('Normalized value')
plt.show()

# Plot the original (unnormalized) mel spectrogram as a line plot
plt.plot(var.flatten())
plt.title('Original Mel Spectrogram')
plt.xlabel('Mel spectrogram bin')
plt.ylabel('Power')
plt.show()



