In [None]:
# Library Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

import glob
import os

import librosa
import librosa.display


import torch
from torch import nn
from torchvision import models, transforms, datasets

from time import time
from tqdm import tqdm


In [None]:
# Preprocessing configuration.
#
# The preprocessing approach (converting the GTZAN audio tracks to Mel
# spectrogram images) is adapted from the Kaggle notebook
# "https://www.kaggle.com/code/nippani/gtzan-mel-spectrogram-resnet18".
# This cell defines the shared parameters; the following cell performs the
# conversion.

# Reproducibility: seed every RNG this notebook relies on, not just NumPy's.
seed = 12
np.random.seed(seed)
torch.manual_seed(seed)

# Input audio directory (one sub-folder per genre) and output image directory.
# Both keep their trailing slash because other cells build paths by string
# concatenation.
path_to_audios = "Data/genres_original/"
path_imgs = "./mel_spectrogram_imgs/"

batch_size = 32

# Short-time Fourier transform parameters used for the spectrograms.
hop_length = 512
n_fft = 2048

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Genre name -> integer class label.
genre_dict = {"blues":0,"classical":1,"country":2,"disco":3,"hiphop":4,"jazz":5,"metal":6,"pop":7,"reggae":8,"rock":9}

In [None]:
# Convert every audio track into a dB-scaled Mel spectrogram, then render each
# spectrogram to a PNG under `path_imgs/<genre>/`. This works on the audio
# files directly.
print("Transforming the Audio Files into Mel Spectrograms:")

# Tracks that are deliberately not loaded (presumably because the file is
# unreadable -- TODO confirm). Matching on the base name keeps the skip working
# regardless of the path separator glob returns.
skipped_tracks = {"jazz.00054.wav"}

# genre name -> list of 2-D dB-scaled Mel spectrogram arrays, one per track.
mel_spectogram_data = {}
for genre in genre_dict.keys():
    print("\t",genre)

    mel_spectogram_data[genre] = []

    # Sorted so the index used in the image file names below is stable
    # across runs and machines (glob order is otherwise unspecified).
    for name in sorted(glob.glob(path_to_audios + genre + "/*")):

        if os.path.basename(name) in skipped_tracks:
            continue

        data, sampling_rate = librosa.load(name)

        mel_spec = librosa.feature.melspectrogram(
            y=data, sr=sampling_rate, n_fft=n_fft, hop_length=hop_length
        )
        # melspectrogram returns a *power* spectrogram by default, so the dB
        # conversion must be power_to_db; amplitude_to_db would square the
        # values again and double every dB figure.
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        mel_spectogram_data[genre].append(mel_spec_db)


print("Saving the Mel Spectrogram Images:")

for genre in genre_dict.keys():
    print("\t",genre)

    # makedirs(..., exist_ok=True) also creates `path_imgs` itself and lets the
    # cell be re-run; any real failure (e.g. permissions) still raises.
    genre_dir = os.path.join(path_imgs, genre)
    os.makedirs(genre_dir, exist_ok=True)

    for i, spec in enumerate(mel_spectogram_data[genre]):

        fig, ax = plt.subplots(1, figsize=(12,8))

        # `sampling_rate` is the value left over from the last track loaded
        # above; librosa.load is called without `sr`, so every track was
        # loaded with the same (default) rate.
        librosa.display.specshow(spec, sr=sampling_rate, hop_length=hop_length, cmap='cool', ax=ax)

        fig.savefig(os.path.join(genre_dir, genre + "_" + str(i) + ".png"))

        # Close this specific figure so figures do not pile up in memory.
        plt.close(fig)

In [None]:
# Compute several librosa features per track directly from the audio files
# (no CSV is read in this cell).
#
# Result: mel_spectogram_data2[genre] is a list with one entry per track; each
# entry is the list
#   [mel_spec, mel_spec_db, mfcc, chroma, spectral_centroids, spectral_rolloff]

print("Transforming the Audio Files into Mel Spectrograms:")

mel_spectogram_data2 = {}
for genre in genre_dict.keys():
    print("\t", genre)

    mel_spectogram_data2[genre] = []

    # Sorted so the per-genre track order is stable across runs and machines
    # (glob order is otherwise unspecified).
    for name in sorted(glob.glob(path_to_audios + genre + "/*")):

        # This track is deliberately not loaded (presumably because the file
        # is unreadable -- TODO confirm). Matching on the base name keeps the
        # skip working regardless of the path separator glob returns.
        if os.path.basename(name) == "jazz.00054.wav":
            continue

        data, sampling_rate = librosa.load(name)

        # Every feature uses the shared n_fft / hop_length so that all of them
        # stay on the same frame grid if those settings are ever changed.

        # 1. Mel spectrogram (power) and its dB-scaled version
        mel_spec = librosa.feature.melspectrogram(
            y=data, sr=sampling_rate, n_mels=128, n_fft=n_fft, hop_length=hop_length
        )
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # 2. MFCC
        mfcc = librosa.feature.mfcc(
            y=data, sr=sampling_rate, n_mfcc=20, n_fft=n_fft, hop_length=hop_length
        )

        # 3. Chroma features
        chroma = librosa.feature.chroma_stft(
            y=data, sr=sampling_rate, n_fft=n_fft, hop_length=hop_length
        )

        # 4. Spectral features
        spectral_centroids = librosa.feature.spectral_centroid(
            y=data, sr=sampling_rate, n_fft=n_fft, hop_length=hop_length
        )
        spectral_rolloff = librosa.feature.spectral_rolloff(
            y=data, sr=sampling_rate, n_fft=n_fft, hop_length=hop_length
        )

        song_features = [mel_spec, mel_spec_db, mfcc, chroma, spectral_centroids, spectral_rolloff]
        mel_spectogram_data2[genre].append(song_features)


In [None]:
# Sanity check: show the first jazz spectrogram produced by the audio cell.
first_jazz_spectrogram = mel_spectogram_data["jazz"][0]
print(first_jazz_spectrogram)