In [1]:
import torch
from torch.utils.data import Dataset

import torchaudio
import torchaudio.transforms

import sys, os

from pprint import pprint

from tqdm.autonotebook import tqdm

import json

import numpy as np

import matplotlib.pylab as plt
import seaborn as sns

import librosa
import librosa.display

import pandas as pd

from pathlib import Path

import gc

MANUAL_SEED = 69

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

from datetime import date
from datetime import datetime

import os.path
from os import path
  
import json

import time

import copy

from matplotlib import pyplot as plt
plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.dpi'] = 150

from sklearn.decomposition import PCA



In [2]:
def make_dir_if_absent(dir_path):
  
  # print("making dir: ", dir_path)
  
  if not os.path.exists(dir_path):
    os.makedirs(dir_path)
    

In [3]:
class FMADataset(Dataset):

  def __init__(
    self, path_waveform, path_mel_spectrogram
  ):
    self.path_waveform = path_waveform
    self.path_mel_spectrogram = path_mel_spectrogram
    
    self.data_paths_waveform = self._load_audio_list(self.path_waveform)
    self.data_paths_mel_spectrogram = self._load_audio_list(self.path_mel_spectrogram)

  def __len__(self):
    return len(self.data_paths_waveform)

  def __getitem__(self, idx):
    
    data_waveform = torch.load(self.data_paths_waveform[idx])
    data_mel_spectrogram = torch.load(self.data_paths_mel_spectrogram[idx])

    return data_mel_spectrogram, data_waveform
    
  def _load_audio_list(self, waveform_or_mel_spec_path):
    
    audio_path_list = []
    
    for path, subdirs, files in tqdm(
      os.walk(waveform_or_mel_spec_path), colour="magenta"
    ):
      for name in files:
          
        file_audio_path = os.path.join(path, name)
        
        audio_path_list.append(file_audio_path)
        
    return audio_path_list
        
        

In [4]:
DATASET_SIZE = "l"
DATASET_FOLDER_WAVEFORM = f"../data/waveform"
DATASET_FOLDER_MEL_SPECTROGRAM = f"../data/mel_spectrogram"

DATASET_NUM_SAMPLES_PER_SECOND = 8000
DATASET_NUM_CHANNELS = 1

DATASET_NAME = f"fma_{DATASET_SIZE}_resampled_{DATASET_NUM_SAMPLES_PER_SECOND}_rechanneled_{DATASET_NUM_CHANNELS}"

DATASET_NAME_WAVEFORM = DATASET_NAME

MEL_SPECTROGRAM_N_FFT = 1024
MEL_SPECTROGRAM_WIN_LENGTH = None
MEL_SPECTROGRAM_HOP_LENGTH = 128
MEL_SPECTROGRAM_N_MELS =  128
DATASET_NAME_MEL_SPECTROGRAM = f"fma_{DATASET_SIZE}_resampled_{DATASET_NUM_SAMPLES_PER_SECOND}_rechanneled_{DATASET_NUM_CHANNELS}_n_fft_{MEL_SPECTROGRAM_N_FFT}_win_length_{MEL_SPECTROGRAM_WIN_LENGTH}_hop_length_{MEL_SPECTROGRAM_HOP_LENGTH}_n_mels_{MEL_SPECTROGRAM_N_MELS}"

dataset_path_waveform = f"{DATASET_FOLDER_WAVEFORM}/{DATASET_NAME_WAVEFORM}"
dataset_path_mel_spectrogram = f"{DATASET_FOLDER_MEL_SPECTROGRAM}/{DATASET_NAME_MEL_SPECTROGRAM}"

TRAINING_LOGS_FOLDER = "./logs"



In [5]:
fma_dataset = FMADataset(
  path_waveform=dataset_path_waveform, 
  path_mel_spectrogram=dataset_path_mel_spectrogram
)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [6]:
def get_mean_std(loader):
    # var[X] = E[X**2] - E[X]**2
    channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0

    for data, _ in tqdm(loader, colour="#32CD32"):
        channels_sum += torch.mean(data, dim=[0, 2, 3])
        channels_sqrd_sum += torch.mean(data ** 2, dim=[0, 2, 3])
        num_batches += 1

    mean = channels_sum / num_batches
    std = (channels_sqrd_sum / num_batches - mean ** 2) ** 0.5

    return mean, std

In [7]:
TRAIN_VAL_PERCENTAGE = 0.9

full_size = len(fma_dataset)
train_val_size = int(TRAIN_VAL_PERCENTAGE * full_size)
test_size = full_size - train_val_size

In [8]:
generator=torch.Generator().manual_seed(MANUAL_SEED)

fma_dataset_train_val, fma_dataset_test = torch.utils.data.random_split(
  fma_dataset, [train_val_size, test_size], generator
)

In [9]:
TRAIN_PERCENTAGE = 0.8

full_size = train_val_size
train_size = int(TRAIN_PERCENTAGE * full_size)
val_size = full_size - train_size

In [10]:
fma_dataset_train, fma_dataset_val = torch.utils.data.random_split(
  fma_dataset_train_val, [train_size, val_size], generator
)

In [11]:
train_loader = torch.utils.data.DataLoader(
  num_workers=16, batch_size=16, dataset=fma_dataset_train
)

In [12]:
mean, std = get_mean_std(train_loader)
print(mean)
print(std)

  0%|          | 0/513 [00:00<?, ?it/s]

tensor([9.8503])
tensor([295.3500])


In [13]:
to_export = {
  "dataset_name": DATASET_NAME,
  "waveform_mean": mean.item(),
  "waveform_std": std.item(),
  "mel_spectrogram_mean": 9.8503,
  "mel_spectrogram_std": 295.3500
}

In [14]:
SUMMARY_STATISTICS_FOLDER = "../data/summary_statistics"

SUMMARY_STATISTICS_NAME = DATASET_NAME

summary_statistics_path = f"{SUMMARY_STATISTICS_FOLDER}/{SUMMARY_STATISTICS_NAME}/{SUMMARY_STATISTICS_NAME}_summary_statistics.json"



In [15]:
make_dir_if_absent(
  dir_path=f"{SUMMARY_STATISTICS_FOLDER}/{SUMMARY_STATISTICS_NAME}"
)

In [16]:
with open(summary_statistics_path, 'w') as fp:
  json.dump(to_export, fp)

In [17]:
to_export

{'dataset_name': 'fma_l_resampled_8000_rechanneled_1',
 'waveform_mean': 9.85030460357666,
 'waveform_std': 295.3499755859375,
 'mel_spectrogram_mean': 9.8503,
 'mel_spectrogram_std': 295.35}