In this first notebook, I load the dataset and prepare the pre-processed data and the helper data structures.
The main goal is to create a pandas.DataFrame (out of a .csv file) with training data (X - waveform of audio file, 16000 values) and ground truth (Y - 7 float values for the timbral features).
Along the way, I also compute some intermediate mapping data structures, mainly dictionaries, and store them in serialized pickle files and json files, so that, should I need some data, I don't have to go back to the dataset (and parse 10000 files with nested folder structures, etc.), but already have everything I need in data structures with cheap access (looking up a key in a dictionary takes O(1) time on average, whereas searching for an item in a list takes O(n)).

In [None]:
!pip install essentia

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting essentia
  Downloading essentia-2.1b6.dev858-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: essentia
Successfully installed essentia-2.1b6.dev858


In [None]:
import json # to read the analysis files with timbral features
import os # for folder and files path concatenation
import glob # to recursively get a list of all files of type x into a folder
import pickle # to serialize data-structure for later use, avoiding re-computation
from essentia.standard import MonoLoader
import numpy
import csv # to create a .csv file with waveform in column 1 and timbral features values in other columns (for ground truth comparison)
import pandas # evaluate the data and format of the .csv file, before moving to the actual NN model creation on another notebook
from google.colab import drive
# Mount Google Drive at /content/gdrive; the dataset and output folders configured below live under this mount point.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# FOLDER PATHS
# Root folder of the project on Google Drive. The dataset folder and the output
# folder are both derived from it, so this is the only path to edit if the
# whole project is moved.
PROJECT_ROOT_FOLDER = '/content/gdrive/MyDrive/Colab Notebooks/Music Information Retrieval/Assignment/Final'

# change this variable if the global dataset (Freesound One-Shot Percussive Sounds folder) is moved
# somewhere else than directly under PROJECT_ROOT_FOLDER
FS_OneShotPerc_DataSet_Folder = os.path.join(PROJECT_ROOT_FOLDER, 'Freesound One-Shot Percussive Sounds')

# You probably will not need to change this
# (sub-folders of the dataset folder holding the .wav files and the .json analysis files)
audioFiles_FolderName = 'one_shot_percussive_sounds/'
jsonFiles_FolderName = 'analysis'
# full path of the folder where the pickle, json and csv outputs are written
pickleFiles_FolderName = PROJECT_ROOT_FOLDER

The timbral features are stored in json files, named after the audio file they refer to (`<audio file name>_analysis.json`), each containing a dictionary like {feature: value, ....}
I can create a nested dictionary like {audio_file_name: {feature: value, ...}}

In [None]:
def _index_paths_by_name(paths, strip_extension=False):
  """Map the base name of every path to the path itself.

  Args:
    paths: iterable of file paths.
    strip_extension: when True the key is the base name without its
      extension (os.path.splitext); otherwise the full base name is used.

  Returns:
    dict {name: path}. When two paths share the same name the later one
    wins (as with a plain dict assignment), and a warning is printed so the
    clash does not go unnoticed.
  """
  index = {}
  for path in paths:
    name = os.path.basename(path)
    if strip_extension:
      name = os.path.splitext(name)[0]
    if name in index:
      print(f'WARNING: duplicate file name {name!r}: {index[name]} is replaced by {path}')
    index[name] = path
  return index


# create a dictionary of audioFileName: audioFilePath
audioFileExt = '*.wav'
audioFilesFolder = os.path.join(FS_OneShotPerc_DataSet_Folder, audioFiles_FolderName)
# '**/' together with recursive=True also searches every sub-folder
audioFilesGlobPath = os.path.join(audioFilesFolder, '**/', audioFileExt)
audioFiles = glob.glob(audioFilesGlobPath, recursive=True)
print(f'Found {len(audioFiles)} .wav audio files in folder {audioFilesFolder}')
# keys drop the '.wav' extension, so '<key>_analysis.json' is the name of the matching analysis file
audioFileName_FilePathDict = _index_paths_by_name(audioFiles, strip_extension=True)
# print(audioFileName_FilePathDict.keys())
# print(audioFileName_FilePathDict.values())
# open a file, where you want to store the data
with open(os.path.join(pickleFiles_FolderName, 'audioFileName_FilePathDict.p'), 'wb') as audioDict_Pickle:
  pickle.dump(audioFileName_FilePathDict, audioDict_Pickle)

# create a dictionary of jsonFileName: jsonFilePath
jsonFileExt = '*.json'
jsonFilesFolder = os.path.join(FS_OneShotPerc_DataSet_Folder, jsonFiles_FolderName)
jsonFilesGlobPath = os.path.join(jsonFilesFolder, '**/', jsonFileExt)
jsonFiles = glob.glob(jsonFilesGlobPath, recursive=True)
print(f'Found {len(jsonFiles)} .json files in folder {jsonFilesFolder}')
# keys keep the full file name, extension included
jsonFileName_FilePathDict = _index_paths_by_name(jsonFiles)
# print(jsonFileName_FilePathDict.keys())
# print(jsonFileName_FilePathDict.values())
with open(os.path.join(pickleFiles_FolderName, 'jsonFileName_FilePathDict.p'), 'wb') as jsonDict_Pickle:
  pickle.dump(jsonFileName_FilePathDict, jsonDict_Pickle)

Found 10254 .wav audio files in folder /content/gdrive/MyDrive/Colab Notebooks/Music Information Retrieval/Assignment/Final/Freesound One-Shot Percussive Sounds/one_shot_percussive_sounds/
Found 10254 .json files in folder /content/gdrive/MyDrive/Colab Notebooks/Music Information Retrieval/Assignment/Final/Freesound One-Shot Percussive Sounds/analysis


In [None]:
# The seven timbral descriptors kept from each analysis file.
timbFeat_Set = ("hardness", "depth", "brightness", "roughness", "warmth", "sharpness", "boominess")

# Nested dict {audio file name: {feature: value}}, filled one analysis file at a time.
fileName_TimbFeat_Dict = {}
for soundName in audioFileName_FilePathDict:
  # each analysis file is looked up as '<audio file name>_analysis.json'
  analysisPath = jsonFileName_FilePathDict[soundName + '_analysis.json']
  with open(analysisPath) as analysisFile:
    analysis = json.load(analysisFile)
  # .get() leaves None for any feature that is missing from the analysis file
  fileName_TimbFeat_Dict[soundName] = {feature: analysis.get(feature) for feature in timbFeat_Set}

# Serialize the nested dict so it can be reloaded without parsing the json files again.
timbFeatPicklePath = os.path.join(pickleFiles_FolderName, 'fileName_TimbFeat_Dict.p')
with open(timbFeatPicklePath, 'wb') as timbFeatPickle:
  pickle.dump(fileName_TimbFeat_Dict, timbFeatPickle)

In [None]:
# Also export the three lookup dictionaries as json files, written to the same
# folder as the pickle files (output file name -> dictionary to dump).
jsonExports = (
  ('fileName_TimbFeat_Dict.json', fileName_TimbFeat_Dict),
  ('jsonFileName_FilePathDict.json', jsonFileName_FilePathDict),
  ('audioFileName_FilePathDict.json', audioFileName_FilePathDict),
)
for exportName, exportedDict in jsonExports:
  with open(os.path.join(pickleFiles_FolderName, exportName), 'w') as exportFile:
    json.dump(exportedDict, exportFile)

In [None]:
# Reload the two lookup dictionaries from their pickle files, so this cell does
# not need the cells that built them to be run again.
audioDict_PicklePath = os.path.join(pickleFiles_FolderName, 'audioFileName_FilePathDict.p')
with open(audioDict_PicklePath, 'rb') as pickleIn:
  audioFileName_FilePathDict = pickle.load(pickleIn)
numAudioKeys = len(audioFileName_FilePathDict)
print(f'Retrieved audioFileName_FilePathDict object with {numAudioKeys} keys from pickle file.')

timbFeatDict_PicklePath = os.path.join(pickleFiles_FolderName, 'fileName_TimbFeat_Dict.p')
with open(timbFeatDict_PicklePath, 'rb') as pickleIn:
  fileName_TimbFeat_Dict = pickle.load(pickleIn)
numTimbFeatKeys = len(fileName_TimbFeat_Dict)
print(f'Retrieved fileName_TimbFeat_Dict object with {numTimbFeatKeys} keys from pickle file.')

# Write one CSV row per audio file: sound id, NUM_SAMPLES waveform samples,
# then the timbral features used as ground truth.
TARGET_SAMPLE_RATE = 16000  # sample rate requested from MonoLoader
NUM_SAMPLES = 16000         # number of waveform samples stored per row

# Column names are the same for every row, so they are built only once.
timbFeat_Columns = ["hardness", "depth", "brightness", "roughness", "warmth", "sharpness", "boominess"]
sampleColumns = ["audio_waveform_sampleN_" + str(numSamp) for numSamp in range(NUM_SAMPLES)]
fieldnames = ["freesound_sound_id"] + sampleColumns + timbFeat_Columns

# newline='' is what the csv module asks for on files handed to its writers,
# so that the line endings it emits are not translated a second time.
with open(os.path.join(pickleFiles_FolderName, 'audioFileWavef_TimbralFeatures.csv'), 'w', newline='') as audioFileWavef_TimbralFeatures_CSV:
  writer = csv.DictWriter(audioFileWavef_TimbralFeatures_CSV, fieldnames=fieldnames)
  writer.writeheader()
  for audioFIle, audioFilePath in audioFileName_FilePathDict.items():
    audioWavef = MonoLoader(filename=audioFilePath, sampleRate=TARGET_SAMPLE_RATE)()
    # numpy.array() makes a copy that can then be resized in place
    audioWavef = numpy.array(audioWavef)
    # in-place resize: zero-pads shorter waveforms and truncates longer ones to NUM_SAMPLES
    audioWavef.resize(NUM_SAMPLES)
    timbFeatures = fileName_TimbFeat_Dict[audioFIle]
    rowDict = {"freesound_sound_id": audioFIle}
    rowDict.update(zip(sampleColumns, audioWavef))
    rowDict.update((featureName, timbFeatures[featureName]) for featureName in timbFeat_Columns)
    writer.writerow(rowDict)
print('Finished creating a .csv file with pre-process and formatted data, ready to feed into sklearn.')

# Read the CSV back into a pandas.DataFrame and pickle the DataFrame to the
# same output folder.
csvPath = os.path.join(pickleFiles_FolderName, 'audioFileWavef_TimbralFeatures.csv')
dataFramePicklePath = os.path.join(pickleFiles_FolderName, 'audioFileWavef_TimbralFeatures_DF.p')

with open(csvPath, 'r') as csvIn:
  audioFileWavef_TimbralFeatures_DF = pandas.read_csv(csvIn)
with open(dataFramePicklePath, 'wb') as pickleOut:
  pickle.dump(audioFileWavef_TimbralFeatures_DF, pickleOut)
print('Finished creating a .pickle file with a pandas.DataFrame taken out of the .csv file')

Retrieved audioFileName_FilePathDict object with 10254 keys from pickle file.
Retrieved fileName_TimbFeat_Dict object with 10254 keys from pickle file.
Finished creating a .csv file with pre-process and formatted data, ready to feed into sklearn.
Finished creating a .pickle file with a pandas.DataFrame taken out of the .csv file


In [None]:
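# NOTE(review): the CSV written above has one column per sample (audio_waveform_sampleN_0 ... audio_waveform_sampleN_15999) and no 'audio_waveform' column, so this parsing step presumably belongs to an earlier CSV layout and is kept only for reference.
# audioFileWavef_TimbralFeatures_DF['audio_waveform'] = audioFileWavef_TimbralFeatures_DF['audio_waveform'].apply(lambda x: [float(i) for i in x.replace('[', '').replace(']', '').split(',')])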