In [1]:
# Install the two third-party packages imported by this notebook (essentia and
# pymongo). pip is invoked through `sys.executable`, i.e. the interpreter that
# is running this kernel, so the packages are installed for that interpreter
# rather than for whichever `pip` happens to be first on PATH.
# NOTE(review): no versions are pinned, so a re-run may pull newer releases.
import sys
!{sys.executable} -m pip install essentia
!{sys.executable} -m pip install pymongo



In [2]:
import os
import essentia
import essentia.standard as es
from pymongo import MongoClient

In [3]:
# Open the MongoDB connection and resolve the handles used further down:
# the `music_analysis` database and its `songs` collection.
client = MongoClient('mongodb://mongo:27017/')
db = client['music_analysis']
songs = db['songs']

In [4]:
# Descriptor names starting with any of these prefixes are NOT stored
# (unless the exact name is listed in `whitelist`).
ignorePrefix = [
    'lowlevel.barkbands',
    'lowlevel.dissonance',
    'lowlevel.erbbands',
    'lowlevel.melbands',
    'lowlevel.mfcc',
    'lowlevel.hfc',
    'lowlevel.loudness_ebu128',
    'lowlevel.pitch_salience',
    'lowlevel.silence_rate',
    'lowlevel.spectral_',
    'lowlevel.zerocrossingrate',
    'tonal.hpcp',
    'rhythm.beats_loudness',
    'rhythm.beats_position',
    'rhythm.bpm_histogram',
    'tonal.chords_histogram',
    'tonal.thpcp',
    'tonal.chords_strength',
    'tonal.tuning_diatonic_strength',
    'tonal.tuning_equal_tempered_deviation',
    'tonal.tuning_nontempered_energy_ratio',
    'tonal.key',
    'lowlevel.gfcc',
    'metadata'
]
# Descriptor names ending with any of these suffixes are NOT stored
# (unless the exact name is listed in `whitelist`).
ignoreSuffix = [
    '.probability',
    '.strength'
]
# Exact descriptor names that are always stored, even when a prefix/suffix
# rule above would otherwise exclude them.
whitelist = [
    'lowlevel.dissonance.median',
    'rhythm.beats_loudness.median',
    'tonal.chords_strength.median',
    'tonal.key_krumhansl.key',
    'tonal.key_krumhansl.scale'
]

# str.startswith / str.endswith accept a tuple of candidates; build the tuples
# once here instead of once per descriptor of every song.
_IGNORED_PREFIXES = tuple(ignorePrefix)
_IGNORED_SUFFIXES = tuple(ignoreSuffix)

# Root of the music library (relative to the notebook's working directory)
# and the file extensions that are treated as audio. The match is
# case-sensitive, exactly as written here.
MUSIC_ROOT = './../../music/'
AUDIO_EXTENSIONS = ('.mp3', '.m4a', '.ogg', '.m4p', '.flac')


def keep_descriptor(stat):
    """Return True if the descriptor called `stat` should be stored.

    A descriptor is kept when its exact name is in `whitelist`, or when it
    passes all three exclusion rules: the name does not contain 'not_', does
    not start with an ignored prefix and does not end with an ignored suffix.
    """
    if stat in whitelist:
        return True
    return ('not_' not in stat
            and not stat.startswith(_IGNORED_PREFIXES)
            and not stat.endswith(_IGNORED_SUFFIXES))


def build_song_document(filename, features):
    """Build the dict that is inserted into the `songs` collection.

    `features` only needs to provide `descriptorNames()` and item access by
    descriptor name (here: the first value returned by the extractor call).
    The result always holds 'filename' plus one entry per kept descriptor.
    """
    song = {'filename': filename}
    for stat in features.descriptorNames():
        if keep_descriptor(stat):
            # remove dots from key
            # (presumably because dotted field names are awkward in MongoDB)
            song[stat.replace('.', '')] = features[stat]
    return song


# scan all directories for audio files & store data
# approx 11k songs total
# os.walk already yields the file names of every directory it visits, so they
# are used directly instead of listing each directory a second time; this also
# means sub-directories are never mistaken for audio files.
for directory, _subdirs, files in os.walk(MUSIC_ROOT):
    for file in files:
        if not file.endswith(AUDIO_EXTENSIONS):
            continue
        # The path is assembled by plain concatenation on purpose: it is the
        # lookup key in the database, so its format must not change.
        filename = directory+'/'+file
        # check if already in db
        if songs.find_one({'filename': filename}) is not None:
            print("Skipping "+filename)
            continue
        # analyze song
        try:
            features, features_frames = es.MusicExtractor(profile="profile.yml")(filename)
        except RuntimeError:
            # Best effort: report the file and carry on with the next one.
            print("Can't analyze "+filename)
            continue
        # store in database
        song = build_song_document(filename, features)
        song_id = songs.insert_one(song).inserted_id
        print(filename+' - '+str(song_id))

Skipping ./../../music/Gorillaz feat. Daley/Doncamatic/01 Doncamatic.mp3
Skipping ./../../music/Gorillaz feat. Daley/Doncamatic/02 Doncamatic (Instrumental).mp3
Skipping ./../../music/Themselves/The No Music of Aiff's_ The No Music Remixed/05 Poison Pit (Why_ remix).mp3
Skipping ./../../music/Themselves/The No Music of Aiff's_ The No Music Remixed/08 Dr. Moonorgun Please (Grapedope remix).mp3
Skipping ./../../music/Themselves/The No Music of Aiff's_ The No Music Remixed/06 Livetrap (Hood remix).mp3
Skipping ./../../music/Themselves/The No Music of Aiff's_ The No Music Remixed/11 Out in the Open (Notwist remix).mp3
Skipping ./../../music/Themselves/The No Music of Aiff's_ The No Music Remixed/09 Darkskydemo (Fog remix).mp3
Skipping ./../../music/Themselves/The No Music of Aiff's_ The No Music Remixed/01 Terror Fabulous.mp3
Skipping ./../../music/Themselves/The No Music of Aiff's_ The No Music Remixed/04 Good People Check (Hrvatski remix).mp3
Skipping ./../../music/Themselves/The No Musi