## This script partitions the input directory into a demoSounds directory and uploads the demo files to the database

In [1]:
import os
import shutil
import random
import math
import os
import librosa
import numpy as np
from scipy.spatial.distance import cdist
from IPython.display import Audio, display

Upload demo files to mongoDB

In [2]:
def load_audio_features(file_path, n_mfcc=13):
    """
    Load an audio file and extract MFCC features.
    """
    y, sr = librosa.load(file_path, sr=None)
    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Compute the mean of the MFCCs over time
    mfcc_mean = np.mean(mfcc.T, axis=0)
    return mfcc_mean

def get_all_audio_features(directory, n_mfcc=13):
    """
    Load all .ogg files in the directory and extract their MFCC features.
    """
    audio_features = []
    for filename in os.listdir(directory):
        if filename.endswith('.ogg'):
            file_path = os.path.join(directory, filename)
            print(f"Processing {filename}... ({list(os.listdir(directory)).index(filename) + 1}/{len(os.listdir(directory))})")
            features = load_audio_features(file_path, n_mfcc)
            audio_features.append({
                "file_path": file_path,
                "title": filename,
                "embedding": features.tolist()
            })
    return audio_features

In [3]:
# Set your musicML directory path
musicml_directory = './inputs'  # Replace with your actual directory path

# Get features for all demo files
demo_features = get_all_audio_features(musicml_directory)

Processing Canvas Kit Melodic 03 94 bpm.alc.ogg... (1/678)
Processing Bending Boom.adg.ogg... (2/678)
Processing Move Up Kit groove 03_110 bpm.alc.ogg... (3/678)
Processing Fluff Face Bass.adg.ogg... (4/678)
Processing Tempo Wah Clav Chords D Minor 94 bpm.alc.ogg... (5/678)
Processing Muddy Whirl Piano E Minor 01 85 bpm.alc.ogg... (6/678)
Processing Constant Pad G Minor 92 bpm.alc.ogg... (7/678)
Processing Analog Flute Lead.adg.ogg... (8/678)
Processing Little Amp Electric Piano.adg.ogg... (9/678)
Processing Beat That D Minor 98bpm.alc.ogg... (10/678)
Processing Kingsway Kit Melodic 02 100 bpm.alc.ogg... (11/678)
Processing 212 Kit Melodic 01 90 bpm.alc.ogg... (12/678)
Processing Little Amp Electric Piano Chords Bb minor 77bpm.alc.ogg... (13/678)
Processing Bi-Phase Sector.adg.ogg... (14/678)
Processing A Trying Sine D Minor 02 88 bpm.alc.ogg... (15/678)
Processing Dropped A Bass.adg.ogg... (16/678)
Processing Jiggy Groove 1 100 Bpm.alc.ogg... (17/678)
Processing Dirty Funk Lead.adg.og

In [4]:
# upload the files and features to the database
# SCHEMA:
# {
#     "filename": "string",
#     "file_path": "string",
#     "features": "array"
# }
from pymongo import MongoClient
import gridfs

# Connect to MongoDB
uri = "mongodb+srv://ethanjags1:OrIjEQHBSzR0k1GJ@demo-sounds.jax2c.mongodb.net/?retryWrites=true&w=majority&appName=demo-sounds"
client = MongoClient(uri)
db = client["soundDB"]
fs = gridfs.GridFS(db)

# Upload files and save metadata
for sound in demo_features:
    with open(sound["file_path"], "rb") as file:
        # Save the file to GridFS
        file_id = fs.put(file, filename=sound["file_path"].split("/")[-1])

        # Store metadata in the demo_sounds collection
        db.demo_sounds.insert_one({
            "file_id": file_id,
            "title": sound["title"],
            "embedding": sound["embedding"]
        })

print("Demo sounds uploaded successfully!")

Demo sounds uploaded successfully!
