## This script partitions the input directory into a demoSounds directory and uploads the demo files to the database

In [19]:
import os
import shutil
import random
import math
import os
import librosa
import numpy as np
from scipy.spatial.distance import cdist
from IPython.display import Audio, display

In [18]:

# Create demoSounds directory if it doesn't exist
demo_dir = 'demoSounds'
if not os.path.exists(demo_dir):
    os.makedirs(demo_dir)

# Get list of files from inputs directory
input_dir = 'inputs'
sound_files = []
for root, dirs, files in os.walk(input_dir):
    for file in files:
        if file.endswith(('.ogg')):  # Add other audio extensions if needed
            sound_files.append(os.path.join(root, file))

# Calculate number of files to move (5%)
num_files_to_move = math.ceil(len(sound_files) * 0.05)

# Randomly select files to move
files_to_move = random.sample(sound_files, num_files_to_move)

# Move selected files to demoSounds
for file_path in files_to_move:
    file_name = os.path.basename(file_path)
    destination = os.path.join(demo_dir, file_name)
    shutil.move(file_path, destination)
    print(f"Moved {file_name} to {demo_dir}")

print(f"\nMoved {num_files_to_move} files ({5}% of total) to {demo_dir}")


Moved Safe House Kit Melodic 02 88 bpm.alc.ogg to demoSounds
Moved Fly Situation Kit Melodic 03 108bpm.alc.ogg to demoSounds
Moved Dan Piano D Minor 86 bpm.alc.ogg to demoSounds
Moved Evening Casual E Minor 87bpm.alc.ogg to demoSounds
Moved Space Pad Chords Cminor 80bpm.alc.ogg to demoSounds
Moved Smokey Keys D Minor 88 bpm.alc.ogg to demoSounds
Moved Dirty Funk Lead.adg.ogg to demoSounds
Moved Climb Chord Pad A Minor 94 bpm.alc.ogg to demoSounds
Moved Funk Synth Melody Cminor 80bpm.alc.ogg to demoSounds
Moved High Rise Kit groove 01_99 bpm.alc.ogg to demoSounds
Moved Buttermilk Kit Melodic 07 130 bpm.alc.ogg to demoSounds
Moved Obee 12 Voice Brass.adg.ogg to demoSounds
Moved Numerology Kit Melodic 01 89 bpm.alc.ogg to demoSounds
Moved Move Up Kit groove 01_100 bpm.alc.ogg to demoSounds
Moved Wah Soul Kit 01 86 bpm.alc.ogg to demoSounds
Moved Bang Ya Head Kit Melodic 04 94bpm.alc.ogg to demoSounds
Moved Move up Groove 1 88 Bpm.alc.ogg to demoSounds
Moved Buttermilk Kit Melodic 05 117 b

Upload demo files to mongoDB

In [21]:
def load_audio_features(file_path, n_mfcc=13):
    """
    Load an audio file and extract MFCC features.
    """
    y, sr = librosa.load(file_path, sr=None)
    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Compute the mean of the MFCCs over time
    mfcc_mean = np.mean(mfcc.T, axis=0)
    return mfcc_mean

def get_all_audio_features(directory, n_mfcc=13):
    """
    Load all .ogg files in the directory and extract their MFCC features.
    """
    audio_features = []
    for filename in os.listdir(directory):
        if filename.endswith('.ogg'):
            file_path = os.path.join(directory, filename)
            print(f"Processing {filename}... ({list(os.listdir(directory)).index(filename) + 1}/{len(os.listdir(directory))})")
            features = load_audio_features(file_path, n_mfcc)
            audio_features.append({
                "file_path": file_path,
                "title": filename,
                "embedding": features.tolist()
            })
    return audio_features

In [22]:
# Set your musicML directory path
musicml_directory = './demoSounds'  # Replace with your actual directory path

# Get features for all demo files
demo_features = get_all_audio_features(musicml_directory)

Processing Jiggy Groove 1 100 Bpm.alc.ogg... (1/67)
Processing Dirty Funk Lead.adg.ogg... (2/67)
Processing 212 Kit Melodic 02 118 bpm.alc.ogg... (3/67)
Processing Move Up Kit Melodic 01 100 bpm.alc.ogg... (4/67)
Processing Evening Casual Bb Minor 06 89 bpm.alc.ogg... (5/67)
Processing Evening Casual E Minor 87bpm.alc.ogg... (6/67)
Processing Safe House Kit Melodic 02 88 bpm.alc.ogg... (7/67)
Processing Dan Piano D Minor 86 bpm.alc.ogg... (8/67)
Processing Honey Pulp Lead.adg.ogg... (9/67)
Processing Obee 12 Voice Brass.adg.ogg... (10/67)
Processing Slow Pull Back C Minor 85bpm.alc.ogg... (11/67)
Processing Wah Soul Kit groove 01_86 bpm.alc.ogg... (12/67)
Processing Bari Dust EP F Minor 83 bpm.alc.ogg... (13/67)
Processing Smokey Keys D Minor 88 bpm.alc.ogg... (14/67)
Processing A Trying Sine D Minor 86bpm.alc.ogg... (15/67)
Processing Bang Ya Head Kit 02 92 bpm.alc.ogg... (16/67)
Processing Light Horn D Minor 88bpm.alc.ogg... (17/67)
Processing Blueprint Kit groove 02_81 bpm.alc.ogg..

In [28]:
# upload the files and features to the database
# SCHEMA:
# {
#     "filename": "string",
#     "file_path": "string",
#     "features": "array"
# }
from pymongo import MongoClient
import gridfs

# Connect to MongoDB
uri = "mongodb+srv://ethanjags1:OrIjEQHBSzR0k1GJ@demo-sounds.jax2c.mongodb.net/?retryWrites=true&w=majority&appName=demo-sounds"
client = MongoClient(uri)
db = client["soundDB"]
fs = gridfs.GridFS(db)

# Upload files and save metadata
for sound in demo_features:
    with open(sound["file_path"], "rb") as file:
        # Save the file to GridFS
        file_id = fs.put(file, filename=sound["file_path"].split("/")[-1])

        # Store metadata in the demo_sounds collection
        db.demo_sounds.insert_one({
            "file_id": file_id,
            "title": sound["title"],
            "embedding": sound["embedding"]
        })

print("Demo sounds uploaded successfully!")

Demo sounds uploaded successfully!
