## This script partitions the input directory into a demoSounds directory and uploads the demo files to the database

In [4]:
import os
import shutil
import random
import math
import os
import librosa
import numpy as np
from scipy.spatial.distance import cdist
from IPython.display import Audio, display

In [1]:

# Create demoSounds directory if it doesn't exist
demo_dir = 'demoSounds'
if not os.path.exists(demo_dir):
    os.makedirs(demo_dir)

# Get list of files from inputs directory
input_dir = 'inputs'
sound_files = []
for root, dirs, files in os.walk(input_dir):
    for file in files:
        if file.endswith(('.ogg')):  # Add other audio extensions if needed
            sound_files.append(os.path.join(root, file))

# Calculate number of files to move (5%)
num_files_to_move = math.ceil(len(sound_files) * 0.05)

# Randomly select files to move
files_to_move = random.sample(sound_files, num_files_to_move)

# Move selected files to demoSounds
for file_path in files_to_move:
    file_name = os.path.basename(file_path)
    destination = os.path.join(demo_dir, file_name)
    shutil.move(file_path, destination)
    print(f"Moved {file_name} to {demo_dir}")

print(f"\nMoved {num_files_to_move} files ({5}% of total) to {demo_dir}")


Moved A Model D D Minor 88 bpm.alc.ogg to demoSounds
Moved Alley Kat Kit Melodic 03 140 bpm.alc.ogg to demoSounds
Moved Jiggy With It Kit.adg.ogg to demoSounds
Moved Grind Bass.adg.ogg to demoSounds
Moved Evening Casual Bb Minor 06 89 bpm.alc.ogg to demoSounds
Moved LoFi FM EP Chords A Minor 92bpm.alc.ogg to demoSounds
Moved Dobbs Kit groove 07_96 bpm.alc.ogg to demoSounds
Moved Move Up Kit Melodic 01 100 bpm.alc.ogg to demoSounds
Moved Wah Soul Kit groove 01_86 bpm.alc.ogg to demoSounds
Moved Canvas Kit groove 03_94 bpm.alc.ogg to demoSounds
Moved A Trying Sine D Minor 86bpm.alc.ogg to demoSounds
Moved Allee Gate Groove 3 105BPM.alc.ogg to demoSounds
Moved Old School Stab 83bpm.alc.ogg to demoSounds
Moved New Old School A Minor 2 83bpm.alc.ogg to demoSounds
Moved Bang Ya Head Kit 02 92 bpm.alc.ogg to demoSounds
Moved Creamy Voices.adg.ogg to demoSounds
Moved Wonder Bass.adg.ogg to demoSounds
Moved Vintage Madman Groove 1 78bpm.alc.ogg to demoSounds
Moved 212 Kit Groove drums only 90 B

Upload demo files to mongoDB

In [15]:
def load_audio_features(file_path, n_mfcc=13):
    """
    Load an audio file and extract MFCC features.
    """
    y, sr = librosa.load(file_path, sr=None)
    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Compute the mean of the MFCCs over time
    mfcc_mean = np.mean(mfcc.T, axis=0)
    return mfcc_mean

def get_all_audio_features(directory, n_mfcc=13):
    """
    Load all .ogg files in the directory and extract their MFCC features.
    """
    audio_features = []
    for filename in os.listdir(directory):
        if filename.endswith('.ogg'):
            file_path = os.path.join(directory, filename)
            print(f"Processing {filename}... ({list(os.listdir(directory)).index(filename) + 1}/{len(os.listdir(directory))})")
            features = load_audio_features(file_path, n_mfcc)
            audio_features.append({
                "file_path": file_path,
                "title": filename,
                "embedding": features.tolist()
            })
    return audio_features

In [16]:
# Set your musicML directory path
musicml_directory = './demoSounds'  # Replace with your actual directory path

# Get features for all demo files
demo_features = get_all_audio_features(musicml_directory)

Processing Jiggy Groove 1 100 Bpm.alc.ogg... (1/34)
Processing 212 Kit Melodic 02 118 bpm.alc.ogg... (2/34)
Processing Move Up Kit Melodic 01 100 bpm.alc.ogg... (3/34)
Processing Evening Casual Bb Minor 06 89 bpm.alc.ogg... (4/34)
Processing Honey Pulp Lead.adg.ogg... (5/34)
Processing Wah Soul Kit groove 01_86 bpm.alc.ogg... (6/34)
Processing A Trying Sine D Minor 86bpm.alc.ogg... (7/34)
Processing Bang Ya Head Kit 02 92 bpm.alc.ogg... (8/34)
Processing Light Horn D Minor 88bpm.alc.ogg... (9/34)
Processing Grind Bass.adg.ogg... (10/34)
Processing Canvas Kit groove 03_94 bpm.alc.ogg... (11/34)
Processing Mallet Synth G# Minor 98 bpm.alc.ogg... (12/34)
Processing Freak-o-saur Bb Minor 87 bpm.alc.ogg... (13/34)
Processing Twisted Organ.adg.ogg... (14/34)
Processing Dirty Funk Lead E Minor 90bpm.alc.ogg... (15/34)
Processing Allee Gate Groove 3 105BPM.alc.ogg... (16/34)
Processing Creamy Voices.adg.ogg... (17/34)
Processing Muddy Whirl Piano.adg.ogg... (18/34)
Processing 212 Kit Groove dr

In [17]:
# upload the files and features to the database
# SCHEMA:
# {
#     "filename": "string",
#     "file_path": "string",
#     "features": "array"
# }
from pymongo import MongoClient
import gridfs

# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["soundDB"]
fs = gridfs.GridFS(db)

# Upload files and save metadata
for sound in demo_features:
    with open(sound["file_path"], "rb") as file:
        # Save the file to GridFS
        file_id = fs.put(file, filename=sound["file_path"].split("/")[-1])

        # Store metadata in the demo_sounds collection
        db.demo_sounds.insert_one({
            "file_id": file_id,
            "title": sound["title"],
            "embedding": sound["embedding"]
        })

print("Demo sounds uploaded successfully!")

Demo sounds uploaded successfully!
