In [1]:
!pip install librosa numpy faiss-cpu torch transformers crepe scipy fastdtw tqdm nnAudio jsonlines

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m

In [3]:
import os
import numpy as np
import librosa
import faiss
import logging
import torch
from transformers import ClapProcessor, ClapModel, WhisperProcessor, WhisperForConditionalGeneration, BertTokenizer, BertModel
import crepe
from scipy.signal import butter, filtfilt
from scipy.spatial.distance import cosine
from fastdtw import fastdtw
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import random
from scipy.signal import correlate

# Setup logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# CLAP model setup (fallback)
# Note: To use MERT instead, replace with from transformers import AutoProcessor, AutoModel; processor = AutoProcessor.from_pretrained("m-a-p/MERT-v1-95M"); model = AutoModel.from_pretrained("m-a-p/MERT-v1-95M")
# Then adjust extract_clap_embedding to use model.encode_audio or similar.
embedding_dim = 512
index = faiss.IndexFlatL2(embedding_dim)
processor = ClapProcessor.from_pretrained("laion/clap-htsat-unfused")
model = ClapModel.from_pretrained("laion/clap-htsat-unfused")

# Whisper for lyrics (upgraded to medium for better accuracy)
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")

# BERT for lyrics embeddings
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")

# Database storage
database_embeddings = []
database_spectral = []
database_chroma_seq = []
database_onsets = []
database_tempos = []
database_melody_sequences = []
database_lyrics_emb = []
database_chord_seq = []
database_rms = []  # New for RMS energy sequences
database_tracks = []

# Paths and constants
AUDIO_FOLDER_PATH = "/home/ubuntu/mahesh_YUE/input_songs1"
EMBEDDINGS_DIR = "/home/ubuntu/mahesh_YUE/version_3_embed"
ALLOWED_FORMATS = ('.mp3', '.wav', '.aiff')

# Low-pass filter
def lowpass_filter(signal, sr, cutoff=2500):
    nyquist = 0.5 * sr
    norm_cutoff = cutoff / nyquist
    b, a = butter(4, norm_cutoff, btype='low', analog=False)
    return filtfilt(b, a, signal)

# Feature extraction
def extract_clap_embedding(audio, sr=48000):
    try:
        inputs = processor(audios=[audio], sampling_rate=sr, return_tensors="pt", padding=True)
        with torch.no_grad():
            embedding = model.get_audio_features(**inputs)
        return np.array(embedding.squeeze().numpy(), dtype=np.float32)
    except Exception as e:
        logger.error(f"Error extracting CLAP embedding: {e}")
        return None

def extract_lyrics_embedding(audio, sr):
    try:
        audio_16k = librosa.resample(audio, orig_sr=sr, target_sr=16000)
        input_features = whisper_processor(audio_16k, sampling_rate=16000, return_tensors="pt").input_features
        predicted_ids = whisper_model.generate(input_features)
        lyrics = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        inputs = bert_tokenizer(lyrics, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            outputs = bert_model(**inputs)
        emb = outputs.pooler_output.squeeze().numpy().astype(np.float32)
        return emb
    except Exception as e:
        logger.error(f"Error extracting lyrics: {e}")
        return None

def extract_melody_sequence(audio, sr=16000):
    try:
        mono = librosa.to_mono(audio) if audio.ndim > 1 else audio
        filtered = lowpass_filter(mono, sr)
        time, frequency, confidence, _ = crepe.predict(filtered, sr, model_capacity='tiny', step_size=20)
        high_conf_idx = confidence > 0.7
        if not np.any(high_conf_idx):
            return None
        freqs = frequency[high_conf_idx].astype(np.float32)
        log_freqs = np.log2(freqs / 440) * 1200  # Cents for shift invariance
        return log_freqs
    except Exception as e:
        logger.error(f"Error extracting melody: {e}")
        return None

def extract_spectral_features(audio, sr):
    try:
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
        return np.mean(mfccs, axis=1).astype(np.float32)
    except Exception as e:
        logger.error(f"Error extracting spectral features: {e}")
        return None

def extract_chroma_features(audio, sr, beats):
    try:
        chroma = librosa.feature.chroma_cqt(y=audio, sr=sr, bins_per_octave=48)  # Higher resolution
        if beats is None:
            return None
        synced_chroma = librosa.util.sync(chroma, beats, aggregate=np.mean)
        return synced_chroma.T.astype(np.float32)
    except Exception as e:
        logger.error(f"Error extracting chroma features: {e}")
        return None

def extract_chord_sequence(audio, sr, beats):
    try:
        chroma = librosa.feature.chroma_cqt(y=audio, sr=sr)
        chords = librosa.effects.harmonic(chroma)  # Simplified harmonic extraction
        synced_chords = librosa.util.sync(chords, beats, aggregate=np.mean)
        return synced_chords.T.astype(np.float32)
    except Exception as e:
        logger.error(f"Error extracting chord sequence: {e}")
        return None

def extract_rms_sequence(audio, sr, beats):
    try:
        rms = librosa.feature.rms(y=audio)
        if beats is None:
            return None
        synced_rms = librosa.util.sync(rms, beats, aggregate=np.mean)
        return synced_rms.flatten().astype(np.float32)
    except Exception as e:
        logger.error(f"Error extracting RMS sequence: {e}")
        return None

def extract_onset_features(audio, sr):
    try:
        onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
        tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
        onset_frames = librosa.frames_to_samples(beats, hop_length=512)
        tempo_scalar = float(tempo) if np.isscalar(tempo) else float(tempo[0])
        return onset_env[:len(onset_frames)], tempo_scalar, beats
    except Exception as e:
        logger.error(f"Error extracting onset features: {e}")
        return None, None, None

def process_audio_file(audio_path):
    try:
        y, sr = librosa.load(audio_path, sr=None)
        y = librosa.util.normalize(y)  # Normalize audio volume
        onsets, tempo, beats = extract_onset_features(y, sr)
        rms = extract_rms_sequence(y, sr, beats)
        return audio_path, (
            extract_clap_embedding(y),
            extract_spectral_features(y, sr),
            extract_chroma_features(y, sr, beats),
            onsets,
            tempo,
            extract_melody_sequence(y),
            extract_lyrics_embedding(y, sr),
            extract_chord_sequence(y, sr, beats),
            rms  # New
        )
    except Exception as e:
        logger.error(f"Error processing {audio_path}: {e}")
        return audio_path, (None, None, None, None, None, None, None, None, None)

# Distance and similarity functions
def scalar_distance(x, y):
    return abs(x - y)

def dtw_similarity(seq1, seq2, sigma, dist_func=scalar_distance):
    if seq1 is None or seq2 is None or len(seq1) < 2 or len(seq2) < 2:
        return 0.0
    try:
        downsample_rate = max(1, len(seq1) // 5000)
        seq1_ds = seq1[::downsample_rate]
        seq2_ds = seq2[::downsample_rate]
        if dist_func == cosine:  # For chroma/chords
            mean1 = np.mean(seq1_ds, axis=0)
            mean2 = np.mean(seq2_ds, axis=0)
            corr = [np.dot(np.roll(mean1, k), mean2) for k in range(12)]
            best_shift = np.argmax(corr)
            seq2_ds = np.roll(seq2_ds, best_shift, axis=1)
        if dist_func == scalar_distance:  # For melody
            seq1_diff = np.diff(seq1_ds)
            seq2_diff = np.diff(seq2_ds)
            dtw_distance, path = fastdtw(seq1_diff, seq2_diff, dist=dist_func)
            dtw_distance /= len(path) if path else 1  # Local alignment norm
        else:
            dtw_distance, path = fastdtw(seq1_ds, seq2_ds, dist=dist_func)
            dtw_distance /= len(path) if path else 1
        return np.exp(-dtw_distance / sigma)
    except Exception as e:
        logger.error(f"Error in DTW computation: {e}")
        return 0.0

# Save features
def save_features():
    try:
        os.makedirs(EMBEDDINGS_DIR, exist_ok=True)
        if not os.access(EMBEDDINGS_DIR, os.W_OK):
            raise PermissionError(f"No write permission for {EMBEDDINGS_DIR}")
        if not database_tracks:
            logger.warning("No valid tracks to save")
            return False
        np.save(os.path.join(EMBEDDINGS_DIR, "embeddings.npy"), np.array(database_embeddings, dtype=np.float32))
        np.save(os.path.join(EMBEDDINGS_DIR, "spectral.npy"), np.array(database_spectral, dtype=np.float32))
        np.save(os.path.join(EMBEDDINGS_DIR, "chroma_seq.npy"), np.array(database_chroma_seq, dtype=object))
        np.save(os.path.join(EMBEDDINGS_DIR, "onsets.npy"), np.array(database_onsets, dtype=object))
        np.save(os.path.join(EMBEDDINGS_DIR, "tempos.npy"), np.array(database_tempos, dtype=np.float32))
        np.save(os.path.join(EMBEDDINGS_DIR, "melody_sequences.npy"), np.array(database_melody_sequences, dtype=object))
        np.save(os.path.join(EMBEDDINGS_DIR, "lyrics_emb.npy"), np.array(database_lyrics_emb, dtype=np.float32))
        np.save(os.path.join(EMBEDDINGS_DIR, "chord_seq.npy"), np.array(database_chord_seq, dtype=object))
        np.save(os.path.join(EMBEDDINGS_DIR, "rms.npy"), np.array(database_rms, dtype=object))  # New
        np.save(os.path.join(EMBEDDINGS_DIR, "tracks.npy"), np.array(database_tracks, dtype=str))
        np.save(os.path.join(EMBEDDINGS_DIR, "max_distance_onsets.npy"), np.array([MAX_DISTANCE_ONSETS], dtype=np.float32))
        np.save(os.path.join(EMBEDDINGS_DIR, "max_distance_chroma.npy"), np.array([MAX_DISTANCE_CHROMA], dtype=np.float32))
        np.save(os.path.join(EMBEDDINGS_DIR, "max_distance_rms.npy"), np.array([MAX_DISTANCE_RMS], dtype=np.float32))  # New
        logger.info(f"Successfully saved features to {EMBEDDINGS_DIR} with {len(database_tracks)} tracks")
        return True
    except Exception as e:
        logger.error(f"Failed to save features: {e}")
        return False

# Load features
def load_features():
    global database_embeddings, database_spectral, database_chroma_seq, database_onsets, database_tempos, database_melody_sequences, database_lyrics_emb, database_chord_seq, database_rms, database_tracks, MAX_DISTANCE_ONSETS, MAX_DISTANCE_CHROMA, MAX_DISTANCE_RMS
    required_files = ["embeddings.npy", "spectral.npy", "chroma_seq.npy", "onsets.npy", "tempos.npy", "melody_sequences.npy", "lyrics_emb.npy", "chord_seq.npy", "rms.npy", "tracks.npy", "max_distance_onsets.npy", "max_distance_chroma.npy", "max_distance_rms.npy"]
    if os.path.exists(EMBEDDINGS_DIR) and all(os.path.exists(os.path.join(EMBEDDINGS_DIR, f)) for f in required_files):
        try:
            database_embeddings = np.load(os.path.join(EMBEDDINGS_DIR, "embeddings.npy")).tolist()
            database_spectral = np.load(os.path.join(EMBEDDINGS_DIR, "spectral.npy")).tolist()
            database_chroma_seq = np.load(os.path.join(EMBEDDINGS_DIR, "chroma_seq.npy"), allow_pickle=True).tolist()
            database_onsets = np.load(os.path.join(EMBEDDINGS_DIR, "onsets.npy"), allow_pickle=True).tolist()
            database_tempos = np.load(os.path.join(EMBEDDINGS_DIR, "tempos.npy")).tolist()
            database_melody_sequences = np.load(os.path.join(EMBEDDINGS_DIR, "melody_sequences.npy"), allow_pickle=True).tolist()
            database_lyrics_emb = np.load(os.path.join(EMBEDDINGS_DIR, "lyrics_emb.npy")).tolist()
            database_chord_seq = np.load(os.path.join(EMBEDDINGS_DIR, "chord_seq.npy"), allow_pickle=True).tolist()
            database_rms = np.load(os.path.join(EMBEDDINGS_DIR, "rms.npy"), allow_pickle=True).tolist()  # New
            database_tracks = np.load(os.path.join(EMBEDDINGS_DIR, "tracks.npy")).tolist()
            MAX_DISTANCE_ONSETS = np.load(os.path.join(EMBEDDINGS_DIR, "max_distance_onsets.npy"))[0]
            MAX_DISTANCE_CHROMA = np.load(os.path.join(EMBEDDINGS_DIR, "max_distance_chroma.npy"))[0]
            MAX_DISTANCE_RMS = np.load(os.path.join(EMBEDDINGS_DIR, "max_distance_rms.npy"))[0]  # New
            index.reset()
            index.add(np.array(database_embeddings, dtype=np.float32))
            logger.info(f"Successfully loaded features from {EMBEDDINGS_DIR} with {len(database_tracks)} tracks")
            return True
        except Exception as e:
            logger.error(f"Error loading features: {e}")
            return False
    else:
        missing_files = [f for f in required_files if not os.path.exists(os.path.join(EMBEDDINGS_DIR, f))]
        logger.warning(f"Feature files missing in {EMBEDDINGS_DIR}: {missing_files}")
        return False

# Database initialization
def initialize_database():
    global MAX_DISTANCE_ONSETS, MAX_DISTANCE_CHROMA, MAX_DISTANCE_RMS
    if load_features():
        return
    audio_files = [os.path.join(AUDIO_FOLDER_PATH, f) for f in os.listdir(AUDIO_FOLDER_PATH) if f.endswith(ALLOWED_FORMATS)]
    with ThreadPoolExecutor() as executor:
        results = list(tqdm(executor.map(process_audio_file, audio_files), total=len(audio_files), desc="Processing audio files"))
    for audio_path, result in results:
        if all(v is not None for v in result):
            database_embeddings.append(result[0])
            database_spectral.append(result[1])
            database_chroma_seq.append(result[2])
            database_onsets.append(result[3])
            database_tempos.append(result[4])
            database_melody_sequences.append(result[5])
            database_lyrics_emb.append(result[6])
            database_chord_seq.append(result[7])
            database_rms.append(result[8])  # New
            database_tracks.append(os.path.basename(audio_path))
    n = len(database_onsets)
    all_pairs = [(i, j) for i in range(n) for j in range(i + 1, n)]
    sample_size = min(100, len(all_pairs))
    sampled_pairs = random.sample(all_pairs, sample_size) if sample_size < len(all_pairs) else all_pairs
    onset_dtw_distances = []
    for i, j in sampled_pairs:
        if database_onsets[i] is not None and database_onsets[j] is not None:
            dist, _ = fastdtw(database_onsets[i][::5], database_onsets[j][::5], dist=scalar_distance)
            onset_dtw_distances.append(dist)
    MAX_DISTANCE_ONSETS = max(onset_dtw_distances) if onset_dtw_distances else 1000
    chroma_dtw_distances = []
    chroma_dist = lambda x, y: cosine(x, y)
    for i, j in sampled_pairs:
        if database_chroma_seq[i] is not None and database_chroma_seq[j] is not None:
            dist, _ = fastdtw(database_chroma_seq[i][::5], database_chroma_seq[j][::5], dist=chroma_dist)
            chroma_dtw_distances.append(dist)
    MAX_DISTANCE_CHROMA = max(chroma_dtw_distances) if chroma_dtw_distances else 1000
    rms_dtw_distances = []
    for i, j in sampled_pairs:
        if database_rms[i] is not None and database_rms[j] is not None:
            dist, _ = fastdtw(database_rms[i][::5], database_rms[j][::5], dist=scalar_distance)
            rms_dtw_distances.append(dist)
    MAX_DISTANCE_RMS = max(rms_dtw_distances) if rms_dtw_distances else 1.0  # RMS is typically small
    if database_embeddings:
        index.add(np.array(database_embeddings, dtype=np.float32))
        save_features()
    else:
        logger.warning("Database is empty!")

# Query comparison
def compare_query_to_database(query_audio_path):
    y, sr = librosa.load(query_audio_path, sr=None)
    y = librosa.util.normalize(y)  # Normalize audio volume
    onsets, tempo, beats = extract_onset_features(y, sr)
    query_clap = extract_clap_embedding(y)
    query_spectral = extract_spectral_features(y, sr)
    query_chroma_seq = extract_chroma_features(y, sr, beats)
    query_onsets = onsets
    query_tempo = tempo
    query_melody_seq = extract_melody_sequence(y)
    query_lyrics_emb = extract_lyrics_embedding(y, sr)
    query_chord_seq = extract_chord_sequence(y, sr, beats)
    query_rms = extract_rms_sequence(y, sr, beats)
    if query_clap is not None:
        query_clap_reshaped = query_clap.reshape(1, -1)
        k = min(100, len(database_embeddings))
        D, I = index.search(query_clap_reshaped, k)
        candidates = [int(idx) for idx in I[0] if idx != -1]
    else:
        candidates = list(range(len(database_embeddings)))
    H_list, M_list, B_list, C_list, S_list, T_list, L_list, Chord_list, E_list = [], [], [], [], [], [], [], [], []
    chroma_dist = lambda x, y: cosine(x, y)
    for i in candidates:
        H = np.dot(query_clap, database_embeddings[i]) / (np.linalg.norm(query_clap) * np.linalg.norm(database_embeddings[i])) if query_clap is not None else 0.0
        H = max(0, H)
        S = 1 - cosine(query_spectral, database_spectral[i]) if query_spectral is not None else 0.0
        S = max(0, S)
        C = dtw_similarity(query_chroma_seq, database_chroma_seq[i], sigma=MAX_DISTANCE_CHROMA / 10, dist_func=chroma_dist)
        B = dtw_similarity(query_onsets, database_onsets[i], sigma=MAX_DISTANCE_ONSETS / 10)
        M = dtw_similarity(query_melody_seq, database_melody_sequences[i], sigma=50)
        T = np.exp(-abs(query_tempo - database_tempos[i]) / 10) if query_tempo is not None else 0.0
        L = 1 - cosine(query_lyrics_emb, database_lyrics_emb[i]) if query_lyrics_emb is not None else 0.0
        L = max(0, L)
        Chord = dtw_similarity(query_chord_seq, database_chord_seq[i], sigma=MAX_DISTANCE_CHROMA / 10, dist_func=chroma_dist)
        E = dtw_similarity(query_rms, database_rms[i], sigma=MAX_DISTANCE_RMS / 10, dist_func=scalar_distance)
        H_list.append(H)
        S_list.append(S)
        C_list.append(C)
        B_list.append(B)
        M_list.append(M)
        T_list.append(T)
        L_list.append(L)
        Chord_list.append(Chord)
        E_list.append(E)
    def normalize(lst):
        lst = np.array(lst)
        min_val, max_val = lst.min(), lst.max()
        return (lst - min_val) / (max_val - min_val + 1e-8) if max_val > min_val else lst / (lst + 1e-8)
    H_list = normalize(H_list)
    M_list = normalize(M_list)
    B_list = normalize(B_list)
    C_list = normalize(C_list)
    S_list = normalize(S_list)
    T_list = normalize(T_list)
    L_list = normalize(L_list)
    Chord_list = normalize(Chord_list)
    E_list = normalize(E_list)
    variances = {
        'H': np.var(H_list),
        'M': np.var(M_list),
        'B': np.var(B_list),
        'C': np.var(C_list),
        'S': np.var(S_list),
        'T': np.var(T_list),
        'L': np.var(L_list),
        'Chord': np.var(Chord_list),
        'E': np.var(E_list)
    }
    adjusted_weights = {f: variances[f] for f in variances}
    total_weight = sum(adjusted_weights.values()) + 1e-8
    adjusted_weights = {f: w / total_weight for f, w in adjusted_weights.items()}
    adjusted_weights['M'] *= 2.0
    adjusted_weights['C'] *= 2.0
    adjusted_weights['B'] *= 1.5
    adjusted_weights['L'] *= 1.5
    adjusted_weights['Chord'] *= 1.5
    adjusted_weights['E'] *= 1.2  # New for energy
    adjusted_weights['S'] *= 0.5
    adjusted_weights['H'] *= 0.5
    total_weight = sum(adjusted_weights.values()) + 1e-8
    adjusted_weights = {f: w / total_weight for f, w in adjusted_weights.items()}
    logger.info(f"Dynamic weights: {adjusted_weights}")
    logger.info(f"Variances: {variances}")
    similarities = []
    for idx, i in enumerate(candidates):
        score = (
            adjusted_weights['H'] * H_list[idx] +
            adjusted_weights['M'] * M_list[idx] +
            adjusted_weights['B'] * B_list[idx] +
            adjusted_weights['C'] * C_list[idx] +
            adjusted_weights['S'] * S_list[idx] +
            adjusted_weights['T'] * T_list[idx] +
            adjusted_weights['L'] * L_list[idx] +
            adjusted_weights['Chord'] * Chord_list[idx] +
            adjusted_weights['E'] * E_list[idx]
        )
        similarities.append({'track': database_tracks[i], 'similarity_score': score})
    similarities.sort(key=lambda x: x['similarity_score'], reverse=True)
    top_20 = similarities[:20]
    logger.info(f"Top 20 tracks: {[t['track'] for t in top_20]}")
    return top_20

# Main execution
def main():
    initialize_database()
    if not database_embeddings:
        logger.warning("No valid audio data available.")
        return
    query_audio = "/home/ubuntu/mahesh_YUE/input_songs1/KISS - Crazy Crazy Nights.mp3"
    top_similar = compare_query_to_database(query_audio)
    print("\nTop similar tracks:")
    query_filename = os.path.basename(query_audio)
    for t in top_similar:
        print(f"Track: {t['track']}, Score: {t['similarity_score']:.3f}")
        if t['track'] == query_filename:
            logger.info(f"Query audio {query_filename} found with score {t['similarity_score']:.3f}")

if __name__ == "__main__":
    main()

I0000 00:00:1755776534.238803 3412442 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46866 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:07:00.0, compute capability: 8.6
I0000 00:00:1755776537.274022 3412533 service.cc:148] XLA service 0x7bbd4400d950 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1755776537.274122 3412533 service.cc:156]   StreamExecutor device (0): NVIDIA RTX A6000, Compute Capability 8.6
2025-08-21 11:42:17.312370: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1755776537.388550 3412533 cuda_dnn.cc:529] Loaded cuDNN version 90501


[1m 39/606[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 4ms/step

I0000 00:00:1755776538.226373 3412527 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m556/556[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step
[1m606/606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step
[1m1067/1067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step
[1m1156/1156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step
[1m1246/1246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step


Processing audio files: 100%|██████████| 5/5 [01:13<00:00, 14.65s/it]
2025-08-21 11:43:22,084 - INFO - Successfully saved features to /home/ubuntu/mahesh_YUE/version_3_embed with 5 tracks


[1m1067/1067[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step


2025-08-21 11:43:45,135 - INFO - Dynamic weights: {'H': 0.04419216267009347, 'M': 0.19399517334478447, 'B': 0.13246047427704927, 'C': 0.14123041224982508, 'S': 0.04626186156810752, 'T': 0.10460655194051856, 'L': 0.1271769222357077, 'Chord': 0.10768433818796548, 'E': 0.10239209569259557}
2025-08-21 11:43:45,137 - INFO - Variances: {'H': 0.1364888, 'M': 0.1497899508659471, 'B': 0.13636935800373912, 'C': 0.1090485713996631, 'S': 0.14288112125219268, 'T': 0.16154020747287498, 'L': 0.1309298893336354, 'Chord': 0.11086208279034335, 'E': 0.1317670840156806}
2025-08-21 11:43:45,137 - INFO - Top 20 tracks: ['KISS - Crazy Crazy Nights.mp3', 'Aerosmith - Dude (Looks Like A Lady).mp3', 'The Beatles - All My Loving - Remastered 2009.mp3', 'Lita Ford - Playin_ with Fire.mp3', 'light-instrumental-melody-4-319030.wav']
2025-08-21 11:43:45,139 - INFO - Query audio KISS - Crazy Crazy Nights.mp3 found with score 0.991



Top similar tracks:
Track: KISS - Crazy Crazy Nights.mp3, Score: 0.991
Track: Aerosmith - Dude (Looks Like A Lady).mp3, Score: 0.452
Track: The Beatles - All My Loving - Remastered 2009.mp3, Score: 0.342
Track: Lita Ford - Playin_ with Fire.mp3, Score: 0.339
Track: light-instrumental-melody-4-319030.wav, Score: 0.193
