In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
import scann
import librosa
import os
from sklearn.metrics import silhouette_score
from sklearn.base import BaseEstimator
from sklearn.model_selection import GridSearchCV

# Load precomputed features from the CSV file
data_path = "/home/mutayyab/Documents/dataset/preprocessed_audios.csv"  # Update this path if needed
features_df = pd.read_csv(data_path)

# Assume features are all columns after 'filename'
feature_columns = features_df.columns[1:448]
features = features_df[feature_columns].values
normalized_features = tf.math.l2_normalize(features, axis=1)

# Define ScannSearcherWrapper class for GridSearchCV
class ScannSearcherWrapper(BaseEstimator):
    def __init__(self, features, num_neighbors=10, num_leaves=10, num_leaves_to_search=5):
        self.features = features
        self.num_neighbors = num_neighbors
        self.num_leaves = num_leaves
        self.num_leaves_to_search = num_leaves_to_search
        
    def fit(self, X, y=None):
        self.searcher = self.create_scann_index(self.features, self.num_neighbors, self.num_leaves, self.num_leaves_to_search)
        return self
    
    def create_scann_index(self, features, num_neighbors, num_leaves, num_leaves_to_search):
        searcher = scann.scann_ops_pybind.builder(
            features, num_neighbors, "dot_product"
        ).tree(
            num_leaves=num_leaves, num_leaves_to_search=num_leaves_to_search
        ).score_ah(
            2, anisotropic_quantization_threshold=0.2
        ).reorder(100).build()
        return searcher
    
    def search(self, X, n_neighbours=5):
        return self.get_nearest_neighbours(X, self.searcher, n_neighbours)
    
    def get_nearest_neighbours(self, feature_vector, searcher, n_neighbours=5):
        # Ensure the vector is one-dimensional and a numpy array
        if feature_vector.ndim != 1 or not isinstance(feature_vector, np.ndarray):
            feature_vector = np.array(feature_vector).flatten()

        neighbors, distances = searcher.search(feature_vector, final_num_neighbors=n_neighbours)
        return neighbors, distances

# Define a grid of values for num_leaves
param_grid = {'num_leaves': [10, 20, 30, 40, 50]}

# Create a grid search object
grid_search = GridSearchCV(estimator=ScannSearcherWrapper(features=normalized_features), param_grid=param_grid, scoring='accuracy', cv=5)


# Perform grid search
grid_search.fit(normalized_features.numpy())


# Get the best hyperparameters
best_num_leaves = grid_search.best_params_['num_leaves']

# Update the create_scann_index function with the best num_leaves
def create_scann_index(features, num_neighbors=10, num_leaves=best_num_leaves, num_leaves_to_search=5):
    searcher = scann.scann_ops_pybind.builder(
        features, num_neighbors, "dot_product"
    ).tree(
        num_leaves=num_leaves, num_leaves_to_search=num_leaves_to_search
    ).score_ah(
        2, anisotropic_quantization_threshold=0.2
    ).reorder(100).build()
    return searcher

# Create the ScaNN searcher with the best num_leaves
scann_searcher = create_scann_index(normalized_features)

def load_and_query(audio_path, features_df, searcher, n_neighbours=5):
    if not os.path.exists(audio_path):
        print("File not found. Please check the path and try again.")
        return

    y, sr = librosa.load(audio_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_mean = mfcc.mean(axis=1)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=50)
    spectrogram = librosa.amplitude_to_db(S, ref=np.max)
    spectrogram_mean = spectrogram.mean(axis=1)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempogram = librosa.feature.tempogram(onset_envelope=onset_env, sr=sr)
    tempogram_mean = tempogram.mean(axis=1)

    feature_vector = np.hstack([mfcc_mean, spectrogram_mean, tempogram_mean])
    if np.linalg.norm(feature_vector) > 0:
        feature_vector_normalized = feature_vector / np.linalg.norm(feature_vector)
    else:
        feature_vector_normalized = feature_vector

    nearest_neighbours, distances = get_nearest_neighbours(feature_vector_normalized, searcher, n_neighbours)
    print(" ")
    print("Top Recommendations Based on Rhythmic Similarity:")
    print(" ")
    for neighbour, distance in zip(nearest_neighbours, distances):
        print(f"Recommended Track: {features_df.iloc[neighbour]['filename']} - Similarity Score: {1 - distance}")

def get_nearest_neighbours(feature_vector, searcher, n_neighbours=5):
    # Ensure the vector is one-dimensional and a numpy array
    if feature_vector.ndim != 1 or not isinstance(feature_vector, np.ndarray):
        feature_vector = np.array(feature_vector).flatten()

    neighbors, distances = searcher.search(feature_vector, final_num_neighbors=n_neighbours)
    return neighbors, distances

def classify_rhythmic_features(df):
    print(" ")
    print("Calculating silhouette score based on clustering...")
    
    # Extract the feature columns for silhouette score calculation
    feature_columns = [col for col in df.columns if col.startswith('tempo')]
    
    # Calculate silhouette score
    silhouette_avg = silhouette_score(df[feature_columns], df['cluster'])
    print(f"Silhouette Score: {silhouette_avg}")
    
    return df

# Perform clustering and calculate silhouette score
features_df = classify_rhythmic_features(features_df)
print(" ")
audio_path = input("path_to_your_audio_file.mp3") # Update this path
load_and_query(audio_path, features_df, scann_searcher, 10)


2024-05-12 14:19:35.883136: I scann/partitioning/partitioner_factory_base.cc:59] Size of sampled dataset for training partition: 1662
2024-05-12 14:19:35.899632: I ./scann/partitioning/kmeans_tree_partitioner_utils.h:84] PartitionerFactory ran in 16.451518ms.
2024-05-12 14:19:36.338702: I scann/partitioning/partitioner_factory_base.cc:59] Size of sampled dataset for training partition: 1662
2024-05-12 14:19:36.356969: I ./scann/partitioning/kmeans_tree_partitioner_utils.h:84] PartitionerFactory ran in 18.223871ms.
2024-05-12 14:19:36.775973: I scann/partitioning/partitioner_factory_base.cc:59] Size of sampled dataset for training partition: 1662
2024-05-12 14:19:36.803925: I ./scann/partitioning/kmeans_tree_partitioner_utils.h:84] PartitionerFactory ran in 27.887433ms.
2024-05-12 14:19:37.231110: I scann/partitioning/partitioner_factory_base.cc:59] Size of sampled dataset for training partition: 1662
2024-05-12 14:19:37.258916: I ./scann/partitioning/kmeans_tree_partitioner_utils.h:84]

 
Calculating silhouette score based on clustering...
Silhouette Score: 0.2252306704302388
 


path_to_your_audio_file.mp3 /home/mutayyab/Documents/dataset/091/091125.mp3


 
Top Recommendations Based on Rhythmic Similarity:
 
Recommended Track: 091643.mp3 - Similarity Score: 0.369567334651947
Recommended Track: 091992.mp3 - Similarity Score: 0.3840720057487488
Recommended Track: 090003.mp3 - Similarity Score: 0.44844454526901245
Recommended Track: 093442.mp3 - Similarity Score: 0.4810391068458557
Recommended Track: 091635.mp3 - Similarity Score: 0.4901089072227478
Recommended Track: 091773.mp3 - Similarity Score: 0.5003558993339539
Recommended Track: 092273.mp3 - Similarity Score: 0.5029585659503937
Recommended Track: 091828.mp3 - Similarity Score: 0.5037169456481934
Recommended Track: 091268.mp3 - Similarity Score: 0.5423231720924377
Recommended Track: 092201.mp3 - Similarity Score: 0.5428546667098999
