In [None]:
import os
from spleeter.separator import Separator
import librosa
import librosa.display
from sklearn.cluster import KMeans
import tensorflow_hub as hub
import numpy as np
import shutil
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:


# Pretrained models used by this notebook.
# YAMNet is loaded from TensorFlow Hub through its model handle.
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

# Spleeter separator built from the 'spleeter:2stems' configuration.
separator = Separator('spleeter:2stems')

def separate_sources(audio_file_path, output_directory):
    """Separate one audio file into stems with the module-level ``separator``.

    Args:
        audio_file_path: Path of the audio file to separate.
        output_directory: Directory handed to Spleeter as the destination
            for the separated stems.
    """
    write_stems = separator.separate_to_file
    write_stems(audio_file_path, output_directory)

def extract_features(file_path):
    """Compute the MFCCs of an audio file using librosa's default settings.

    Args:
        file_path: Path of the audio file. It is loaded with ``sr=None``,
            i.e. without resampling to librosa's default rate.

    Returns:
        Whatever ``librosa.feature.mfcc`` returns for the loaded signal.
    """
    waveform, sample_rate = librosa.load(file_path, sr=None)
    return librosa.feature.mfcc(y=waveform, sr=sample_rate)

def perform_clustering(features_list, n_clusters, random_state=None):
    """Cluster per-track features with K-means and return one label per track.

    KMeans needs a 2-D ``(n_samples, n_features)`` matrix. Each entry of
    ``features_list`` may be either a 1-D feature vector (used as is) or a
    matrix whose last axis is time, e.g. the output of ``extract_features``.
    Matrices are averaged over their last axis so that tracks of different
    duration still yield vectors of equal length.

    Args:
        features_list: Sequence of per-track features (vectors or matrices).
        n_clusters: Requested number of clusters. It is capped at the number
            of tracks, because KMeans rejects ``n_clusters > n_samples``.
        random_state: Optional seed forwarded to ``KMeans`` for reproducible
            labels. The default ``None`` keeps the previous behaviour.

    Returns:
        Array of cluster labels, one per entry of ``features_list``. It is
        empty when ``features_list`` is empty.

    Raises:
        ValueError: If the per-track vectors do not all have the same length.
    """
    rows = []
    for features in features_list:
        vector = np.asarray(features, dtype=float)
        if vector.ndim >= 2:
            # Collapse the time axis: the frame count differs per track.
            vector = vector.mean(axis=-1)
        rows.append(vector.ravel())

    if not rows:
        return np.empty(0, dtype=int)

    feature_matrix = np.vstack(rows)
    effective_clusters = min(n_clusters, len(rows))

    kmeans = KMeans(n_clusters=effective_clusters, random_state=random_state)
    kmeans.fit(feature_matrix)
    return kmeans.labels_

def classify_sound(yamnet_model, wav_data):
    """Run ``yamnet_model`` on ``wav_data`` and return its scores as NumPy.

    The model call must return exactly three values (scores, embeddings,
    spectrogram); only the first one is used.

    Args:
        yamnet_model: Callable model, e.g. the YAMNet module loaded from
            TensorFlow Hub.
        wav_data: Waveform passed to the model unchanged.
            NOTE(review): the YAMNet model card asks for mono 16 kHz float32
            samples in [-1, 1]; nothing here resamples, so confirm callers do.

    Returns:
        The result of calling ``.numpy()`` on the scores output.
    """
    class_scores, _embeddings, _log_mel = yamnet_model(wav_data)
    scores_array = class_scores.numpy()
    return scores_array

def save_results(labels, file_paths, output_directory):
    """Move each path into a sub-directory named after its cluster label.

    ``labels`` and ``file_paths`` are paired positionally with ``zip``, so
    surplus entries in the longer sequence are ignored.

    Args:
        labels: Cluster label for each path; ``str(label)`` becomes the
            name of the sub-directory.
        file_paths: Paths to move. They are handed to ``shutil.move``
            unchanged, so relative paths resolve against the current
            working directory.
        output_directory: Parent directory of the per-label sub-directories,
            which are created on demand.
    """
    for cluster_label, source_path in zip(labels, file_paths):
        cluster_directory = os.path.join(output_directory, str(cluster_label))
        os.makedirs(cluster_directory, exist_ok=True)
        shutil.move(source_path, cluster_directory)

def plot_spectrogram(file_path):
    """Plot the magnitude spectrogram (in dB, log-frequency axis) of a file.

    Args:
        file_path: Path of the audio file. It is loaded with ``sr=None``,
            i.e. without resampling to librosa's default rate.
    """
    y, sr = librosa.load(file_path, sr=None)

    # The STFT is complex-valued. Convert magnitudes explicitly so that
    # amplitude_to_db does not warn about complex input and discard the
    # phase implicitly.
    magnitude = np.abs(librosa.stft(y))
    db = librosa.amplitude_to_db(magnitude, ref=np.max)

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(db, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.tight_layout()
    plt.show()
def plot_mfccs(mfccs):
    """Show an MFCC matrix as an image with a time axis and a colour bar.

    Args:
        mfccs: MFCC matrix handed to ``librosa.display.specshow``,
            e.g. the return value of ``extract_features``.
    """
    figure_size = (10, 4)
    plt.figure(figsize=figure_size)

    librosa.display.specshow(mfccs, x_axis='time')
    plt.colorbar()
    plt.title('MFCC')

    plt.tight_layout()
    plt.show()


def plot_cluster_distribution(labels):
    """Draw a count plot of how many items fall into each cluster label.

    Args:
        labels: Cluster labels, one per item, passed to ``sns.countplot``
            as the ``x`` values.
    """
    sns.countplot(x=labels)

    annotations = (
        (plt.title, 'Cluster Distribution'),
        (plt.xlabel, 'Cluster Label'),
        (plt.ylabel, 'Count'),
    )
    for apply_text, text in annotations:
        apply_text(text)

    plt.show()


In [None]:

# Path to your main folder containing class folders
main_folder_path = 'upload3'

# For every class folder: separate each audio file, extract features from its
# vocals stem, cluster the tracks, and file each track's stems by cluster.
for class_folder in sorted(os.listdir(main_folder_path)):
    class_folder_path = os.path.join(main_folder_path, class_folder)

    # Ensure it's a folder
    if not os.path.isdir(class_folder_path):
        continue

    # Output directory for separated sources
    output_directory = os.path.join(class_folder_path, 'separated_sources')
    os.makedirs(output_directory, exist_ok=True)

    # One feature vector and one stem directory per successfully processed
    # track, kept in the same order so labels can be matched back to tracks.
    features_list = []
    separated_track_dirs = []

    for audio_file in sorted(os.listdir(class_folder_path)):
        audio_file_path = os.path.join(class_folder_path, audio_file)

        # Sub-directories (such as the output directory) are not inputs.
        if not os.path.isfile(audio_file_path):
            continue

        # Separate sources using Spleeter
        separate_sources(audio_file_path, output_directory)

        # Only look at the track that was just separated. With Spleeter's
        # default filename format its stems are written to
        # <output_directory>/<input name without extension>/<stem>.wav.
        track_name = os.path.splitext(audio_file)[0]
        track_directory = os.path.join(output_directory, track_name)
        separated_source_path = os.path.join(track_directory, 'vocals.wav')
        if not os.path.isfile(separated_source_path):
            print(f'Skipping {audio_file_path}: {separated_source_path} not found')
            continue

        features = extract_features(separated_source_path)

        # KMeans needs one fixed-length vector per track, but the MFCC
        # matrix grows with the track duration, so average it over time.
        features_list.append(np.asarray(features).mean(axis=1))
        separated_track_dirs.append(track_directory)

        # Plotting spectrogram and MFCCs for each separated source
        plot_spectrogram(separated_source_path)
        plot_mfccs(features)

    # Nothing to cluster in this class folder.
    if not features_list:
        continue

    # Perform clustering on extracted features. KMeans cannot form more
    # clusters than there are tracks, so cap the requested number.
    n_clusters = min(3, len(features_list))
    labels = perform_clustering(features_list, n_clusters)

    # Plotting cluster distribution
    plot_cluster_distribution(labels)

    # Move each track's stem directory into <class folder>/<cluster label>/.
    save_results(labels, separated_track_dirs, class_folder_path)
