In [6]:
import librosa
import numpy as np
import soundfile as sf
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import os

In [None]:
def get_features(audio_path, hop_length=512, n_mfcc=13, n_components=10):
    """Build a standardized per-frame feature matrix for one audio file.

    For every analysis frame the following are computed and concatenated:
      * MFCCs with their first- and second-order deltas (3 * n_mfcc values),
      * spectral centroid, spectral rolloff and spectral flux (3 values),
      * a PCA projection of the MFCC/delta stack (up to n_components values).

    Note: the PCA projection was labelled "i-vectors" in earlier versions of
    this code; it is a plain PCA fitted on this file's own frames, not an
    i-vector extractor.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load`` with
            its default arguments.
        hop_length: Hop size in samples shared by every feature extractor so
            that all features are on the same frame grid. Callers that map
            frames back to samples must use the same value.
        n_mfcc: Number of MFCC coefficients per frame.
        n_components: Requested number of PCA components. It is clamped to
            the number of frames / MFCC features available, because PCA
            rejects more components than min(n_samples, n_features).

    Returns:
        Array of shape (n_frames, 3 * n_mfcc + 3 + n_pca) where every column
        has been standardized with a ``StandardScaler`` fitted on this file
        only, and n_pca is the clamped component count.
    """
    y, sr = librosa.load(audio_path)

    # MFCCs plus their first- and second-order deltas, stacked on the
    # feature axis (rows = features, columns = frames).
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length)
    delta_mfcc = librosa.feature.delta(mfcc)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)
    mfccs = np.vstack([mfcc, delta_mfcc, delta2_mfcc])

    # Spectral descriptors computed with the same hop so frames line up.
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=hop_length)
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, hop_length=hop_length)

    # Spectral flux: squared magnitude change between consecutive STFT frames.
    # np.diff yields one value fewer than there are frames, so the last value
    # is repeated to restore the original frame count.
    magnitude = np.abs(librosa.stft(y, hop_length=hop_length))
    spectral_flux = np.sum(np.diff(magnitude, axis=1) ** 2, axis=0)
    spectral_flux = np.pad(spectral_flux, (0, 1), mode='edge')

    # Defensive truncation so every feature has the same number of frames.
    min_length = min(mfccs.shape[1],
                     spectral_centroid.shape[1],
                     spectral_rolloff.shape[1],
                     len(spectral_flux))

    mfccs = mfccs[:, :min_length]
    spectral_features = np.vstack([spectral_centroid[:, :min_length],
                                   spectral_rolloff[:, :min_length],
                                   spectral_flux[:min_length].reshape(1, -1)])

    # PCA projection of the MFCC stack. Clamp the component count so very
    # short clips (fewer frames than n_components) do not make PCA raise.
    mfcc_frames = mfccs.T
    n_pca = min(n_components, *mfcc_frames.shape)
    pca_projection = PCA(n_components=n_pca).fit_transform(mfcc_frames)

    # One row per frame, all feature groups side by side.
    features = np.hstack([mfcc_frames, spectral_features.T, pca_projection])

    # Zero-mean / unit-variance scaling per column.
    return StandardScaler().fit_transform(features)

In [14]:
def diarize_kmeans(audio_path, k=2):
    """Split an audio file into per-cluster WAV files using K-Means.

    Frame-level features from ``get_features`` are clustered into ``k + 1``
    groups (presumably k speakers plus one extra cluster for non-speech
    frames -- TODO confirm intent). For each cluster, the audio samples of
    all frames assigned to it are concatenated in time order and written to
    ``kmeans/cluster_<i>.wav``.

    Args:
        audio_path: Path of the audio file to diarize.
        k: Number of speakers; ``k + 1`` clusters are fitted.

    Returns:
        List of absolute paths of the WAV files that were written, in
        cluster order. A cluster that received no frames is skipped, so the
        list can be shorter than ``k + 1``.
    """
    # Must match the hop length used by get_features so that frame i covers
    # samples [i * hop_length, (i + 1) * hop_length).
    hop_length = 512
    n_clusters = k + 1

    audio, sr = librosa.load(audio_path)

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels = kmeans.fit_predict(get_features(audio_path))

    # sf.write does not create missing directories.
    os.makedirs('kmeans', exist_ok=True)

    paths = []
    for cluster in range(n_clusters):
        cluster_indices = np.where(labels == cluster)[0]

        # Slices past the end of the signal are simply shorter or empty.
        chunks = [audio[idx * hop_length:(idx + 1) * hop_length]
                  for idx in cluster_indices]
        if not chunks:
            # np.concatenate raises on an empty list; nothing to write.
            continue
        cluster_audio = np.concatenate(chunks)

        output_file = f'kmeans/cluster_{cluster}.wav'
        sf.write(output_file, cluster_audio, sr)
        paths.append(os.path.abspath(output_file))
        print(f'Cluster {cluster} audio saved to {output_file}')

    return paths

In [12]:
# Diarize the sample recording with the default k=2, which fits k + 1 = 3
# clusters and writes one WAV file per cluster under ./kmeans.
diarize_kmeans('gwtwd.wav')

Cluster 0 audio saved to kmeans/cluster_0.wav
Cluster 1 audio saved to kmeans/cluster_1.wav
Cluster 2 audio saved to kmeans/cluster_2.wav


[WinError 2] The system cannot find the file specified
  File "c:\Users\bala\AppData\Local\Programs\Python\Python312\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "c:\Users\bala\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\bala\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "c:\Users\bala\AppData\Local\Programs\Python\Python312\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


['c:\\Users\\bala\\code\\python\\ML_Project-main\\kmeans\\cluster_0.wav',
 'c:\\Users\\bala\\code\\python\\ML_Project-main\\kmeans\\cluster_1.wav',
 'c:\\Users\\bala\\code\\python\\ML_Project-main\\kmeans\\cluster_2.wav']