In [1]:
import librosa
import torchaudio
import os
from tqdm import tqdm
import random as r
# from sklearn import KMeans
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import sklearn
import pickle 
import matplotlib.pyplot as plt

In [2]:
import numpy as np
import librosa
import scipy.signal

def extract_features(audio, sr):
    """Build one flat feature vector describing the spectral content of a clip.

    Parameters
    ----------
    audio : np.ndarray
        Floating-point audio samples. A 1D (mono) signal is used as-is; an
        array with more than one dimension is treated as channel-first
        ``(channels, samples)`` and averaged down to mono before analysis.
    sr : int
        Sample rate of ``audio`` in Hz.

    Returns
    -------
    np.ndarray
        1D concatenation, in this order, of:
        time-averaged spectral centroid, frame-to-frame difference of the
        onset-strength envelope ("spectral flux"), time-averaged spectral
        flatness, time-averaged CQT magnitude per bin, and time-averaged
        spectral rolloff.

    Notes
    -----
    The flux segment is kept per frame, so the length of the returned vector
    depends on the clip duration. Clips must share the same length (or the
    flux must be summarised by the caller) before the vectors can be stacked
    into a single matrix.
    """
    # Multi-channel input would give every feature an extra leading axis and
    # make the concatenation below fail, so collapse it to mono up front.
    # For 1D input this branch is skipped and the signal is left untouched.
    if np.ndim(audio) > 1:
        audio = librosa.to_mono(audio)

    # Spectral centroid, one value per frame.
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)

    # Spectral flux, approximated by the first difference of the onset envelope.
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    spectral_flux = np.diff(onset_env)

    # Spectral flatness (tonal vs. noise-like), one value per frame.
    spectral_flatness = librosa.feature.spectral_flatness(y=audio)

    # Constant-Q magnitude spectrum for harmonic content.
    cqt = np.abs(librosa.cqt(y=audio, sr=sr))

    # Spectral rolloff, one value per frame.
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)

    # TODO: LPC coefficients were planned but never implemented; they are not
    # part of the returned vector.

    # axis=1 is the frame axis, so each np.mean collapses time and keeps one
    # value per feature row (or per CQT bin).
    features = np.concatenate([
        np.mean(spectral_centroid, axis=1),
        spectral_flux,  # kept per frame, see Notes
        np.mean(spectral_flatness, axis=1),
        np.mean(cqt, axis=1),
        np.mean(spectral_rolloff, axis=1),
    ])

    return features

In [None]:
# Walk the rendered-audio directory in random order and compute one feature
# vector per FLAC file. `fs` and `embeddings` stay index-aligned: entry i of
# `fs` is the path whose features are stored in entry i of `embeddings`.
path = "data/rendered_audio/rendered_audio/"
files = os.listdir(path)
r.shuffle(files)
fs = []
embeddings = []

for file in tqdm(files):
    # Test the extension itself rather than a substring, so names such as
    # "take.flac.bak" are skipped while "TAKE.FLAC" is still picked up.
    if not file.lower().endswith(".flac"):
        continue

    full_path = os.path.join(path, file)
    try:
        # Use the sample rate stored in the file instead of assuming 44.1 kHz;
        # every frequency-based feature depends on it.
        audio, sample_rate = torchaudio.load(full_path)
        # torchaudio returns (channels, samples). Averaging over the channel
        # axis equals squeeze(0) for mono files and downmixes stereo files
        # instead of passing a 2D array on.
        audio_2 = audio.mean(dim=0).numpy()
        features = extract_features(audio_2, sample_rate)
        embeddings.append(features)
        fs.append(full_path)
    except Exception as e:
        # Best effort: report which file failed and carry on with the rest.
        print("error", full_path, e)
            


  9%|████████████▌                                                                                                                        | 2556/27131 [03:49<39:32, 10.36it/s]

In [None]:
# Collapse every per-clip feature array to a 1D copy before handing the
# collection to PCA.
emb = list(map(lambda features: features.flatten(), embeddings))

In [None]:
# Project each clip's feature vector onto its three leading principal
# components; svd_solver='full' asks scikit-learn for an exact full SVD.
# `pca_` holds one row of three coordinates per entry of `emb`.
pca = PCA(svd_solver='full', n_components=3)
pca_ = pca.fit_transform(emb)

In [None]:
# Cluster the PCA-projected clips into 11 groups.
# random_state pins the centroid initialisation so cluster ids and plot colours
# are identical on every Restart & Run All; n_init is set explicitly because
# its default differs between scikit-learn releases.
kmean_model = KMeans(n_clusters=11, n_init=10, random_state=0).fit(pca_)

In [None]:
# Assign each PCA-projected clip to its nearest fitted centroid: one cluster
# index per row of `pca_`.
labels = kmean_model.predict(pca_)

In [None]:
# Split the projected points column-wise into one list per principal
# component (a -> first, b -> second, c -> third). Transposing `pca_` yields
# one row per component; extend the unpacking (feature_d, feature_e, ...) if
# PCA is configured with more components.
feature_a, feature_b, feature_c = (list(component) for component in pca_.T)

In [None]:
# Pairwise 2D scatter plots of the PCA components, coloured by cluster label.
plots = [feature_a, feature_b, feature_c]#, feature_d, feature_e]
plot_names = ["PCA component a", "PCA component b", "PCA component c"]

for idx, val in enumerate(plots):
    for idx1, val1 in enumerate(plots):
        # idx > idx1 visits every unordered pair exactly once: (b, a), (c, a), (c, b).
        if idx > idx1:
            fig, ax = plt.subplots()
            ax.scatter(val, val1, c=labels)
            # Label the axes so the otherwise identical-looking figures can be told apart.
            ax.set_xlabel(plot_names[idx])
            ax.set_ylabel(plot_names[idx1])
            ax.set_title(f"{plot_names[idx]} vs {plot_names[idx1]} (colour = cluster)")
            plt.show()

In [None]:
import pandas as pd

# Results table: one row per successfully processed clip, holding its file
# path, its cluster label and its three PCA coordinates. The cells below read
# `df`, so it has to be built here for the notebook to run on a fresh kernel.
df = pd.DataFrame({
    'file': fs,
    'label': labels,
    'pca_feature_a': feature_a,
    'pca_feature_b': feature_b,
    'pca_feature_c': feature_c,
})

In [None]:
# Display the results table; `df` must already exist in the kernel namespace.
df

In [None]:
import plotly.graph_objs as go
import plotly.offline as py

import pandas as pd
import numpy as np
from ipywidgets import interactive, HBox, VBox

py.init_notebook_mode()


def _with_jitter(values, column):
    """Return `values` plus uniform noise of up to 10% of the value range of df[column]."""
    spread = df[column].max() - df[column].min()
    return values + np.random.rand(N)/10 * spread


# Interactive 3D scatter of the three PCA coordinates, one marker per row of `df`.
f = go.FigureWidget([go.Scatter3d(
    x=df['pca_feature_a'],
    y=df['pca_feature_b'],
    z=df['pca_feature_c'],
)])
scatter = f.data[0]
N = len(df)

# Nudge every point by a little random noise so overlapping markers separate,
# then make the markers translucent.
scatter.x = _with_jitter(scatter.x, 'pca_feature_a')
scatter.y = _with_jitter(scatter.y, 'pca_feature_b')
scatter.z = _with_jitter(scatter.z, 'pca_feature_c')
scatter.marker.opacity = 0.5

def update_axes(xaxis, yaxis, zaxis):
    """Re-plot the 3D scatter using the chosen `df` columns for each axis.

    Parameters
    ----------
    xaxis, yaxis, zaxis : str
        Names of the `df` columns to show on the x, y and z axes.

    Side effects
    ------------
    Updates the first trace of the figure widget `f` in place: sets the three
    scene axis titles and replaces the trace coordinates with the selected
    columns plus uniform jitter of up to 10% of each column's value range.
    """
    scatter = f.data[0]
    with f.batch_update():
        # A Scatter3d trace is drawn in `layout.scene`; the top-level
        # layout.xaxis/yaxis belong to 2D plots and layout has no zaxis.
        f.layout.scene.xaxis.title = xaxis
        f.layout.scene.yaxis.title = yaxis
        f.layout.scene.zaxis.title = zaxis
        # Each axis gets its own column and its own jitter range; z uses the
        # z column instead of reusing the y data.
        for axis, column in (("x", xaxis), ("y", yaxis), ("z", zaxis)):
            values = df[column]
            jitter = np.random.rand(N)/10 * (values.max() - values.min())
            setattr(scatter, axis, values.to_numpy() + jitter)

# Offer every numeric column as an axis choice. Filtering on 'int64' would
# drop the floating-point PCA coordinate columns, leaving nothing useful to plot.
numeric_columns = list(df.select_dtypes('number').columns)
axis_dropdowns = interactive(update_axes, xaxis=numeric_columns, yaxis=numeric_columns, zaxis=numeric_columns)

# # Create a table FigureWidget that updates on selection from points in the scatter plot of f
# t = go.FigureWidget([go.Table(
#     header=dict(values=['pca_feature_a','pca_feature_b','pca_feature_c'],
#                 fill = dict(color='#C2D4FF'),
#                 align = ['left'] * 5),
#     cells=dict(values=[df[col] for col in ['pca_feature_a','pca_feature_b','pca_feature_c']],
#                fill = dict(color='#F5F8FF'),
#                align = ['left'] * 5))])

# def selection_fn(trace,points,selector):
#     t.data[0].cells.values = [df.loc[points.point_inds][col] for col in ['pca_feature_a','pca_feature_b','pca_feature_c']]

# scatter.on_selection(selection_fn)

# Put everything together: the axis dropdowns in a row above the 3D figure.
# The selection table widget `t` is only defined in commented-out code, so it
# is left out of the layout here; add it back as a third child once it exists.
VBox((HBox(axis_dropdowns.children), f))