In [44]:
import librosa
from tqdm import tqdm
import os
import pandas as pd
import numpy as np
import pickle

In [45]:
def get_audio_files(path, extension):
    files = []
    for file in os.listdir(path):
        if file.endswith(extension):
            files.append(os.path.join(path, file))
    return files

In [46]:
def get_features(y, sr):
    y = y[0:sr*3] 	# analyze just first second
    S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)
    log_S = librosa.amplitude_to_db(S, ref=np.max)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta_mfcc = librosa.feature.delta(mfcc, mode='nearest')
    delta2_mfcc = librosa.feature.delta(mfcc, order=2, mode='nearest')
    feature_vector = np.concatenate(
        (np.mean(mfcc, 1), np.mean(delta_mfcc, 1), np.mean(delta2_mfcc, 1)))
    feature_vector = (feature_vector-np.mean(feature_vector)
                      ) / np.std(feature_vector)
    return feature_vector

In [None]:
audio_dir = '/Users/frederik/OneDrive - Roskilde Universitet/multivocal - anyines'
files = get_audio_files(audio_dir, 'wav')
feature_vectors = []
for f in tqdm(files):
    y, sr = librosa.load(f)
    feat = get_features(y, sr)
    feature_vectors.append({"file": f, "features": feat})

with open('features.pkl', 'wb') as f:
    pickle.dump(feature_vectors,f)

In [48]:
loaded_feature_vectors = []

with open('features.pkl', 'rb') as f:
    loaded_feature_vectors = pickle.load(f)

In [50]:
from sklearn.manifold import TSNE

tsne = TSNE(n_components=2, learning_rate=200, perplexity=50,
                verbose=1, angle=0.1)

In [51]:
tsne = tsne.fit_transform([f["features"] for f in feature_vectors])

[t-SNE] Computing 30 nearest neighbors...
[t-SNE] Indexed 31 samples in 0.000s...
[t-SNE] Computed neighbors for 31 samples in 0.005s...
[t-SNE] Computed conditional probabilities for sample 31 / 31
[t-SNE] Mean sigma: 1125899906842624.000000
[t-SNE] KL divergence after 250 iterations with early exaggeration: 46.512260
[t-SNE] KL divergence after 1000 iterations: 0.509151


In [56]:
data = []
for i, f in enumerate(feature_vectors):
    abspath = os.path.abspath(f['file'])
    file_name = os.path.basename(f['file'])
    data.append([abspath, file_name, tsne[i,0], tsne[i,1]])

df = pd.DataFrame(data, columns =['path','file_name','x','y'])
df.to_csv('feature_vectors.csv', index=False)

In [57]:
# create our callback function
def play_sound(trace, points, selector):
    path = df.iloc[points.point_inds].path.item()
    pygame.mixer.init()
    pygame.mixer.music.load(path)
    pygame.mixer.music.play(1)

In [59]:
import plotly.graph_objects as go
import pygame

f = go.FigureWidget([go.Scatter(x=df.x, y=df.y, text=df.file_name, 
                                customdata=[df.path, df.file_name], 
                                mode='markers')])
scatter = f.data[0]
f.layout.hovermode = 'closest'
f.update_traces(hovertemplate="%{text}<extra></extra>") # clean up hover label
scatter.on_click(play_sound)

f

pygame 2.0.1 (SDL 2.0.14, Python 3.9.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


FigureWidget({
    'data': [{'customdata': [['/Users/frederik/OneDrive - Roskilde
                            …