# Good First Working Version


In [29]:
import numpy as np
import plotly.graph_objects as go
import os
import ipywidgets as widgets
from IPython.display import display
import librosa
from IPython.display import display, Audio

np.random.seed(1)


def list_audio_files(directory):
    return [f for f in os.listdir(directory) if f.endswith(".wav")]


def analyze_audio(file_path):
    # Load audio file
    y, sr = librosa.load(file_path)

    # Calculate duration
    duration = librosa.get_duration(y=y, sr=sr)

    # Calculate loudness (RMS)
    rms = np.sqrt(np.mean(y**2))

    return duration, rms


directory = "sounds/4 - ZENITH - Drum One Shots/"
audio_files = list_audio_files(directory)

durations = []

loudness = []


for audio_file in audio_files:
    file_path = os.path.join(directory, audio_file)
    duration, rms = analyze_audio(file_path)
    durations.append(duration)
    loudness.append(rms)

    # Create an interactive plot with Plotly

    # Add scatter plot
f = go.FigureWidget(
    [
        go.Scatter(
            x=durations,
            y=loudness,
            mode="markers",
            text=audio_files,
            textposition="top center",
            marker=dict(size=10, color="blue", opacity=0.8),
            hoverinfo="text",
        )
    ]
)

f.update_layout(
    title="Audio Files: Duration vs Loudness",
    xaxis_title="Duration (seconds)",
    yaxis_title="Loudness (RMS)",
    showlegend=False,
)
scatter = f.data[0]
colors = ["#a3a7e4"] * 100
scatter.marker.color = colors
scatter.marker.size = [10] * 100
f.layout.hovermode = "closest"


# create our callback function
def update_point(trace, points, selector):
    if points.point_inds:
        # Get the index of the clicked point
        ind = points.point_inds[0]

        # Get the corresponding audio file
        audio_file = audio_files[ind]
        file_path = os.path.join(directory, audio_file)

        # Play the audio file
        display(Audio(filename=file_path, autoplay=True))


scatter.on_click(update_point)

display(f)

FigureWidget({
    'data': [{'hoverinfo': 'text',
              'marker': {'color': [#a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,
                                   #a3a7e4, #a3a7e4, #a3a7e4, #a3a7e4,

## usinf pygame to create a simple game


In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import librosa
from IPython.display import display, Audio
import os
import pygame

np.random.seed(1)


def list_audio_files(directory):
    return [
        sounds_figure
        for sounds_figure in os.listdir(directory)
        if sounds_figure.endswith(".wav")
    ]


def analyze_audio(file_path):
    # Load audio file
    y, sr = librosa.load(file_path)

    # Calculate duration
    duration = librosa.get_duration(y=y, sr=sr)

    # Calculate loudness (RMS)
    rms = np.sqrt(np.mean(y**2))

    return duration, rms


directory = "sounds/4 - ZENITH - Drum One Shots/"
audio_files = list_audio_files(directory)

durations = []
loudness = []

for audio_file in audio_files:
    file_path = os.path.join(directory, audio_file)
    duration, rms = analyze_audio(file_path)
    durations.append(duration)
    loudness.append(rms)

# create a subplots with plotly

main_fig_subplots = make_subplots(rows=1, cols=2, column_widths=[0.7, 0.3])


# Create an interactive plot with Plotly
sounds_figure = go.FigureWidget(
    [
        go.Scatter(
            x=durations,
            y=loudness,
            mode="markers",
            text=audio_files,
            textposition="top center",
            marker=dict(size=10, color="blue", opacity=0.8),
            hoverinfo="text",
        )
    ]
)

sounds_figure.update_layout(
    title="Audio Files: Duration vs Loudness",
    xaxis_title="Duration (seconds)",
    yaxis_title="Loudness (RMS)",
    showlegend=False,
)

scatter = sounds_figure.data[0]
colors = ["#a3a7e4"] * len(audio_files)
scatter.marker.color = colors
scatter.marker.size = [10] * len(audio_files)
sounds_figure.layout.hovermode = "closest"


main_fig_subplots.add_trace(sounds_figure.data[0], row=1, col=1)

pygame.mixer.init()


# Create a callback function to play the corresponding audio file
def update_point(trace, points, selector):
    if points.point_inds:
        # Get the index of the clicked point
        ind = points.point_inds[0]

        # Get the corresponding audio file
        audio_file = audio_files[ind]
        file_path = os.path.join(directory, audio_file)

        # Play the audio file automatically
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()

        # now i want to use librosa to get signal
        y, sr = librosa.load(file_path)
        # make a plotly figure to display waveform
        fig = go.FigureWidget()
        fig.add_trace(go.Scatter(y=y, mode="lines"))
        fig.update_layout(title="Waveform of {}".format(audio_file))

        main_fig_subplots.add_trace(fig.data[0], row=1, col=2)


scatter.on_click(update_point)
scatter.on_hover(update_point)

# Display the figure
display(main_fig_subplots)

# WIP


In [32]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import librosa
from IPython.display import display, Audio
import os
import pygame

np.random.seed(1)


def list_audio_files(directory):
    return [
        sounds_figure
        for sounds_figure in os.listdir(directory)
        if sounds_figure.endswith(".wav")
    ]


def analyze_audio(file_path):
    # Load audio file
    y, sr = librosa.load(file_path)

    pitch = librosa.yin(
        y, fmin=librosa.note_to_hz("C2"), fmax=librosa.note_to_hz("C7")
    )
    mean_pitch = np.mean(pitch)
    # print(f"pitch: {mean_pitch}")

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mean_mfccs = np.mean(mfccs, axis=1)
    # print(f"mfccs: {mean_mfccs}")

    zcr = librosa.feature.zero_crossing_rate(y)
    mean_zcr = np.mean(zcr)
    # print(f"zcr: {mean_zcr}")

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    mean_chroma = np.mean(chroma, axis=1)
    # print(f"chroma: {mean_chroma}")

    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    mean_spectral_centroid = np.mean(spectral_centroid)
    # print(f"spectral_centroid: {mean_spectral_centroid}")

    harmonic = librosa.effects.harmonic(y)
    mean_harmonic = np.mean(harmonic)
    # print(f"harmonic: {mean_harmonic}")

    percussive = librosa.effects.percussive(y)
    mean_percussive = np.mean(percussive)
    # print(f"percussive: {mean_percussive}")

    spectral_flatness = librosa.feature.spectral_flatness(y=y)
    mean_spectral_flatness = np.mean(spectral_flatness)
    # print(f"spectral_flatness: {mean_spectral_flatness}")

    # Calculate duration
    duration = librosa.get_duration(y=y, sr=sr)

    # Calculate loudness (RMS)
    rms = np.sqrt(np.mean(y**2))

    feature_vector = np.concatenate(
        (
            [
                duration,
                rms,
                mean_pitch,
                mean_spectral_centroid,
                mean_zcr,
                mean_spectral_flatness,
                mean_harmonic,
                mean_percussive,
            ],
            mean_mfccs,
            mean_chroma,
        )
    )

    # print("Feature Vector:", feature_vector)
    # print("Feature Vector Shape:", feature_vector.shape)

    return feature_vector


directory = "sounds/4 - ZENITH - Drum One Shots/"
audio_files = list_audio_files(directory)

for audio_file in audio_files:
    file_path = os.path.join(directory, audio_file)
    audio_features = analyze_audio(file_path)

# create a subplots with plotly

main_fig_subplots = make_subplots(rows=1, cols=2, column_widths=[0.7, 0.3])

# Create an interactive plot with Plotly

scatter = go.Scatter(
    x=durations,
    y=loudness,
    mode="markers",
    text=audio_files,
    textposition="top center",
    marker=dict(size=10, color="blue", opacity=0.8),
    hoverinfo="text",
)

main_fig_subplots.add_trace(scatter, row=1, col=1)

main_fig_subplots.update_layout(
    title="Sound Cluster (alpha 0.1 @zean)",
    xaxis_title="Duration (seconds)",
    yaxis_title="Loudness (RMS)",
    showlegend=False,
)

main_fig_subplots.layout.hovermode = "closest"

# Convert main_fig_subplots to a FigureWidget to attach callbacks
main_fig_widget = go.FigureWidget(main_fig_subplots)

pygame.mixer.init()


# Create a callback function to play the corresponding audio file and update the waveform
def update_point(trace, points, selector):
    if points.point_inds:
        # Get the index of the clicked point
        ind = points.point_inds[0]

        # Get the corresponding audio file
        audio_file = audio_files[ind]
        file_path = os.path.join(directory, audio_file)

        # Play the audio file automatically
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()

        # Load the audio file using librosa
        y, sr = librosa.load(file_path)

        

        # Create a waveform plot
        waveform_trace = go.Scatter(y=y, mode="lines")

        # Update the second subplot with the waveform
        with main_fig_widget.batch_update():
            main_fig_widget.data[1].x = list(range(len(y)))
            main_fig_widget.data[1].y = y
            main_fig_widget.layout.annotations = [
                go.layout.Annotation(
                    text=f"{audio_file}",
                    x=1,
                    y=1.15,  # Position over the second subplot
                    xref="paper",
                    yref="paper",
                    showarrow=False,
                )
            ]


# Attach the callback to the scatter trace
main_fig_widget.data[0].on_click(update_point)
# main_fig_widget.data[0].on_hover(update_point)


# Initialize the second subplot with an empty trace to be updated later
main_fig_widget.add_trace(go.Scatter(y=[]), row=1, col=2)

# Display the figure
display(main_fig_widget)

FigureWidget({
    'data': [{'hoverinfo': 'text',
              'marker': {'color': 'blue', 'opacity': 0.8, 'size': 10},
              'mode': 'markers',
              'text': [Cymatics - Ride (Ghoul).wav, Cymatics - Percussion
                       (Cowbell).wav, Cymatics - Ride (Surprise).wav, Cymatics -
                       Clap (Ambulance).wav, Cymatics - Open Hat (Terrain).wav,
                       Cymatics - Snare (Baller).wav, Cymatics - Clap (Equal).wav,
                       Cymatics - Open Hat (DoubleDecker).wav, Cymatics - Hihat
                       (Vibrant).wav, Cymatics - Percussion (Knight).wav, Cymatics
                       - Clap (TwoTails).wav, Cymatics - Hihat (Power).wav,
                       Cymatics - Clap (Natural).wav, Cymatics - Snap (Much).wav,
                       Cymatics - Snap (Coffee).wav, Cymatics - Cymbal
                       (Layers).wav, Cymatics - Kick (Pushed).wav, Cymatics -
                       Cymbal (Industrial).wav, Cymatics -

In [22]:
# now we want to gather the data from sound using wave library and plot it using plotly as above
import wave
import numpy as np
import pandas as pd


def analyze_audio(file_path):
    # Load audio file
    with wave.open(file_path, "rb") as f:
        frames = f.readframes(-1)
        signal = np.frombuffer(frames, "int16")
        sr = f.getframerate()
        # get number of channels
        n_channels = f.getnchannels()
        # duration of the audio
        duration = f.getnframes() / sr
        # calculate loudness handlinf the number of channels invalid value in sqrt

        # aggregate to one value the data signal bcause it is a ndarray
        signal = signal.tolist()
        # calculate avf of the signal
        signal = np.mean(signal)

    return duration, sr, n_channels, signal


# Create an empty dataframe
df_audio = pd.DataFrame(
    columns=["Audio File", "Duration", "Sample Rate", "Channels", "Signal"]
)

for audio_file in audio_files:
    file_path = os.path.join(directory, audio_file)
    duration, sr, n_channels, signal = analyze_audio(file_path)
    new_row = pd.DataFrame(
        {
            "Audio File": [audio_file],
            "Duration": [duration],
            "Sample Rate": [sr],
            "Channels": [n_channels],
            "Signal": [signal],
        }
    )
    df_audio = pd.concat([df_audio, new_row], ignore_index=True)

df_audio


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



Unnamed: 0,Audio File,Duration,Sample Rate,Channels,Signal
0,Cymatics - Ride (Ghoul).wav,0.515488,44100,2,30.103213
1,Cymatics - Percussion (Cowbell).wav,0.551646,48000,2,4.459855
2,Cymatics - Ride (Surprise).wav,0.663424,44100,2,39.988675
3,Cymatics - Clap (Ambulance).wav,0.110385,44100,2,22.899856
4,Cymatics - Open Hat (Terrain).wav,1.875011,44100,2,23.814504
5,Cymatics - Snare (Baller).wav,0.311542,44100,2,-108.986171
6,Cymatics - Clap (Equal).wav,0.263673,44100,2,-32.124412
7,Cymatics - Open Hat (DoubleDecker).wav,0.491224,44100,2,-185.5001
8,Cymatics - Hihat (Vibrant).wav,0.186938,48000,2,-13.893272
9,Cymatics - Percussion (Knight).wav,0.431361,44100,2,102.019117


In [25]:
import librosa

# Load audio file
audio_file = "sounds/4 - ZENITH - Drum One Shots/Cymatics - Open Hat (Projectile).wav"
y, sr = librosa.load(audio_file)

# Detect onsets
onset_frames = librosa.onset.onset_detect(y=y, sr=sr)

# Extract chroma features
chroma = librosa.feature.chroma_stft(y=y, sr=sr)


# Track notes and calculate durations
notes = []
note_durations = []
for i, onset in enumerate(onset_frames):
    chroma_at_onset = chroma[:, onset]
    note_pitch = chroma_at_onset.argmax()
    notes.append(note_pitch)
    if i > 0:
        duration = librosa.frames_to_time(onset - onset_frames[i - 1], sr=sr)
        note_durations.append(duration)

# Output results
for note, duration in zip(notes, note_durations):
    print(f"Note: {note}, Duration: {duration:.2f} seconds")

[ 3  5 14]
Note: 10, Duration: 0.05 seconds
Note: 10, Duration: 0.21 seconds


In [58]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import librosa
from IPython.display import display, Audio
import os
import pygame
import pandas as pd

np.random.seed(1)


def list_audio_files(directory):
    return [
        sounds_figure
        for sounds_figure in os.listdir(directory)
        if sounds_figure.endswith(".wav")
    ]


def analyze_audio(file_path):
    # Load audio file
    y, sr = librosa.load(file_path)

    pitch = librosa.yin(y, fmin=librosa.note_to_hz("C2"), fmax=librosa.note_to_hz("C7"))
    mean_pitch = np.mean(pitch)
    # print(f"pitch: {mean_pitch}")

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mean_mfccs = np.mean(mfccs, axis=1)
    # print(f"mfccs: {mean_mfccs}")

    zcr = librosa.feature.zero_crossing_rate(y)
    mean_zcr = np.mean(zcr)
    # print(f"zcr: {mean_zcr}")

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    mean_chroma = np.mean(chroma, axis=1)
    # print(f"chroma: {mean_chroma}")

    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    mean_spectral_centroid = np.mean(spectral_centroid)
    # print(f"spectral_centroid: {mean_spectral_centroid}")

    harmonic = librosa.effects.harmonic(y)
    mean_harmonic = np.mean(harmonic)
    # print(f"harmonic: {mean_harmonic}")

    percussive = librosa.effects.percussive(y)
    mean_percussive = np.mean(percussive)
    # print(f"percussive: {mean_percussive}")

    spectral_flatness = librosa.feature.spectral_flatness(y=y)
    mean_spectral_flatness = np.mean(spectral_flatness)
    # print(f"spectral_flatness: {mean_spectral_flatness}")

    # Calculate duration
    duration = librosa.get_duration(y=y, sr=sr)

    # Calculate loudness (RMS)
    rms = np.sqrt(np.mean(y**2))

    feature_vector = [
        duration,
        rms,
        mean_pitch,
        mean_spectral_centroid,
        # mean_zcr,
        # mean_spectral_flatness,
        # mean_harmonic,
        # mean_percussive,
    ]

    # print("Feature Vector:", feature_vector)
    # print("Feature Vector Shape:", feature_vector.shape)

    return feature_vector


directory = "sounds/4 - ZENITH - Drum One Shots/"
audio_files = list_audio_files(directory)
sounds_df = pd.DataFrame(columns=range(4))

for audio_file in audio_files:
    file_path = os.path.join(directory, audio_file)
    audio_features = analyze_audio(file_path)
    sounds_df.loc[audio_file] = audio_features


# before using pca we need to normalize the data
from sklearn.preprocessing import Normalizer

scaler = Normalizer()
sounds_df_normalized = scaler.fit_transform(sounds_df)
# display(sounds_df_normalized)

# use pca to reduce the dimensionality of the data to 2 features
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
sounds_df_pca = pca.fit_transform(sounds_df_normalized)

sounds_df_pca = pd.DataFrame(
    sounds_df_pca, index=sounds_df.index, columns=["PC1", "PC2"]
)

main_fig_subplots = make_subplots(rows=1, cols=2, column_widths=[0.7, 0.3])

# Create an interactive plot with Plotly

scatter = go.Scatter(
    x=sounds_df_pca["PC1"],
    y=sounds_df_pca["PC2"],
    mode="markers",
    text=audio_files,
    textposition="top center",
    marker=dict(size=10, color="blue", opacity=0.8),
    hoverinfo="text",
)

main_fig_subplots.add_trace(scatter, row=1, col=1)

main_fig_subplots.update_layout(
    title="Sound Cluster (alpha 0.1 @zean)",
    xaxis_title="Duration (seconds)",
    yaxis_title="Loudness (RMS)",
    showlegend=False,
)

main_fig_subplots.layout.hovermode = "closest"

# Convert main_fig_subplots to a FigureWidget to attach callbacks
main_fig_widget = go.FigureWidget(main_fig_subplots)

pygame.mixer.init()


# Create a callback function to play the corresponding audio file and update the waveform
def update_point(trace, points, selector):
    if points.point_inds:
        # Get the index of the clicked point
        ind = points.point_inds[0]

        # Get the corresponding audio file
        audio_file = audio_files[ind]
        file_path = os.path.join(directory, audio_file)

        # Play the audio file automatically
        pygame.mixer.music.load(file_path)
        pygame.mixer.music.play()

        # Load the audio file using librosa
        y, sr = librosa.load(file_path)

        # Create a waveform plot
        waveform_trace = go.Scatter(y=y, mode="lines")

        # Update the second subplot with the waveform
        with main_fig_widget.batch_update():
            main_fig_widget.data[1].x = list(range(len(y)))
            main_fig_widget.data[1].y = y
            main_fig_widget.layout.annotations = [
                go.layout.Annotation(
                    text=f"{audio_file}",
                    x=1,
                    y=1.15,  # Position over the second subplot
                    xref="paper",
                    yref="paper",
                    showarrow=False,
                )
            ]


# Attach the callback to the scatter trace
main_fig_widget.data[0].on_click(update_point)
# main_fig_widget.data[0].on_hover(update_point)


# Initialize the second subplot with an empty trace to be updated later
main_fig_widget.add_trace(go.Scatter(y=[]), row=1, col=2)

# Display the figure
display(main_fig_widget)


n_fft=2048 is too large for input signal of length=1646


Trying to estimate tuning from empty frequency set.


n_fft=2048 is too large for input signal of length=1871


n_fft=2048 is too large for input signal of length=1957


n_fft=2048 is too large for input signal of length=1699


Trying to estimate tuning from empty frequency set.


Trying to estimate tuning from empty frequency set.


Trying to estimate tuning from empty frequency set.


Trying to estimate tuning from empty frequency set.


n_fft=2048 is too large for input signal of length=1441


Trying to estimate tuning from empty frequency set.


Trying to estimate tuning from empty frequency set.


n_fft=2048 is too large for input signal of length=1814


Trying to estimate tuning from empty frequency set.


n_fft=2048 is too large for input signal of length=1980



FigureWidget({
    'data': [{'hoverinfo': 'text',
              'marker': {'color': 'blue', 'opacity': 0.8, 'size': 10},
              'mode': 'markers',
              'text': [Cymatics - Ride (Ghoul).wav, Cymatics - Percussion
                       (Cowbell).wav, Cymatics - Ride (Surprise).wav, Cymatics -
                       Clap (Ambulance).wav, Cymatics - Open Hat (Terrain).wav,
                       Cymatics - Snare (Baller).wav, Cymatics - Clap (Equal).wav,
                       Cymatics - Open Hat (DoubleDecker).wav, Cymatics - Hihat
                       (Vibrant).wav, Cymatics - Percussion (Knight).wav, Cymatics
                       - Clap (TwoTails).wav, Cymatics - Hihat (Power).wav,
                       Cymatics - Clap (Natural).wav, Cymatics - Snap (Much).wav,
                       Cymatics - Snap (Coffee).wav, Cymatics - Cymbal
                       (Layers).wav, Cymatics - Kick (Pushed).wav, Cymatics -
                       Cymbal (Industrial).wav, Cymatics -