In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. pitch_estimator, preprocessors) are reloaded before each cell runs
# and edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Test pipeline on one instance


In [None]:
from pathlib import Path
import librosa
import IPython.display as ipd
from pitch_estimator import PitchEstimator
from preprocessors import Preprocessor
import matplotlib.pyplot as plt
import scipy
import numpy as np

In [None]:
# Locate one target recording inside the dataset folder.
data_path = Path("gamelan_music_dataset")
targets_first = data_path / "first ensemble/orchestra/target"
audio_file_path = targets_first / "demung/001.wav"

pitch_est = PitchEstimator()
pp = Preprocessor()

# Griffin-Lim starts from a random phase estimate by default. Fixing the seed
# makes the reconstructed audio (and the onsets and pitch track derived from
# it) reproducible across kernel restarts.
GRIFFINLIM_SEED = 0

# Display input audio (only the first 10 seconds are loaded)
y, sr = librosa.load(audio_file_path, duration=10)
print('Input audio:')
display(ipd.Audio(y, rate=sr))

# Using median filtering to divide harmonic from percussive component
spectrogram = pp.compute_spectrogram(y)
harmonic, percussive = pp.apply_median_filtering(spectrogram)

# Reconstruct time-domain audio for both components with Griffin-Lim
# https://librosa.org/doc/main/generated/librosa.griffinlim.html
# NOTE(review): assumes `harmonic` and `percussive` are magnitude spectrograms
# computed with librosa's default STFT settings - confirm against Preprocessor.
reconstructed_audio_griffinlim = librosa.griffinlim(
    harmonic, random_state=GRIFFINLIM_SEED
)
reconstructed_audio_griffinlim_percussive = librosa.griffinlim(
    percussive, random_state=GRIFFINLIM_SEED
)

print('Harmonic component reconstructed with griffin and lim algorithm:')
display(ipd.Audio(reconstructed_audio_griffinlim, rate=sr))


In [None]:
# Detect note onsets on the percussive reconstruction, reported in seconds.
# `sr` is passed explicitly so the frame-to-time conversion uses the rate the
# audio was actually loaded at instead of relying on onset_detect's default
# happening to match.
onsets = librosa.onset.onset_detect(
    y=reconstructed_audio_griffinlim_percussive, sr=sr, units="time"
)

In [None]:
# Show the detected onset times (requested with units="time" above).
onsets

In [None]:
# Run the project's CREPE-based pitch estimator on the harmonic reconstruction.
# NOTE(review): presumably returns per-frame timestamps, frequency estimates,
# confidences and the activation matrix, as CREPE itself does - confirm in
# pitch_estimator.PitchEstimator.estimate_crepe.
time, frequency, confidence, activation = pitch_est.estimate_crepe(reconstructed_audio_griffinlim, sr)


In [None]:
# Plot the interpolated frequencies
plt.plot(time, frequency)
plt.xlabel('Time') 
plt.ylabel('Frequency')
plt.title('Crepe Frequencies')

for t in onsets:
    plt.axvline(x=round(t, 2), color='r', linestyle='-', label=f'Time {t}')

plt.show()

In [None]:
# Estimate one tone per onset: the median frequency of the frames lying
# between this onset and the next one (or the end of the track for the last).
# NOTE(review): assumes `time` and `frequency` are NumPy arrays aligned
# frame-by-frame - confirm against estimate_crepe.
tones = []
n_frames = len(time)
for i, onset in enumerate(onsets):
    # First frame strictly after the onset. np.argmax returns 0 when no frame
    # qualifies, which would silently select the start of the recording, so
    # that case is mapped to an empty segment instead.
    after_onset = time > onset
    index_a = int(np.argmax(after_onset)) if after_onset.any() else n_frames
    if i == len(onsets) - 1:
        # Slice ends are exclusive: use n_frames so the final frame is kept.
        index_b = n_frames
    else:
        after_next = time > onsets[i + 1]
        index_b = int(np.argmax(after_next)) if after_next.any() else n_frames
    frequency_range = frequency[index_a:index_b]
    # An empty segment yields NaN (without the empty-slice warning) so that
    # `tones` stays aligned index-for-index with `onsets`.
    tone = np.median(frequency_range) if len(frequency_range) else np.nan
    print(f'Onset {i}-{i+1}: {tone}')
    tones.append(tone)

In [None]:
# Sort the tones in ascending order. NaN entries (onset segments that contain
# no pitch frames) are dropped first: NaN compares false against every value,
# which would leave the order produced by sorted() undefined.
sorted_tones = sorted(tone_value for tone_value in tones if not np.isnan(tone_value))

In [None]:
def group_and_average_frequencies(frequencies, threshold=10):
    """
    Cluster neighbouring frequencies and return the mean of each cluster.

    Values are walked in the order given. A value joins the current group when
    it exceeds its predecessor by at most `threshold`; otherwise the current
    group is closed and a new one is started. Because only consecutive values
    are compared, a group can span more than `threshold` from end to end.

    Parameters
    ----------
    frequencies : sequence of numbers
        Expected in ascending order; differences are taken as
        ``current - previous`` without an absolute value.
    threshold : number, default 10
        Largest gap between consecutive values that keeps them in one group,
        in the same unit as `frequencies`.

    Returns
    -------
    list
        Mean of each group, in input order. Empty when `frequencies` is empty.
    """
    # Nothing to group; the original indexing of element 0 would raise here.
    if len(frequencies) == 0:
        return []

    grouped_frequencies = []
    current_group = [frequencies[0]]

    for previous, current in zip(frequencies, frequencies[1:]):
        if current - previous <= threshold:
            current_group.append(current)
        else:
            grouped_frequencies.append(sum(current_group) / len(current_group))
            current_group = [current]

    # Flush the group that was still open when the input ended.
    grouped_frequencies.append(sum(current_group) / len(current_group))

    return grouped_frequencies

In [None]:
# Merge sorted tones whose consecutive gap is within the default threshold
# (10) and print the mean of each resulting group.
grouped_averaged_frequencies = group_and_average_frequencies(sorted_tones)
print(grouped_averaged_frequencies)

In [None]:
def get_tuning_vectors():
    """
    Return three gamelan tunings as a tuple ``(begbeg, sedang, tirus)``.

    Each tuning is a NumPy array whose entries are the intervals, in cents,
    between consecutive tones, starting from ding.
    """
    intervals_in_cents = (
        [120, 114, 432, 81, 453],   # begbeg
        [136, 155, 379, 134, 396],  # sedang
        [197, 180, 347, 104, 372],  # tirus
    )
    return tuple(np.array(intervals) for intervals in intervals_in_cents)