## Importing the required libraries

In [1]:
import librosa
import numpy as np
from fft import nearest_pow_2, fft
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
import json
import math

## Loading the audio with original sampling rate

In [2]:
audio, sample_rate = librosa.load('audio_files/River Flows in You.wav', sr=None) # Smapling at the original sampling rate
print(len(audio))
print(sample_rate)

960000
48000


## Applying FFT to the amplitude samples

In [3]:
frequency_spectrum = fft(list(audio))

In [4]:
# Computing the magnitudes of the frequency spectrum
magnitudes = np.abs(frequency_spectrum)

# Frequency resolution
frequency_resolution = sample_rate / len(frequency_spectrum)

# Generating the frequency axis
frequencies = np.arange(0, sample_rate, frequency_resolution)

# Considering only the first half of the spectrum (positive frequencies)
half_index = len(frequency_spectrum) // 2
frequencies = frequencies[:half_index]
magnitudes = magnitudes[:half_index]


In [5]:
# Find the indices of the 20 largest magnitudes (dominant frequencies)
indices = np.argsort(magnitudes)[-500:]

# Sorting indices to display them in increasing order of frequency
sorted_indices = indices[np.argsort(frequencies[indices])]

# Extracting the dominant frequencies and their magnitudes
dominant_frequencies = [freq for freq in frequencies[sorted_indices] if freq <=4186]

dominant_magnitudes = magnitudes[sorted_indices]

# Convert frequencies to numpy array and reshape for clustering
X = np.array(dominant_frequencies).reshape(-1, 1)

# Create DBSCAN clusterer
db = DBSCAN(eps=frequency_resolution*10, min_samples=4).fit(X)  # Adjust eps based on the expected cluster width in Hz

# Extract labels
labels = db.labels_

# Number of clusters in labels, ignoring noise if present
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print(f"Estimated number of clusters: {n_clusters_}")
print(f"Estimated number of noise points: {n_noise_}")

Estimated number of clusters: 18
Estimated number of noise points: 11


In [7]:
# Getting the average frequency for all the clusters

cluster_averages = {}

for index, label in enumerate(labels):
    if label!=-1:
        if label not in cluster_averages.keys():
            cluster_averages[label] = []
        cluster_averages[label].append(dominant_frequencies[index])

for (key, val) in cluster_averages.items():
    cluster_averages[key] = sum(cluster_averages[key])/len(cluster_averages[key])


In [9]:
# Mapping the average frequencies with piano notes
with open('piano_frequencies.json', 'r') as f:
    piano_frequencies = json.load(f)

key_mapping = {}

for (label, average_freq) in cluster_averages.items():
    distances = [math.dist([average_freq], [val]) for val in list(piano_frequencies.values())]
    key_mapping[average_freq] = list(piano_frequencies.keys())[np.argmin(distances)]

print(f"Piano notes played in the audio are: \n {list(set(key_mapping.values()))}")


Piano notes played in the audio are: 
 ['D5', 'A5', 'A4', 'C#4/Db4', 'D3', 'A3', 'D4', 'F#4/Gb4', 'F#5/Gb5', 'G#5/Ab5', 'E4', 'E5', 'C#5/Db5', 'F#3/Gb3']
