In [95]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
import soundfile as sf

In [96]:
# Pitch-class names in ascending order within one octave.
_PITCH_CLASSES = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")

# One row of reference frequencies (Hz) per octave, aligned with _PITCH_CLASSES.
_OCTAVE_ROWS = (
    (4, (261.63, 277.18, 293.66, 311.13, 329.63, 349.23,
         369.99, 392.00, 415.30, 440.00, 466.16, 493.88)),
    (5, (523.25, 554.37, 587.33, 622.25, 659.26, 698.46,
         739.99, 783.99, 830.61, 880.00, 932.33, 987.77)),
    (6, (1046.50, 1108.73, 1174.66, 1244.51, 1318.51, 1396.91,
         1479.98, 1567.98, 1661.22, 1760.00, 1864.66, 1975.53)),
)

# Lookup table: note name (e.g. "A4") -> reference frequency in Hz,
# ordered from C4 up to B6.
note_freqs = {
    f"{pitch}{octave}": hz
    for octave, row in _OCTAVE_ROWS
    for pitch, hz in zip(_PITCH_CLASSES, row)
}


In [97]:
# Location of the input recording. The parts are joined by plain string
# concatenation (general_dir + melody_dir + file name), so both directory
# strings must end with "/". Output files are written to the same folder.
general_dir = "Melodies/"
melody_dir = "TurkishMarch/"
melody_name = "TurkishMarch.mp3"

# Number of consecutive pieces the recording is cut into before analysis.
num_segments = 20
# Minimum FFT magnitude a frequency bin must reach to be kept.
threshold = 20

# Parameters passed to DBSCAN when grouping the kept frequency bins.
# Clustering runs on the frequency values themselves, so eps_dbscan is
# expressed in the same unit as those frequencies.
eps_dbscan = 20
min_samples = 2

In [98]:
def freq_to_note_label(freq, note_freqs, tolerance=10):
    """Return the name of the note nearest to ``freq``, or ``None``.

    Args:
        freq: Frequency to classify, in the same unit as the table values.
        note_freqs: Mapping of note name -> reference frequency.
        tolerance: Largest absolute deviation accepted for a match.

    Returns:
        The key whose reference frequency is closest to ``freq`` when that
        distance does not exceed ``tolerance``; otherwise ``None``. When two
        notes are equally close, the one listed first in the mapping wins.
    """
    # Only deviations strictly below tolerance + 1 are ever considered.
    ceiling = tolerance + 1
    gaps = ((abs(freq - ref), name) for name, ref in note_freqs.items())
    near = [(gap, name) for gap, name in gaps if gap < ceiling]
    if not near:
        return None

    # min() keeps the first of several equal minima, i.e. mapping order.
    gap, name = min(near, key=lambda entry: entry[0])
    return name if gap <= tolerance else None

In [99]:
def visualise(segments_filtered_freqs, segments_filtered_ampls, segments_clustered_freqs, segments_clustered_ampls, display_freq):
    """Plot per-segment spectra before and after clustering, side by side.

    Every ``display_freq``-th segment (starting with the first) gets one row:
    the left panel shows the thresholded spectrum, the right panel shows the
    clustered peaks with recognised note names as x-axis tick labels.

    Args:
        segments_filtered_freqs: Per-segment sequences of kept frequencies.
        segments_filtered_ampls: Per-segment amplitudes matching the above.
        segments_clustered_freqs: Per-segment cluster peak frequencies.
        segments_clustered_ampls: Per-segment cluster peak amplitudes.
        display_freq: Positive stride between displayed segments.

    Raises:
        ValueError: If ``display_freq`` is smaller than 1.
    """
    if display_freq < 1:
        raise ValueError("display_freq must be a positive integer")

    indices_to_display = list(range(0, len(segments_filtered_freqs), display_freq))
    num_to_display = len(indices_to_display)
    if num_to_display == 0:
        # A zero-row subplot grid is rejected by matplotlib; nothing to draw.
        return

    # squeeze=False always returns a 2-D (rows, 2) array of Axes, so the
    # single-row case needs no special handling.
    fig, axes = plt.subplots(
        num_to_display, 2, figsize=(14, 3 * num_to_display), squeeze=False
    )

    for ax_idx, i in enumerate(indices_to_display):
        # Before clustering
        ax_orig = axes[ax_idx, 0]
        ax_orig.bar(segments_filtered_freqs[i], segments_filtered_ampls[i], width=5)
        ax_orig.set_title(f"Сегмент {i + 1} — до кластеризации")
        ax_orig.set_xlabel("Частота (Гц)")
        ax_orig.set_ylabel("Амплитуда")
        ax_orig.grid(True)

        # After clustering
        ax_cluster = axes[ax_idx, 1]
        if len(segments_filtered_freqs[i]) == 0:
            # Nothing survived the amplitude threshold in this segment.
            ax_cluster.set_title(f"Сегмент {i + 1} — пустой")
            ax_cluster.axis('off')
            continue

        if len(segments_clustered_freqs[i]) == 0:
            # Bins survived the threshold but none ended up in a cluster.
            ax_cluster.set_title(f"Сегмент {i + 1} — все точки шум")
            ax_cluster.axis('off')
            continue

        ax_cluster.bar(segments_clustered_freqs[i], segments_clustered_ampls[i], width=5)
        ax_cluster.set_title(f"Сегмент {i + 1} — после DBSCAN")
        ax_cluster.set_xlabel("Частота (Гц)")
        ax_cluster.set_ylabel("Максимальная амплитуда")
        ax_cluster.grid(True)

        # Note labels: only peaks that map to a known note get a tick.
        note_labels_cluster = []
        note_positions_cluster = []
        for freq in segments_clustered_freqs[i]:
            label = freq_to_note_label(freq, note_freqs)
            if label:
                note_labels_cluster.append(label)
                note_positions_cluster.append(freq)

        ax_cluster.set_xticks(note_positions_cluster)
        ax_cluster.set_xticklabels(note_labels_cluster, rotation=45, fontsize=8)

    plt.tight_layout()
    plt.show()


In [100]:
def sound_generation(segments_clustered_freqs, segments_clustered_ampls):
    """Resynthesise the melody from per-segment peaks and save it as WAV.

    Each segment becomes a sum of sine waves, one per (frequency, amplitude)
    pair, normalised to a peak of about 1. Segments without peaks become
    silence. The result is written to ``reconstructed.wav`` in the melody
    directory.

    Reads the notebook-level names ``segment_length``, ``sr``,
    ``general_dir`` and ``melody_dir``.

    Args:
        segments_clustered_freqs: Per-segment sequences of peak frequencies.
        segments_clustered_ampls: Per-segment amplitudes matching the above.
    """
    duration = segment_length / sr  # duration of one segment, in seconds
    time = np.linspace(0, duration, segment_length, endpoint=False)
    reconstructed_segments = []

    # Iterate over the data that was passed in rather than a global counter.
    for freqs, amps in zip(segments_clustered_freqs, segments_clustered_ampls):
        if len(freqs) == 0:
            # Silence must have the same length as a synthesised segment;
            # sizing it from a leftover analysis slice would break the timing.
            reconstructed_segments.append(np.zeros(segment_length))
            continue

        # Sound generation: additive synthesis of the segment's peaks.
        segment_signal = np.zeros(segment_length)
        for freq, amp in zip(freqs, amps):
            segment_signal += amp * np.sin(2 * np.pi * freq * time)
        # The small epsilon guards against division by zero.
        segment_signal /= np.max(np.abs(segment_signal) + 1e-9)
        reconstructed_segments.append(segment_signal)

    # Segments union
    reconstructed_audio = np.concatenate(reconstructed_segments)
    sf.write(general_dir + melody_dir + "reconstructed.wav", reconstructed_audio, sr)
    print("✅ Аудио сохранено как 'reconstructed.wav'")


In [101]:
def chords_to_text(segments_clustered_freqs):
    """Write the recognised notes of every segment to ``chords.txt``.

    One line is written per segment, numbered from 1. Frequencies that do
    not map to a known note are dropped; a segment with no recognised note
    gets a placeholder entry instead of a note list.

    Args:
        segments_clustered_freqs: Per-segment sequences of peak frequencies.
    """
    with open(general_dir + melody_dir + "chords.txt", "w", encoding="utf-8") as f:
        for i, segment_freqs in enumerate(segments_clustered_freqs):
            candidates = (freq_to_note_label(freq, note_freqs) for freq in segment_freqs)
            chord_notes = [note for note in candidates if note]

            if not chord_notes:
                f.write(f"Сегмент {i+1}: [не определено]\n")
                continue

            f.write(f"Сегмент {i+1}: [{', '.join(chord_notes)}]\n")


In [102]:
# Full path of the input recording, assembled from the configuration values.
audio_path = general_dir + melody_dir + melody_name
# sr=None keeps the file's native sampling rate; mono=True requests a
# single-channel signal.
y, sr = librosa.load(audio_path, sr=None, mono=True)

# Samples per segment (integer division; the last segment takes the remainder).
segment_length = len(y) // num_segments

# Per-segment results, one list entry per segment:
#   *_filtered_*  - every FFT bin whose magnitude reached `threshold`
#   *_clustered_* - one representative peak per DBSCAN cluster
segments_filtered_freqs = []
segments_filtered_ampls = []
segments_clustered_freqs = []
segments_clustered_ampls = []

# Names assigned at cell level inside this loop (segment, n, freqs, labels, ...)
# stay defined after the loop and are visible to the other cells.
for i in range(num_segments):
    start = i * segment_length
    # The final segment runs to the end of the signal so no samples are dropped.
    end = len(y) if i == num_segments - 1 else (i + 1) * segment_length
    segment = y[start:end]

    # Magnitude spectrum of the segment; bin frequencies are in Hz.
    n = len(segment)
    freqs = np.fft.fftfreq(n, d=1/sr)
    fft_vals = np.abs(np.fft.fft(segment))

    # Keep only the first half of the bins (the non-negative frequencies).
    freqs = freqs[:n // 2]
    fft_vals = fft_vals[:n // 2]

    # Remove weak frequencies: keep bins with magnitude >= threshold.
    mask = fft_vals >= threshold
    filtered_freqs = freqs[mask]
    filtered_fft_vals = fft_vals[mask]
    segments_filtered_freqs.append(filtered_freqs)
    segments_filtered_ampls.append(filtered_fft_vals)

    # Nothing passed the threshold: record an empty result for this segment.
    if len(filtered_freqs) == 0:
        segments_clustered_freqs.append(np.array([]))
        segments_clustered_ampls.append(np.array([]))
        continue

    # Clustering: group neighbouring frequencies; the values are reshaped
    # into a single-feature column because DBSCAN expects 2-D input.
    dbscan = DBSCAN(eps=eps_dbscan, min_samples=min_samples)
    labels = dbscan.fit_predict(filtered_freqs.reshape(-1, 1))

    # Label -1 marks points DBSCAN treats as noise; they are ignored.
    unique_labels = set(labels)
    unique_labels.discard(-1)

    cluster_freqs = []
    cluster_amps = []

    for label in unique_labels:
        freqs_cluster = filtered_freqs[labels == label]
        amps_cluster = filtered_fft_vals[labels == label]

        # Defensive check; every label here was taken from `labels`, so an
        # empty selection is not expected.
        if len(freqs_cluster) == 0:
            continue

        # Represent the cluster by its strongest bin.
        idx_max = np.argmax(amps_cluster)
        cluster_freqs.append(freqs_cluster[idx_max])
        cluster_amps.append(amps_cluster[idx_max])

    # Every kept bin was classified as noise: record an empty result.
    if not cluster_freqs:
        segments_clustered_freqs.append(np.array([]))
        segments_clustered_ampls.append(np.array([]))
        continue

    # Sort the peaks by ascending frequency, keeping amplitudes aligned.
    cluster_freqs = np.array(cluster_freqs)
    cluster_amps = np.array(cluster_amps)
    sorted_idx = np.argsort(cluster_freqs)
    cluster_freqs = cluster_freqs[sorted_idx]
    cluster_amps = cluster_amps[sorted_idx]

    segments_clustered_freqs.append(cluster_freqs)
    segments_clustered_ampls.append(cluster_amps)

# Optional plot of the spectra (currently disabled):
# visualise(segments_filtered_freqs, segments_filtered_ampls, segments_clustered_freqs, segments_clustered_ampls, 20)
# Write the resynthesised audio and the per-segment note list to disk.
sound_generation(segments_clustered_freqs, segments_clustered_ampls)
chords_to_text(segments_clustered_freqs)

✅ Аудио сохранено как 'reconstructed.wav'
