In [573]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
import soundfile as sf

In [574]:
# Reference pitch table: note name -> frequency in Hz, covering octaves 4-6.
# Each per-octave tuple lists the twelve semitones in the order of _PITCH_NAMES.
_PITCH_NAMES = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")

_FREQS_BY_OCTAVE = {
    4: (261.63, 277.18, 293.66, 311.13, 329.63, 349.23,
        369.99, 392.00, 415.30, 440.00, 466.16, 493.88),
    5: (523.25, 554.37, 587.33, 622.25, 659.26, 698.46,
        739.99, 783.99, 830.61, 880.00, 932.33, 987.77),
    6: (1046.50, 1108.73, 1174.66, 1244.51, 1318.51, 1396.91,
        1479.98, 1567.98, 1661.22, 1760.00, 1864.66, 1975.53),
}

# Flatten to {"C4": 261.63, ..., "B6": 1975.53}, keeping ascending pitch order.
note_freqs = {
    f"{pitch}{octave}": hz
    for octave, octave_freqs in _FREQS_BY_OCTAVE.items()
    for pitch, hz in zip(_PITCH_NAMES, octave_freqs)
}


In [575]:
# Location of the input recording: <general_dir><melody_dir><melody_name>.
general_dir = "Melodies/"
melody_dir = "TurkishMarch/"
melody_name = "TurkishMarch.mp3"
audio_path = f"{general_dir}{melody_dir}{melody_name}"

# Analysis settings.
num_segments = 200  # number of consecutive slices the recording is cut into
threshold = 20      # minimum FFT magnitude for a frequency bin to be kept

In [576]:
# Load the recording as a mono signal; sr=None asks librosa to keep the
# file's own sampling rate instead of resampling.
y, sr = librosa.load(audio_path, sr=None, mono=True)

if num_segments <= 0:
    raise ValueError(f"num_segments must be positive, got {num_segments}")

# Samples per segment. The integer division may leave a remainder, which the
# per-segment loops assign to the final segment.
segment_length = len(y) // num_segments
if segment_length == 0:
    # Fail here with a clear message: zero-length segments would otherwise
    # crash later inside the FFT calls.
    raise ValueError(
        f"Audio has only {len(y)} samples; cannot split it into "
        f"{num_segments} non-empty segments"
    )

# DBSCAN settings used to group neighbouring FFT bin frequencies.
eps_dbscan = 20   # neighbourhood radius, in Hz (bins are clustered by frequency)
min_samples = 2   # minimum number of bins required to form a cluster

def freq_to_note_label(freq, note_freqs, tolerance=10):
    """Name the note whose reference frequency lies nearest to ``freq``.

    Args:
        freq: Frequency to classify, in the same units as the table values.
        note_freqs: Mapping of note name -> reference frequency.
        tolerance: Largest absolute frequency difference accepted as a match.

    Returns:
        The name of the nearest note, or ``None`` when the table is empty or
        the nearest note is more than ``tolerance`` away. On an exact tie the
        note that appears first in ``note_freqs`` wins.
    """
    if not note_freqs:
        return None

    def distance(name):
        return abs(freq - note_freqs[name])

    nearest = min(note_freqs, key=distance)
    return nearest if distance(nearest) <= tolerance else None


def _dominant_peaks(segment, sr, threshold, eps, min_samples):
    """Find the strongest spectral peak of every frequency cluster in a segment.

    The magnitude spectrum of ``segment`` is reduced to its positive-frequency
    bins, bins weaker than ``threshold`` are dropped, and the frequencies of
    the surviving bins are grouped with DBSCAN. Each cluster is represented by
    its largest-magnitude bin; points DBSCAN labels as noise (-1) are ignored.

    Args:
        segment: 1-D array of audio samples.
        sr: Sampling rate of ``segment`` in Hz.
        threshold: Minimum FFT magnitude a bin must reach to be kept.
        eps: DBSCAN neighbourhood radius, in Hz.
        min_samples: DBSCAN ``min_samples`` setting.

    Returns:
        ``(peak_freqs, peak_amps)``: two equal-length NumPy arrays sorted by
        ascending frequency. Both are empty when no cluster is found.
    """
    n = len(segment)
    if n < 2:
        # No positive-frequency bins to analyse (and an empty FFT would raise).
        return np.array([]), np.array([])

    half = n // 2  # keep the positive-frequency half of the spectrum only
    freqs = np.fft.fftfreq(n, d=1 / sr)[:half]
    magnitudes = np.abs(np.fft.fft(segment))[:half]

    # Drop weak bins.
    strong = magnitudes >= threshold
    freqs, magnitudes = freqs[strong], magnitudes[strong]
    if freqs.size == 0:
        return np.array([]), np.array([])

    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(freqs.reshape(-1, 1))

    peak_freqs, peak_amps = [], []
    # np.unique yields the cluster labels in a deterministic (sorted) order.
    for label in np.unique(labels[labels != -1]):
        members = labels == label
        strongest = np.argmax(magnitudes[members])
        peak_freqs.append(freqs[members][strongest])
        peak_amps.append(magnitudes[members][strongest])

    peak_freqs = np.asarray(peak_freqs)
    peak_amps = np.asarray(peak_amps)
    order = np.argsort(peak_freqs)
    return peak_freqs[order], peak_amps[order]


# Per-segment results, in playback order:
#   segment_peaks[i] -> (peak_freqs, peak_amps) arrays for segment i
#   segment_notes[i] -> [(frequency, note_name), ...] for peaks matching a note
# Per-segment plotting is not performed here; these lists hold the data a plot
# of selected segments would need.
segment_peaks = []
segment_notes = []

for i in range(num_segments):
    start = i * segment_length
    # The final segment also takes the samples left over by the integer division.
    end = len(y) if i == num_segments - 1 else start + segment_length

    peak_freqs, peak_amps = _dominant_peaks(
        y[start:end], sr, threshold, eps_dbscan, min_samples
    )
    segment_peaks.append((peak_freqs, peak_amps))

    # Keep only the peaks that fall within tolerance of a known note.
    labelled = ((freq, freq_to_note_label(freq, note_freqs)) for freq in peak_freqs)
    segment_notes.append([(freq, note) for freq, note in labelled if note])


In [577]:
# Resynthesise the recording: each segment is replaced by a sum of sinusoids,
# one per DBSCAN frequency cluster, using the cluster's strongest FFT bin for
# both frequency and amplitude. Segments with no usable peaks become silence.

duration = segment_length / sr  # length of one regular segment, in seconds
# Sample times of a regular-length segment (reused for every such segment).
time = np.linspace(0, duration, segment_length, endpoint=False)

reconstructed_segments = []

for i in range(num_segments):
    start = i * segment_length
    # The final segment also absorbs the remainder samples, so it can be longer.
    end = len(y) if i == num_segments - 1 else (i + 1) * segment_length
    segment = y[start:end]

    n = len(segment)
    # Positive-frequency half of the magnitude spectrum.
    freqs = np.fft.fftfreq(n, d=1/sr)[:n // 2]
    fft_vals = np.abs(np.fft.fft(segment))[:n // 2]

    mask = fft_vals >= threshold
    filtered_freqs = freqs[mask]
    filtered_fft_vals = fft_vals[mask]

    if len(filtered_freqs) == 0:
        reconstructed_segments.append(np.zeros_like(segment))
        continue

    dbscan = DBSCAN(eps=eps_dbscan, min_samples=min_samples)
    labels = dbscan.fit_predict(filtered_freqs.reshape(-1, 1))

    cluster_freqs = []
    cluster_amps = []
    # Label -1 marks DBSCAN noise; np.unique gives the rest in sorted order.
    for label in np.unique(labels[labels != -1]):
        in_cluster = labels == label
        idx_max = np.argmax(filtered_fft_vals[in_cluster])
        cluster_freqs.append(filtered_freqs[in_cluster][idx_max])
        cluster_amps.append(filtered_fft_vals[in_cluster][idx_max])

    if not cluster_freqs:
        reconstructed_segments.append(np.zeros_like(segment))
        continue

    # ==== Sound synthesis ====
    # Synthesise exactly as many samples as the source segment has. The
    # original code always produced `segment_length` samples, which truncated
    # the (longer) final segment and made the output shorter than the input.
    segment_time = time if n == segment_length else np.arange(n) / sr
    segment_signal = np.zeros(n)
    for freq, amp in zip(cluster_freqs, cluster_amps):
        segment_signal += amp * np.sin(2 * np.pi * freq * segment_time)

    # Normalise each segment independently to a peak of ~1; the small epsilon
    # avoids division by zero for an all-zero signal.
    segment_signal /= np.max(np.abs(segment_signal)) + 1e-9

    reconstructed_segments.append(segment_signal)

# ==== Join all segments and save ====
reconstructed_audio = np.concatenate(reconstructed_segments)
sf.write(general_dir + melody_dir + "reconstructed.wav", reconstructed_audio, sr)
print("✅ Аудио сохранено как 'reconstructed.wav'")


✅ Аудио сохранено как 'reconstructed.wav'


In [578]:
def freq_to_note_label(freq, note_freqs, tolerance=10):
    """Return the note closest to ``freq`` if it lies within ``tolerance``.

    Args:
        freq: Frequency to look up, in the same units as the table values.
        note_freqs: Mapping of note name -> reference frequency.
        tolerance: Maximum allowed absolute difference for a match.

    Returns:
        The closest note name, or ``None`` if no note is within ``tolerance``.
        When several notes are equally close, the earliest one in
        ``note_freqs`` is returned.
    """
    best_note = None
    best_gap = None
    for name, pitch in note_freqs.items():
        gap = abs(freq - pitch)
        if gap > tolerance:
            continue  # too far away to count as this note
        if best_gap is None or gap < best_gap:
            best_note, best_gap = name, gap
    return best_note

# Save the chords (detected note names) of every segment, one line per segment.
# NOTE(review): this writes "chords.txt" into the current working directory,
# whereas the other outputs of this notebook go under general_dir + melody_dir
# — confirm that this location is intended.
with open("chords.txt", "w", encoding="utf-8") as f:
    # Iterate by segment index; the analysis needs only `y`, not the
    # reconstructed audio.
    for i in range(num_segments):
        start = i * segment_length
        # The final segment also takes the remainder samples.
        end = len(y) if i == num_segments - 1 else (i + 1) * segment_length
        segment = y[start:end]

        n = len(segment)
        # Positive-frequency half of the magnitude spectrum.
        freqs = np.fft.fftfreq(n, d=1/sr)[:n // 2]
        fft_vals = np.abs(np.fft.fft(segment))[:n // 2]

        mask = fft_vals >= threshold
        filtered_freqs = freqs[mask]
        filtered_fft_vals = fft_vals[mask]

        if len(filtered_freqs) == 0:
            f.write(f"Сегмент {i+1}: [нет частот]\n")
            continue

        dbscan = DBSCAN(eps=eps_dbscan, min_samples=min_samples)
        labels = dbscan.fit_predict(filtered_freqs.reshape(-1, 1))

        chord_notes = []
        # Skip DBSCAN noise (-1); np.unique visits clusters in sorted label
        # order, so the note order does not depend on set iteration order.
        for label in np.unique(labels[labels != -1]):
            in_cluster = labels == label
            idx_max = np.argmax(filtered_fft_vals[in_cluster])
            freq = filtered_freqs[in_cluster][idx_max]
            note = freq_to_note_label(freq, note_freqs)
            if note:
                chord_notes.append(note)

        if chord_notes:
            notes_str = ", ".join(chord_notes)
            f.write(f"Сегмент {i+1}: [{notes_str}]\n")
        else:
            f.write(f"Сегмент {i+1}: [не определено]\n")


In [579]:
# ===== Save the chords to a file =====
def _chord_for_segment(segment):
    """Return the note names detected in one audio segment.

    Reads the notebook-level settings ``sr``, ``threshold``, ``eps_dbscan``,
    ``min_samples`` and ``note_freqs``.

    Args:
        segment: 1-D array of audio samples.

    Returns:
        ``None`` when no FFT bin reaches ``threshold``; otherwise a list of
        note names (possibly empty), one per DBSCAN frequency cluster whose
        strongest bin matches a note in ``note_freqs``.
    """
    n = len(segment)
    if n == 0:
        # An empty FFT would raise; treat an empty segment as having no sound.
        return None

    # Positive-frequency half of the magnitude spectrum.
    freqs = np.fft.fftfreq(n, d=1/sr)[:n // 2]
    fft_vals = np.abs(np.fft.fft(segment))[:n // 2]

    # Drop weak bins.
    mask = fft_vals >= threshold
    filtered_freqs = freqs[mask]
    filtered_fft_vals = fft_vals[mask]
    if len(filtered_freqs) == 0:
        return None

    dbscan = DBSCAN(eps=eps_dbscan, min_samples=min_samples)
    labels = dbscan.fit_predict(filtered_freqs.reshape(-1, 1))

    notes = []
    # Label -1 is DBSCAN noise; np.unique gives a deterministic cluster order.
    for label in np.unique(labels[labels != -1]):
        in_cluster = labels == label
        idx_max = np.argmax(filtered_fft_vals[in_cluster])
        note = freq_to_note_label(filtered_freqs[in_cluster][idx_max], note_freqs)
        if note:
            notes.append(note)
    return notes


with open(general_dir + melody_dir + "chords.txt", "w", encoding="utf-8") as f:
    for i in range(num_segments):
        start = i * segment_length
        # The final segment also takes the remainder samples.
        end = len(y) if i == num_segments - 1 else (i + 1) * segment_length

        chord_notes = _chord_for_segment(y[start:end])

        if chord_notes is None:
            f.write(f"Сегмент {i+1}: [нет звуков]\n")
        elif chord_notes:
            f.write(f"Сегмент {i+1}: [{', '.join(chord_notes)}]\n")
        else:
            f.write(f"Сегмент {i+1}: [не определено]\n")
