In [18]:
from google.colab import files
import librosa
import numpy as np
import re


In [20]:
# === UPLOAD ===
# Step 1: ask for the isolated-vocals WAV. The first key of the mapping
# returned by files.upload() is used as the path of the audio file.
print("📤 Envie primeiro o arquivo WAV (voz isolada)")
uploaded_wav = files.upload()
wav_path = list(uploaded_wav)[0]

# Step 2: ask for the base LRC (lyrics + timestamps), handled the same way.
print("📤 Agora envie o arquivo LRC base (letra + timestamps)")
uploaded_lrc = files.upload()
lrc_path = list(uploaded_lrc)[0]

📤 Envie primeiro o arquivo WAV (voz isolada)


Saving voz_cidade_vizinha.wav to voz_cidade_vizinha (3).wav
📤 Agora envie o arquivo LRC base (letra + timestamps)


Saving lyrics.lrc to lyrics (3).lrc


In [21]:
# === FUNÇÕES AUXILIARES ===
def freq_to_note(freq):
    """Convert a frequency in Hz to a note name with octave (e.g. ``"A4"``).

    The frequency is mapped to the nearest MIDI note number using
    A4 = 440 Hz = MIDI 69, then rendered as pitch class + octave.

    Args:
        freq: Frequency in Hz (Python or NumPy scalar).

    Returns:
        The note name such as ``"C#3"``, or ``"-"`` when ``freq`` is not a
        usable pitch (zero, negative, NaN or infinite).
    """
    # Reject non-finite values too: an infinite frequency would otherwise
    # reach int(round(inf)) and raise OverflowError.
    if not np.isfinite(freq) or freq <= 0:
        return "-"
    note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    midi_number = int(round(69 + 12 * np.log2(freq / 440.0)))
    # divmod floors, so pitch class and octave stay consistent for MIDI < 0.
    octave_index, pitch_class = divmod(midi_number, 12)
    return f"{note_names[pitch_class]}{octave_index - 1}"

def lrc_time_to_seconds(t):
    """Convert an ``"mm:ss"`` / ``"mm:ss.xx"`` string to seconds.

    Args:
        t: Time text with exactly one ``:`` separating minutes and seconds.

    Returns:
        Total seconds as a float.

    Raises:
        ValueError: If ``t`` does not split into exactly two fields or a
            field is not numeric.
    """
    minute_field, second_field = t.split(":")
    whole_minutes = int(minute_field)
    return whole_minutes * 60 + float(second_field)


In [22]:
# === LOAD AUDIO ===
# sr=None keeps the file's native sample rate instead of resampling.
y, sr = librosa.load(wav_path, sr=None)

# === READ BASE LRC ===
# "utf-8-sig" reads plain UTF-8 and also strips a leading BOM. With plain
# "utf-8" the BOM stays glued to the first line, so the anchored
# time_pattern.match() below would never recognise the first timestamp.
with open(lrc_path, "r", encoding="utf-8-sig") as f:
    lines = f.readlines()

# Matches a leading [mm:ss] or [mm:ss.xx] tag; group 1 = minutes,
# group 2 = seconds. The fractional part is optional so that whole-second
# tags are not mistaken for non-timestamp lines.
time_pattern = re.compile(r"\[(\d+):(\d+(?:\.\d+)?)\]")
new_lrc_lines = []

In [23]:
# === PER-LINE LOOP ===
# For every timestamped lyric line: estimate when each word starts (audio
# onsets, or an even split as fallback), estimate the pitch of each word
# and rewrite the line as "[mm:ss.xx] word(Note) word(Note) ...".

# Duration (seconds) assumed for a line when the following line does not
# provide a usable (strictly later) timestamp.
DEFAULT_LINE_DURATION = 2.0

# Start from an empty result so re-running this cell does not append a
# second copy of every line.
new_lrc_lines = []

for i, line in enumerate(lines):
    match = time_pattern.match(line)
    if not match:
        # Metadata / blank / untimed lines are copied through unchanged.
        new_lrc_lines.append(line)
        continue

    minutes, seconds = match.groups()
    start_time = int(minutes)*60 + float(seconds)

    # End of the line = timestamp of the immediately following line, when it
    # has one and it is later than this line's start; otherwise the default.
    end_time = start_time + DEFAULT_LINE_DURATION
    if i+1 < len(lines):
        next_match = time_pattern.match(lines[i+1])
        if next_match:
            n_minutes, n_seconds = next_match.groups()
            next_start = int(n_minutes)*60 + float(n_seconds)
            # A repeated or out-of-order timestamp would give an empty or
            # negative window, so it is ignored in favour of the default.
            if next_start > start_time:
                end_time = next_start

    # Lyric text = whatever follows the last "]" on the line.
    text = line.strip().split("]")[-1].strip()
    if len(text) == 0:
        new_lrc_lines.append(line)
        continue

    # Units to annotate are whitespace-separated words (the variable names
    # say "silabas", but no syllable splitting is performed).
    silabas = text.split()

    # --- DETECT ONSETS AUTOMATICALLY ---
    start_sample = int(start_time*sr)
    end_sample = int(end_time*sr)
    segment = y[start_sample:end_sample]

    # The slice is empty when the timestamp lies past the end of the audio;
    # skip detection in that case instead of feeding an empty signal.
    if len(segment) > 0:
        # Onset times are relative to the start of `segment`.
        onsets = librosa.onset.onset_detect(y=segment, sr=sr, units='time', backtrack=True)
    else:
        onsets = []

    if len(onsets) >= len(silabas):
        # One onset per word: word j spans onset j .. onset j+1 (or the end
        # of the line when there is no further onset). Surplus onsets are
        # ignored.
        silaba_times = [
            (start_time + onsets[j], start_time + onsets[j+1] if j+1 < len(onsets) else end_time)
            for j in range(len(silabas))
        ]
    else:
        # Too few (or no) onsets: split the line evenly so that every word
        # is kept. Truncating to the onset count would drop lyrics from the
        # generated file.
        dur_silaba = (end_time - start_time) / len(silabas)
        silaba_times = [
            (start_time + k*dur_silaba, start_time + (k+1)*dur_silaba)
            for k in range(len(silabas))
        ]

    # --- ESTIMATE THE NOTE OF EACH WORD ---
    anotadas = []
    for s, (t_start, t_end) in zip(silabas, silaba_times):
        sil_segment = y[int(t_start*sr):int(t_end*sr)]
        if len(sil_segment) > 0:
            # Mean of the per-frame YIN f0 estimates within 50-500 Hz.
            f0 = librosa.yin(sil_segment, fmin=50, fmax=500, sr=sr)
            freq = np.mean(f0) if len(f0) > 0 else 0
            nota = freq_to_note(freq)
        else:
            nota = "-"
        anotadas.append(f"{s}({nota})")

    linha_nova = f"[{minutes}:{seconds}] " + " ".join(anotadas)
    new_lrc_lines.append(linha_nova + "\n")

In [24]:
# === SAVE NEW LRC ===
# Each entry of new_lrc_lines is written as-is, in order, into the output file.
output_file = "lrc_silaba_automatica_notas.lrc"
with open(output_file, "w", encoding="utf-8") as f:
    f.write("".join(new_lrc_lines))

# === DOWNLOAD ===
# Hand the generated file to the Colab download helper, then report success.
files.download(output_file)
print("✅ LRC gerado com sílabas detectadas pelo áudio e notas adicionadas!")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ LRC gerado com sílabas detectadas pelo áudio e notas adicionadas!
