## Preprocessing

- mono channel
- 48kHz (모노 변환 시에는 `sr=None`으로 원본 샘플링 레이트를 유지하고, 임베딩 단계에서 48kHz로 리샘플링하여 로드)

In [2]:
import os

# Peek at the first five entries of the raw input directory.
# os.listdir returns names in arbitrary order, so this is a sample, not a sorted head.
os.listdir('../static_files/suno')[:5]

['31-Vochop.mp3',
 '41-Normal Sunset.mp3',
 '11-Let Me Go.mp3',
 '21-what u want.mp3',
 '61-poster 2 2.mp3']

In [3]:
# 1m 14.7s

import os
import librosa
import soundfile as sf

# Write a single-channel copy of every MP3 in the raw directory to a sibling
# directory, keeping the original file names.
input_dir = '../static_files/suno'
output_dir = '../static_files/suno_mono'
os.makedirs(output_dir, exist_ok=True)

mp3_names = [name for name in os.listdir(input_dir) if name.lower().endswith('.mp3')]
for filename in mp3_names:
    # sr=None keeps the file's native sampling rate; mono=True downmixes to one channel.
    y, sr = librosa.load(os.path.join(input_dir, filename), sr=None, mono=True)
    sf.write(os.path.join(output_dir, filename), y, sr)

In [4]:
import librosa
import IPython.display as ipd
import random

# Sanity-check the conversion: pick one converted file at random, report its
# properties, and play it back.
mono_dir = '../static_files/suno_mono'
mp3_files = [f for f in os.listdir(mono_dir) if f.lower().endswith('.mp3')]

if mp3_files:
    sample_file = random.choice(mp3_files)
    sample_path = os.path.join(mono_dir, sample_file)

    # mono=False keeps the channel layout stored in the file. Loading with
    # mono=True would downmix first, so the channel check below could never
    # report anything other than mono.
    y, sr = librosa.load(sample_path, sr=None, mono=False)

    # librosa returns shape (n,) for one channel and (channels, n) otherwise,
    # so the sample count is always the last axis.
    n_channels = 1 if y.ndim == 1 else y.shape[0]
    n_samples = y.shape[-1]

    # Print file information.
    print(f"파일명: {sample_file}")
    print(f"샘플링 레이트: {sr}")
    print(f"총 샘플 수: {n_samples}")
    print(f"재생 시간(초): {n_samples / sr:.2f}")
    if n_channels == 1:
        print("이 파일은 mono입니다.")
    elif n_channels == 2:
        print("이 파일은 stereo입니다.")
    else:
        print(f"채널 수: {n_channels}")

    # Play the file back in the notebook.
    ipd.display(ipd.Audio(sample_path))
else:
    print("mp3 파일이 존재하지 않습니다.")

파일명: 44-Moonlight Bounce.mp3
샘플링 레이트: 48000
총 샘플 수: 5792639
재생 시간(초): 120.68
이 파일은 mono입니다.


## Embedding using laion-CLAP

- 모델: `laion/larger_clap_music`
- 추출된 벡터를 확인해 보니 모델 출력에 이미 L2 정규화가 적용되어 있음 (원본 벡터의 L2 norm이 1.000000이므로 별도의 L2 정규화는 필요 없음)

In [5]:
# 13.3s

from transformers import ClapProcessor, ClapModel
import torch
import librosa

import numpy as np

# Load the laion/larger_clap_music checkpoint together with its matching processor.
processor = ClapProcessor.from_pretrained("laion/larger_clap_music")
model = ClapModel.from_pretrained("laion/larger_clap_music")

# Sampling rate every clip is resampled to before it is handed to the processor (48 kHz).
TARGET_SR = 48000

# NOTE(review): the processor output has a fixed 1001 time frames even for
# ~2 minute tracks (see the `inputs['input_features'].shape` check in a later
# cell), so only a fixed-length window of each track reaches the model.
# The CLAP feature extractor's "rand_trunc" truncation mode picks that window
# with NumPy's global RNG; presumably this checkpoint uses that mode — confirm
# against its preprocessor config. Re-seeding before every file makes the
# selected window, and therefore the embedding, reproducible across re-runs
# and independent of the order in which files are processed.
EMBED_SEED = 42

embeddings = {}
for mp3_file in mp3_files:
    file_path = os.path.join(mono_dir, mp3_file)

    # Decode with librosa, resampling to TARGET_SR and downmixing to one channel.
    audio_data, sr = librosa.load(file_path, sr=TARGET_SR, mono=True)

    # Reset the RNG so any random crop depends only on this file, not on loop history.
    np.random.seed(EMBED_SEED)

    # Convert the waveform to model inputs; the sampling rate is passed explicitly.
    inputs = processor(
        audios=audio_data,
        sampling_rate=TARGET_SR,
        return_tensors="pt"
    )

    # Inference only: no gradients needed.
    with torch.no_grad():
        audio_emb = model.get_audio_features(**inputs)

    # Store as a NumPy array keyed by file name (the leading batch axis is kept).
    embeddings[mp3_file] = audio_emb.cpu().numpy()
    print(f"✓ {mp3_file} - shape: {audio_emb.shape}")

print(f"\n총 {len(embeddings)}개 파일 임베딩 완료!")


  from .autonotebook import tqdm as notebook_tqdm
  inputs = processor(


✓ 31-Vochop.mp3 - shape: torch.Size([1, 512])
✓ 41-Normal Sunset.mp3 - shape: torch.Size([1, 512])
✓ 11-Let Me Go.mp3 - shape: torch.Size([1, 512])
✓ 21-what u want.mp3 - shape: torch.Size([1, 512])
✓ 61-poster 2 2.mp3 - shape: torch.Size([1, 512])
✓ 67-diff way.mp3 - shape: torch.Size([1, 512])
✓ 74-deep night 9.mp3 - shape: torch.Size([1, 512])
✓ 19-Wimps.mp3 - shape: torch.Size([1, 512])
✓ 85-fade dream.mp3 - shape: torch.Size([1, 512])
✓ 68-deep night 3.mp3 - shape: torch.Size([1, 512])
✓ 26-whatever.mp3 - shape: torch.Size([1, 512])
✓ 53-straight go my way ver 5.mp3 - shape: torch.Size([1, 512])
✓ 30-diriring.mp3 - shape: torch.Size([1, 512])
✓ 6-so.mp3 - shape: torch.Size([1, 512])
✓ 60-Poster 2 UK House Remix.mp3 - shape: torch.Size([1, 512])
✓ 40-String Loopy.mp3 - shape: torch.Size([1, 512])
✓ 28-seeya.mp3 - shape: torch.Size([1, 512])
✓ 39-wigle.mp3 - shape: torch.Size([1, 512])
✓ 86-puzzle in city.mp3 - shape: torch.Size([1, 512])
✓ 51-walking ver2.mp3 - shape: torch.Size([1

In [6]:
# Spot-check one stored embedding; the array is saved exactly as the model
# returned it, so its shape shows what later cells have to squeeze.
embeddings['31-Vochop.mp3'].shape

(1, 512)

In [7]:
# `inputs` is whatever the embedding loop assigned last, i.e. the processor
# output for the final file it processed.
inputs['input_features'].shape

torch.Size([1, 1, 1001, 64])

In [8]:
import numpy as np

# Rescale every embedding to unit L2 length.
# Each stored array is squeezed to a flat vector and divided by its own norm.
normalized_embeddings = {
    fname: emb.squeeze() / np.linalg.norm(emb.squeeze())
    for fname, emb in embeddings.items()
}

# Verification: the first entry should now have an L2 norm of 1.
sample_key, sample_vec = list(normalized_embeddings.items())[0]
sample_norm = np.linalg.norm(sample_vec)
print(f"샘플 '{sample_key}'의 L2 norm: {sample_norm:.6f}")
print(f"임베딩 shape: {sample_vec.shape}")


샘플 '31-Vochop.mp3'의 L2 norm: 1.000000
임베딩 shape: (512,)


In [9]:
# Display the explicitly normalized vector for one track.
normalized_embeddings['31-Vochop.mp3']

array([-0.01535774, -0.00230278,  0.1368986 , -0.04519567, -0.02516975,
        0.01628478, -0.04515627, -0.0283875 , -0.03730669,  0.06079391,
        0.05097679, -0.01812225, -0.08931892,  0.02313256,  0.00890214,
       -0.0564709 ,  0.07431056,  0.09796149, -0.03721803, -0.02463508,
        0.03783404,  0.03641412,  0.04073813, -0.0709539 , -0.05973309,
        0.04825588, -0.00660667,  0.00669866, -0.04641561,  0.03032436,
        0.03980207,  0.10287806, -0.05493232, -0.04576158,  0.00829523,
       -0.05933741, -0.03750002,  0.01321017, -0.03663053,  0.0355168 ,
        0.09069616, -0.00175598,  0.01084508,  0.01297861, -0.05895422,
       -0.01948457, -0.06033428,  0.06804961, -0.06930993, -0.04940693,
       -0.0437188 ,  0.01570501, -0.00554556, -0.00154619, -0.01765987,
        0.00310682, -0.01209463, -0.03566691,  0.03067628,  0.02211999,
        0.06342281, -0.02612913, -0.01289796, -0.06539589, -0.0512692 ,
       -0.02540876, -0.02142411,  0.04536439, -0.05471431, -0.03

In [10]:
# Display the raw (pre-normalization) embedding for this track, as stored by the embedding loop.
embeddings['31-Vochop.mp3']

array([[-0.01535774, -0.00230278,  0.1368986 , -0.04519567, -0.02516975,
         0.01628478, -0.04515627, -0.0283875 , -0.03730669,  0.06079391,
         0.05097679, -0.01812225, -0.08931892,  0.02313256,  0.00890214,
        -0.0564709 ,  0.07431056,  0.09796149, -0.03721803, -0.02463508,
         0.03783404,  0.03641412,  0.04073813, -0.0709539 , -0.05973309,
         0.04825588, -0.00660667,  0.00669866, -0.04641561,  0.03032436,
         0.03980207,  0.10287806, -0.05493232, -0.04576158,  0.00829523,
        -0.05933741, -0.03750002,  0.01321017, -0.03663053,  0.0355168 ,
         0.09069616, -0.00175598,  0.01084508,  0.01297861, -0.05895422,
        -0.01948457, -0.06033428,  0.06804961, -0.06930993, -0.04940693,
        -0.0437188 ,  0.01570501, -0.00554556, -0.00154619, -0.01765987,
         0.00310682, -0.01209463, -0.03566691,  0.03067628,  0.02211999,
         0.06342281, -0.02612913, -0.01289796, -0.06539589, -0.0512692 ,
        -0.02540876, -0.02142411,  0.04536439, -0.0

In [11]:
# Report the vector length of one embedding before and after explicit L2 normalization.
sample_key = '31-Vochop.mp3'

# Raw model output with the batch axis dropped, and its normalized counterpart.
original_emb = embeddings[sample_key].squeeze()
normalized_emb = normalized_embeddings[sample_key]

# L2 norm of each of the two vectors.
original_l2_norm, normalized_l2_norm = (
    np.linalg.norm(vec) for vec in (original_emb, normalized_emb)
)

divider = f"{'='*40}"
report_lines = [
    f"📁 파일: {sample_key}",
    divider,
    f"🔹 원본 벡터 L2 norm:     {original_l2_norm:.6f}",
    f"🔹 정규화 후 L2 norm:     {normalized_l2_norm:.6f}",
    divider,
    f"📐 벡터 차원: {original_emb.shape[0]}",
    f"📉 축소 비율: {normalized_l2_norm / original_l2_norm:.6f}",
]
print("\n".join(report_lines))


📁 파일: 31-Vochop.mp3
🔹 원본 벡터 L2 norm:     1.000000
🔹 정규화 후 L2 norm:     1.000000
📐 벡터 차원: 512
📉 축소 비율: 1.000000


## Write embeddings to Supabase

In [12]:
# Turn the {file name: embedding} mapping into records ready for Supabase:
# the numeric prefix of the file name becomes the id ("31-Vochop.mp3" -> 31)
# and the embedding array is squeezed and converted to a plain Python list.
# Records are ordered by id.
supabase_data = sorted(
    (
        {
            "id": int(filename.split('-')[0]),
            "embedding": emb.squeeze().tolist(),
        }
        for filename, emb in embeddings.items()
    ),
    key=lambda record: record["id"],
)

# Summarize the result using the first record.
first_record = supabase_data[0]
print(f"총 {len(supabase_data)}개 데이터 변환 완료!")
print(f"\n샘플 데이터:")
print(f"  - id: {first_record['id']}")
print(f"  - embedding 타입: {type(first_record['embedding'])}")
print(f"  - embedding 길이: {len(first_record['embedding'])}")
print(f"  - embedding[:5]: {first_record['embedding'][:5]}")


총 84개 데이터 변환 완료!

샘플 데이터:
  - id: 3
  - embedding 타입: <class 'list'>
  - embedding 길이: 512
  - embedding[:5]: [-0.0015601374907419086, 0.0015460016438737512, 0.11782635003328323, -0.03567487373948097, -0.009803659282624722]


In [13]:
# First record after the sort by id, i.e. the one with the smallest id.
supabase_data[0]

{'id': 3,
 'embedding': [-0.0015601374907419086,
  0.0015460016438737512,
  0.11782635003328323,
  -0.03567487373948097,
  -0.009803659282624722,
  0.01628863625228405,
  -0.027252990752458572,
  -0.0421089231967926,
  -0.031626828014850616,
  0.07585335522890091,
  0.06089190021157265,
  -0.01259580533951521,
  -0.09788718074560165,
  0.03793303668498993,
  0.0011754533043131232,
  -0.05081396549940109,
  0.06094101443886757,
  0.08808136731386185,
  -0.02865176647901535,
  -0.033251140266656876,
  0.035608772188425064,
  0.02312365360558033,
  0.04963463917374611,
  -0.08039279282093048,
  -0.060359857976436615,
  0.06477507203817368,
  -0.016764162108302116,
  0.01149076223373413,
  -0.04643365368247032,
  0.022508271038532257,
  0.04422696679830551,
  0.09631198644638062,
  -0.03134860843420029,
  -0.02712571993470192,
  -0.015977226197719574,
  -0.0578671433031559,
  -0.038377974182367325,
  0.020348375663161278,
  -0.021763408556580544,
  0.042162951081991196,
  0.084504649043083

In [14]:
# NOTE(review): this displays every record with its full embedding list;
# consider a slice (e.g. supabase_data[:2]) to keep the saved output small.
supabase_data

[{'id': 3,
  'embedding': [-0.0015601374907419086,
   0.0015460016438737512,
   0.11782635003328323,
   -0.03567487373948097,
   -0.009803659282624722,
   0.01628863625228405,
   -0.027252990752458572,
   -0.0421089231967926,
   -0.031626828014850616,
   0.07585335522890091,
   0.06089190021157265,
   -0.01259580533951521,
   -0.09788718074560165,
   0.03793303668498993,
   0.0011754533043131232,
   -0.05081396549940109,
   0.06094101443886757,
   0.08808136731386185,
   -0.02865176647901535,
   -0.033251140266656876,
   0.035608772188425064,
   0.02312365360558033,
   0.04963463917374611,
   -0.08039279282093048,
   -0.060359857976436615,
   0.06477507203817368,
   -0.016764162108302116,
   0.01149076223373413,
   -0.04643365368247032,
   0.022508271038532257,
   0.04422696679830551,
   0.09631198644638062,
   -0.03134860843420029,
   -0.02712571993470192,
   -0.015977226197719574,
   -0.0578671433031559,
   -0.038377974182367325,
   0.020348375663161278,
   -0.021763408556580544,
   

In [19]:
# Collect the ids that are about to be written, in the order of the records, for a visual check.
ids = [record['id'] for record in supabase_data]
print(ids)


[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86]


In [20]:
import dotenv, os
from supabase import create_client

# Load credentials from the local env file. override=True lets values from the
# file replace variables that are already set in the process environment.
dotenv.load_dotenv("../.env.local", override=True)

# Build the client from the project URL and the secret key (passed positionally: URL, then key).
supabase_client = create_client(
    os.getenv("SUPABASE_URL"),
    os.getenv("SUPABASE_SECRET_KEY"),
)

In [21]:
# Write each embedding into the `embeddings` column of the `tracks` row with the matching id.
# An update whose filter matches no row does not raise, so the ids that were
# not written are collected and reported instead of always announcing success.
missing_ids = []
for item in supabase_data:
    response = (
        supabase_client.table("tracks")
        .update({"embeddings": item["embedding"]})
        .eq("id", item["id"])
        .execute()
    )
    # Assumes the client returns the updated rows in `response.data`
    # (the supabase-py default) — TODO confirm for the installed version.
    if not response.data:
        missing_ids.append(item["id"])

if missing_ids:
    print(f"경고: 'tracks' 테이블에서 일치하는 행을 찾지 못해 업데이트되지 않은 id: {missing_ids}")
else:
    print("Supabase 'tracks' 테이블의 각 id에 embeddings 필드를 업데이트하였습니다.")


Supabase 'tracks' 테이블의 각 id에 embeddings 필드를 업데이트하였습니다.
