In [None]:
%pip install pyannote.audio
%pip install numpy==1.26

In [2]:
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment (typically from a local .env file)
# so that the access token never has to be written into the notebook itself.
load_dotenv()

# Hugging Face access token; this is None when the variable is not set.
HUGGING_FACE_TOKEN = os.environ.get("HUGGING_FACE_TOKEN")

In [None]:
# Instantiate the pretrained speaker-diarization pipeline from the Hugging Face Hub,
# authenticating with the token read from the environment.
from pyannote.audio import Pipeline

pipeline = Pipeline.from_pretrained(
  "pyannote/speaker-diarization-3.1",
  use_auth_token=HUGGING_FACE_TOKEN
)

# Fail here with a clear message instead of later with an obscure NoneType error.
# NOTE(review): pyannote.audio 3.x is understood to print a message and return None
# (rather than raise) when the gated model cannot be downloaded - confirm.
if pipeline is None:
    raise RuntimeError(
        "Could not load 'pyannote/speaker-diarization-3.1'. Check that "
        "HUGGING_FACE_TOKEN is set and that the model's user conditions have "
        "been accepted on the Hugging Face Hub."
    )

In [None]:
import torch

# Run the pipeline on the GPU when CUDA is usable; otherwise leave it where it is
# and just report that no GPU was found.
if not torch.cuda.is_available():
    print('cuda is not available')
else:
    pipeline.to(torch.device("cuda"))
    print('cuda is available')

In [None]:
# Apply the diarization pipeline to the audio file; the result is kept in
# `diarization` for the cells below.
diarization = pipeline(
    "../audio/싼기타_비싼기타.mp3"
)

In [8]:
# Persist the diarization result in RTTM format (UTF-8 text file in the working directory).
with open("싼기타_비싼기타.rttm", mode="w", encoding="utf-8") as rttm:
    diarization.write_rttm(rttm)

In [None]:
# Load the RTTM file into a pandas DataFrame so it can later be exported as CSV.
import pandas as pd
rttm_patt = "./싼기타_비싼기타.rttm"

# The file is parsed as space-separated text without a header row; the ten
# columns are named explicitly here.
df_rttm = pd.read_csv(
    rttm_patt,
    names=[
        'type', 'file', 'chnl', 'start', 'duration',
        'C1', 'C2', 'speaker_id', 'C3', 'C4',
    ],
    header=None,
    sep=' ',
)

display(df_rttm)

In [None]:
# Add the time at which each utterance ends: end = start + duration.
df_rttm['end'] = df_rttm['start'].add(df_rttm['duration'])

display(df_rttm)

In [None]:
# Add a "number" column that will label runs of consecutive utterances.
# Every row starts out as None; only the row with index label 0 is seeded with 0.
df_rttm["number"] = None
df_rttm.loc[0, "number"] = 0

display(df_rttm)

In [None]:
# Number the speaker turns: consecutive rows with the same speaker_id share one
# number, and the number increases by 1 every time the speaker changes (first row = 0).
#
# Comparing each speaker_id with the previous row's value (shift) marks the rows where
# a new turn starts; the first row always counts as a start because it is compared
# against a missing value. The running count of those starts, minus 1, is the turn number.
#
# This assigns the whole column in one step, so it does not need a pre-seeded value in
# row 0, works for any index (not only 0..n-1), handles an empty frame, and gives the
# same result when the cell is re-run. The column is integer-typed rather than object.
df_rttm["number"] = (
    df_rttm["speaker_id"].ne(df_rttm["speaker_id"].shift()).cumsum() - 1
)

display(df_rttm.head(10))

In [None]:
# Collapse each run of consecutive utterances by the same speaker into one row:
# earliest start, latest end, and the speaker_id of the run's first row.
df_rttm_grouped = df_rttm.groupby("number").agg(
    start=("start", "min"),
    end=("end", "max"),
    speaker_id=("speaker_id", "first"),
)

display(df_rttm_grouped)

In [None]:
# Add the length of each merged turn (end - start), then discard the "number"
# group index in favour of a fresh 0..n-1 index.
df_rttm_grouped["duration"] = df_rttm_grouped["end"].sub(df_rttm_grouped["start"])
df_rttm_grouped = df_rttm_grouped.reset_index(drop=True)
display(df_rttm_grouped)

In [None]:
# Save the speaker-diarization result as a comma-separated CSV file,
# leaving out the DataFrame index.
df_rttm_grouped.to_csv(
    "../audio/싼기타_비싼기타_rttm.csv",
    index=False,
    sep=',',
)