-
Notifications
You must be signed in to change notification settings - Fork 693
/
Copy pathvad_modal.py
41 lines (35 loc) · 1.04 KB
/
vad_modal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import uuid
import torch
from fastapi import UploadFile
from pyannote.audio import Pipeline
# Instantiate pretrained voice activity detection pipeline
# Module-level setup: runs once at import time. Pulling the model
# requires network access and (for gated models) a valid
# HUGGINGFACE_TOKEN environment variable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vad = Pipeline.from_pretrained(
    "pyannote/voice-activity-detection",
    use_auth_token=os.getenv('HUGGINGFACE_TOKEN')
).to(device)
# Leftover Modal deployment scaffolding (app/image definitions),
# kept disabled here.
# app = App(name='vad')
# image = (
# Image.debian_slim()
# .pip_install("pyannote.audio")
# .pip_install("torch")
# .pip_install("torchaudio")
# )
# Scratch directory for uploaded audio; vad_endpoint writes each
# upload here and deletes it after processing.
os.makedirs('_temp', exist_ok=True)
def vad_endpoint(file: UploadFile):
    """Run voice activity detection on an uploaded audio file.

    Writes the upload to a uniquely-named temp file under ``_temp/``,
    runs the module-level ``vad`` pipeline on it, and always removes
    the temp file afterwards.

    Args:
        file: the uploaded audio file (read fully into memory).

    Returns:
        list[dict]: one dict per detected speech segment with
        ``start``, ``end``, and ``duration`` keys (seconds, floats).
    """
    upload_id = str(uuid.uuid4())
    file_path = f"_temp/{upload_id}_{file.filename}"
    try:
        with open(file_path, 'wb') as f:
            f.write(file.file.read())
        output = vad(file_path)
        segments = output.get_timeline().support()
    finally:
        # Clean up even when the pipeline raises, so failed requests
        # don't leak files into _temp/.
        if os.path.exists(file_path):
            os.remove(file_path)
    return [
        {
            'start': segment.start,
            'end': segment.end,
            'duration': segment.duration,
        }
        for segment in segments
    ]