# 음성 변환 기술 구현
  
### STT (Speech-to-Text)

• OpenAI 설치

In [None]:
!pip install -qq --upgrade openai

• API KEY 로드 및 실행

In [None]:
import os
import getpass
from openai import OpenAI

# Resolve the OpenAI API key from the API_KEY_OPENAI environment variable.
# When the variable is missing, ask for the key interactively (input is not
# echoed) and store it in the process environment so later lookups succeed.
# For a quick local run the variable can also be assigned by hand, e.g.
#   os.environ["API_KEY_OPENAI"] = "<your OpenAI API key>"
# but never commit a notebook that contains a real key.
if 'API_KEY_OPENAI' not in os.environ:
    os.environ['API_KEY_OPENAI'] = getpass.getpass()

API_KEY = os.environ['API_KEY_OPENAI']


In [None]:
# Location of the audio sample to transcribe (an arbitrarily recorded voice clip).
speech_output_path = "./files/hey_audio_1.mp3"

# OpenAI client authenticated with the key loaded in the previous cell.
client = OpenAI(api_key=API_KEY)

녹음한 내용: 
```
안녕하세요 오늘부터 3일간 헤이마트 행사 기간입니다
```

In [None]:
# Transcribe the recording with the "whisper-1" model through the OpenAI client.
# The file is opened in binary mode inside a context manager so the handle is
# closed as soon as the request finishes, even if the API call raises.
with open(speech_output_path, "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )

# Show the recognized text.
print(transcript.text)

• 위스퍼 모델 직접 실행

In [None]:
!pip install --upgrade git+https://github.com/huggingface/transformers.git accelerate

• 모델 설정 및 함수 실행

In [None]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Checkpoint to load from the Hugging Face Hub.
model_id = "openai/whisper-large-v3"

# Pick the execution device and matching precision in one step:
# first CUDA GPU with float16 when CUDA is available, otherwise CPU with float32.
device, torch_dtype = (
    ("cuda:0", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

In [None]:
# Processor for the checkpoint; it supplies the tokenizer and the feature
# extractor that are handed to the pipeline below.
processor = AutoProcessor.from_pretrained(model_id)

# Load the speech seq2seq model in the selected precision and place it on the
# selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
).to(device)

# Assemble the automatic-speech-recognition pipeline around the loaded model.
pipe = pipeline(
    "automatic-speech-recognition",
    # model and pre-processing components
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    # placement / precision (same values used when loading the model)
    device=device,
    torch_dtype=torch_dtype,
    # generation and chunked-inference settings
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
)

• 파일 변환

In [None]:
import os

# The same recording is referenced as "./files/hey_audio_1.mp3" in the OpenAI
# API section of this notebook but as "./hey_audio_1.mp3" here. Prefer the path
# this cell originally used and fall back to the "files/" location, so the cell
# works with either layout. If neither file exists the original path is kept,
# so the failure is the same as before.
_candidate_paths = ("./hey_audio_1.mp3", "./files/hey_audio_1.mp3")
speech_output_path = next(
    (path for path in _candidate_paths if os.path.exists(path)),
    _candidate_paths[0],
)

# Run the local Whisper pipeline on the recording. The variable keeps its
# original name (result_openai) because other cells may refer to it.
result_openai = pipe(speech_output_path)
print(result_openai["text"])

• 음성 번역

In [None]:
# Run the same recording through the pipeline again, this time passing
# task="translate" to generation so the output is a translation rather than a
# transcription (Whisper's translate task targets English per the model card).
result_trans = pipe(
    speech_output_path,
    generate_kwargs={"task": "translate"},
)
print(result_trans["text"])