-
Notifications
You must be signed in to change notification settings - Fork 0
/
speech2text.py
31 lines (26 loc) · 1.11 KB
/
speech2text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torchaudio
import torch
# Load the pretrained processor and CTC model; both come from the same
# checkpoint, so the checkpoint name is spelled out only once.
_CHECKPOINT = "facebook/wav2vec2-base-960h"
processor = Wav2Vec2Processor.from_pretrained(_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)
# Define a function to transcribe speech
def transcribe_audio(audio_file):
    """Transcribe a speech recording using the module-level processor and model.

    Args:
        audio_file: Path (or other source accepted by ``torchaudio.load``)
            of the recording to transcribe.

    Returns:
        The decoded transcription string. If any step fails, the exception
        message is returned as a string instead of being raised, so callers
        always receive text.
    """
    try:
        waveform, sample_rate = torchaudio.load(audio_file)
        # The processor expects audio at the rate its feature extractor is
        # configured for; resample files recorded at any other rate rather
        # than feeding them in unchanged.
        target_rate = processor.feature_extractor.sampling_rate
        if sample_rate != target_rate:
            waveform = torchaudio.functional.resample(
                waveform, sample_rate, target_rate
            )
        # Only the first channel of the recording is transcribed.
        input_values = processor(
            waveform[0].numpy(),
            sampling_rate=target_rate,
            return_tensors="pt",
        ).input_values
        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(input_values).logits
        # Greedy CTC decoding: pick the most likely token id per frame.
        predicted_ids = torch.argmax(logits, dim=-1)
        return processor.batch_decode(predicted_ids)[0]
    except Exception as e:
        # Best-effort contract kept from the original: report the failure as
        # text rather than propagating the exception.
        return str(e)
# Audio file path (assuming it's in the same directory as the script)
audio_file = "namibia.wav"  # Replace with the correct file path
result = transcribe_audio(audio_file)
# Save the transcription to a .txt file. The encoding is pinned to UTF-8 so
# the write does not depend on the platform's default encoding; the result
# may be an error message containing arbitrary characters.
output_file = "transcription.txt"
with open(output_file, "w", encoding="utf-8") as text_file:
    text_file.write(result)
print("Transcription saved to", output_file)