In [6]:
# Step 1: Install the required libraries
!pip install vosk pydub soundfile

# Step 2: Download a small Vosk model (this might take some time)
!wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
!unzip -o vosk-model-small-en-us-0.15.zip  # The -o flag overwrites without confirmation

# Step 3: Mount Google Drive to access the audio file
from google.colab import drive
drive.mount('/content/drive')

# Step 4: Convert the audio file to mono 16-bit PCM at 16kHz using pydub
from pydub import AudioSegment

# Path to your original .wav file in Google Drive (replace with your path).
# Keep the literal on a single line: a single-quoted string cannot span lines,
# and stray newlines inside the path would make the file lookup fail anyway.
original_audio_file_path = '/content/drive/MyDrive/extracted_audio.wav'  # Update this path

# Load the original audio
audio = AudioSegment.from_wav(original_audio_file_path)

# Convert to mono, 16-bit PCM, 16kHz
audio = audio.set_channels(1)  # Mono
# Force 2-byte samples so the exported file is 16-bit PCM regardless of the
# bit depth of the source recording (the transcription step requires this).
audio = audio.set_sample_width(2)  # 16-bit
audio = audio.set_frame_rate(16000)  # 16kHz
converted_audio_file_path = '/content/converted_audio.wav'
audio.export(converted_audio_file_path, format='wav')

# Step 5: Transcribe the audio using Vosk
import os
import wave
import json
from vosk import Model, KaldiRecognizer

# Load the Vosk model (directory extracted from the downloaded zip archive)
model = Model("vosk-model-small-en-us-0.15")

# Open the converted .wav file. The context manager guarantees the file handle
# is closed even if recognition raises part-way through.
with wave.open(converted_audio_file_path, "rb") as wf:
    # Check if the audio file is in the correct format (it should be, after conversion)
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getframerate() > 16000:
        print("Audio file must be WAV format mono PCM with 16kHz or lower.")
    else:
        # Initialize recognizer with the sample rate reported by the file
        rec = KaldiRecognizer(model, wf.getframerate())

        # Feed the audio in chunks of 4000 frames; whenever AcceptWaveform
        # returns a truthy value, collect the "text" field of rec.Result().
        segments = []
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break
            if rec.AcceptWaveform(data):
                segments.append(json.loads(rec.Result()).get("text", ""))

        # Collect the final result as well once all audio has been fed
        segments.append(json.loads(rec.FinalResult()).get("text", ""))

        # Join only non-empty segments so the transcript has no doubled or
        # trailing spaces when a segment produced no text.
        transcription = " ".join(text for text in segments if text)
        print("Transcription: ", transcription)


--2024-09-06 06:09:18--  https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
Resolving alphacephei.com (alphacephei.com)... 188.40.21.16, 2a01:4f8:13a:279f::2
Connecting to alphacephei.com (alphacephei.com)|188.40.21.16|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 41205931 (39M) [application/zip]
Saving to: ‘vosk-model-small-en-us-0.15.zip.2’


2024-09-06 06:09:21 (18.7 MB/s) - ‘vosk-model-small-en-us-0.15.zip.2’ saved [41205931/41205931]

Archive:  vosk-model-small-en-us-0.15.zip
  inflating: vosk-model-small-en-us-0.15/am/final.mdl  
  inflating: vosk-model-small-en-us-0.15/graph/disambig_tid.int  
  inflating: vosk-model-small-en-us-0.15/graph/HCLr.fst  
  inflating: vosk-model-small-en-us-0.15/graph/Gr.fst  
  inflating: vosk-model-small-en-us-0.15/graph/phones/word_boundary.int  
  inflating: vosk-model-small-en-us-0.15/conf/model.conf  
  inflating: vosk-model-small-en-us-0.15/conf/mfcc.conf  
  inflating: vosk-model-small-en-us-0.15/i