In [1]:
import os
import sys
import wave
import json

from vosk import Model, KaldiRecognizer, SetLogLevel
# !pip install vosk
import Word

# Configure the verbosity of the Vosk library's own logging.
# NOTE(review): the exact meaning of level 0 is not visible in this notebook —
# confirm against the Vosk documentation.
SetLogLevel(0)

In [2]:
# Directory holding the unpacked Vosk model. Models can be downloaded from
# https://alphacephei.com/vosk/models
model_path = "./model/vosk-model-en-us-0.22/vosk-model-en-us-0.22"

if not os.path.exists(model_path):
    # Passing the message to sys.exit() sends it to stderr and makes the
    # process exit with status 1; a bare sys.exit() would report success (0)
    # even though the model is missing.
    sys.exit(f"Please download the model from https://alphacephei.com/vosk/models and unpack as {model_path}")

# Loading the model; the surrounding prints bracket the call so the reader can
# see when it starts and finishes.
print(f"Reading your vosk model '{model_path}'...")
model = Model(model_path)
print(f"'{model_path}' model was successfully read")

Reading your vosk model './model/vosk-model-en-us-0.22/vosk-model-en-us-0.22'...
'./model/vosk-model-en-us-0.22/vosk-model-en-us-0.22' model was successfully read


In [3]:
# --- File locations used throughout the notebook ---

# Source video that is later opened with VideoFileClip and cut into clips
video_filename = "./video/sample.mp4"

# WAV file that is fed to the speech recognizer
audio_filename = "./audio/output.wav"

# Text file intended to receive the recognized text
text_filename = "./audio/speech_recognition_systems_vosk_with_timestamps.txt"

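Before running recognition, convert the audio to an 8 kHz, mono, 16-bit PCM WAV file (run this in a shell): `ffmpeg -i sample.wav -acodec pcm_s16le -ac 1 -ar 8000 output.wav`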

In [None]:
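# TODO: extract the audio track from the video automatically (work in progress).
# NOTE: the draft command below requests stereo output (-ac 2), but the format
# check later in this notebook only accepts mono audio (-ac 1).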
# import subprocess

# command = "ffmpeg -i "+video_filename+" -ab 160k -ac 2 -ar 44100 -vn "+audio_filename

# os.system(command)

In [4]:
# Abort early when the audio file is missing.
if not os.path.exists(audio_filename):
    # Passing the message to sys.exit() sends it to stderr and makes the
    # process exit with status 1; a bare sys.exit() would report success (0).
    sys.exit(f"File '{audio_filename}' doesn't exist")

# Open the WAV file for reading. The handle stays open on purpose: later cells
# query its format and read its frames.
print(f"Reading your file '{audio_filename}'...")
wf = wave.open(audio_filename, "rb")
print(f"'{audio_filename}' file was successfully read")

Reading your file './audio/output.wav'...
'./audio/output.wav' file was successfully read


In [5]:
 # check if audio is mono wav
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
    # Rejected unless the file has exactly 1 channel, 2-byte samples and no
    # compression. Release the file handle before aborting so it is not leaked.
    wf.close()
    # Passing the message to sys.exit() sends it to stderr and makes the
    # process exit with status 1; a bare sys.exit() would report success (0).
    sys.exit("Audio file must be WAV format mono PCM.")

In [6]:
# Build a recognizer from the loaded model and the sample rate reported by the
# opened WAV file, then enable word-level output on it.
sample_rate = wf.getframerate()
rec = KaldiRecognizer(model, sample_rate)
rec.SetWords(True)

In [7]:
# Parsed JSON results, in the order the recognizer produced them
results = []

# recognize speech using vosk model
try:
    while True:
        # Read the audio in chunks of 8000 frames; an empty chunk means the
        # end of the file has been reached.
        data = wf.readframes(8000)
        if len(data) == 0:
            break
        # When AcceptWaveform returns a truthy value, collect the recognizer's
        # current result (presumably a completed utterance — confirm in the
        # Vosk docs).
        if rec.AcceptWaveform(data):
            part_result = json.loads(rec.Result())
            results.append(part_result)

    # Collect whatever the recognizer still holds after the last chunk.
    part_result = json.loads(rec.FinalResult())
    results.append(part_result)
finally:
    # The file has been fully consumed (or reading failed): release the
    # handle instead of leaving it open for the rest of the kernel session.
    wf.close()

In [8]:
# Display the raw recognition output: the list of dicts parsed from the
# recognizer's Result()/FinalResult() JSON strings.
results

[{'result': [{'conf': 1.0, 'end': 1.23, 'start': 0.69, 'word': 'begin'}],
  'text': 'begin'},
 {'result': [{'conf': 1.0, 'end': 2.91, 'start': 2.58, 'word': 'check'},
   {'conf': 1.0, 'end': 3.21, 'start': 2.91, 'word': 'check'},
   {'conf': 1.0, 'end': 3.48, 'start': 3.21, 'word': 'one'},
   {'conf': 1.0, 'end': 3.69, 'start': 3.48, 'word': 'two'},
   {'conf': 1.0, 'end': 4.11, 'start': 3.69, 'word': 'three'},
   {'conf': 1.0, 'end': 5.64, 'start': 5.25, 'word': 'next'},
   {'conf': 1.0, 'end': 6.21, 'start': 5.64, 'word': 'question'},
   {'conf': 0.957199, 'end': 6.81, 'start': 6.45, 'word': 'check'},
   {'conf': 0.893365, 'end': 7.38, 'start': 7.08, 'word': 'these'},
   {'conf': 0.518394, 'end': 7.65, 'start': 7.41, 'word': 'for'},
   {'conf': 0.85234, 'end': 8.19, 'start': 7.65, 'word': 'five'}],
  'text': 'check check one two three next question check these for five'},
 {'result': [{'conf': 1.0, 'end': 10.47, 'start': 10.05, 'word': 'six'},
   {'conf': 1.0, 'end': 10.83, 'start': 

In [9]:
 # convert list of JSON dictionaries to list of 'Word' objects

# Flat list of Word objects, one per recognized word, in order of appearance
list_of_words = []
for sentence in results:
    # Sometimes recognition returns a dictionary without any words, e.g.
    # {'text': ''}. Test for the 'result' key itself rather than counting the
    # dictionary's keys, so such entries are skipped no matter how many other
    # keys they carry.
    for obj in sentence.get('result', []):
        w = Word.Word(obj)  # create custom Word object
        list_of_words.append(w)  # and add it to list

In [10]:
# Word objects for every "next" that is immediately followed by "question"
nextTimestamps = []

# The last word has no successor, so it is left out of the iteration; this
# avoids an IndexError on list_of_words[idx + 1] when the recording ends with
# the word "next".
for idx, word in enumerate(list_of_words[:-1]):
    if word.word == "next" and list_of_words[idx + 1].word == "question":
        nextTimestamps.append(word)
        print(word.to_string())

next                 from 5.25 sec to 5.64 sec, confidence is 100.00%, idx: 7
next                 from 12.66 sec to 13.05 sec, confidence is 100.00%, idx: 16
next                 from 21.45 sec to 21.81 sec, confidence is 100.00%, idx: 25


In [11]:
 # forming a final string from the words
# Concatenate the 'text' field of every result, each followed by one space
text = ''.join(chunk['text'] + ' ' for chunk in results)

# Header line first, then the assembled transcript on its own line
print("\tVosk thinks you said:\n", text, sep="\n")

	Vosk thinks you said:

begin check check one two three next question check these for five six seven eight next question check seven one one eleven twelve thirteen next question check check check three four five last  


# Cutting the Video

In [12]:
from moviepy.editor import *

In [13]:
# Split the source video into clips at every detected "next question" marker.
# Clip 0 runs from the start of the video to the end of the first marker word;
# each following clip runs from one marker's start to the next marker's start,
# and the last clip runs from the final marker's start to the end of the video.
# NOTE(review): clip 0 ends at the first marker's `.end` while clip 1 begins at
# that marker's `.start`, so the two overlap by the spoken word "next" —
# confirm this is intended.
segments = []
if nextTimestamps:
    segments.append((0, nextTimestamps[0].end))
    for current, following in zip(nextTimestamps, nextTimestamps[1:]):
        segments.append((current.start, following.start))
    # An end time of None makes subclip run to the end of the video.
    segments.append((nextTimestamps[-1].start, None))
else:
    # No marker was recognized: export the whole video as a single clip
    # instead of failing with an IndexError on nextTimestamps[0].
    segments.append((0, None))

# Make sure the destination directory exists before anything is written.
os.makedirs("./output", exist_ok=True)

# Open the source video once (instead of once per clip) and always release it
# afterwards, so no readers are left open.
clip = VideoFileClip(video_filename)
clipNum = 0
try:
    for t_start, t_end in segments:
        clip1 = clip.subclip(t_start, t_end)
        clip1.write_videofile("./output/output"+str(clipNum)+".mp4")
        clipNum+=1
finally:
    clip.close()

Moviepy - Building video ./output/output0.mp4.
MoviePy - Writing audio in output0TEMP_MPY_wvf_snd.mp3


                                                                  

MoviePy - Done.
Moviepy - Writing video ./output/output0.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ./output/output0.mp4
Moviepy - Building video ./output/output1.mp4.
MoviePy - Writing audio in output1TEMP_MPY_wvf_snd.mp3


                                                                    

MoviePy - Done.
Moviepy - Writing video ./output/output1.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ./output/output1.mp4
Moviepy - Building video ./output/output2.mp4.
MoviePy - Writing audio in output2TEMP_MPY_wvf_snd.mp3


                                                                  

MoviePy - Done.
Moviepy - Writing video ./output/output2.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ./output/output2.mp4
Moviepy - Building video ./output/output3.mp4.
MoviePy - Writing audio in output3TEMP_MPY_wvf_snd.mp3


                                                                  

MoviePy - Done.
Moviepy - Writing video ./output/output3.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ./output/output3.mp4
