In [254]:
import speech_recognition as sr
import csv
import os
import whisper
import pandas as pd
from tabulate import tabulate

# Working directory (adjust as needed)

In [255]:
# Folder scanned for .wav files: the "audios" subfolder of the current working directory.
# os.path.join inserts the separator of the host OS; the previous hard-coded '\\'
# only produced a valid path on Windows.
work_dir = os.path.join(os.getcwd(), 'audios')
# speech_recognition recognizer instance (not referenced by the other cells visible here).
r = sr.Recognizer()

In [276]:
def list_file_name():
    """Collect the .wav files found directly inside ``work_dir``.

    Returns:
        tuple: ``(paths, indexed_names)`` where ``paths`` is a list of full
        paths (in ``os.listdir`` order) and ``indexed_names`` maps each
        position in that list to the file's base name.
    """
    wav_paths = [
        os.path.join(work_dir, entry)
        for entry in os.listdir(work_dir)
        if entry.endswith('.wav')
    ]
    # Index -> base name lookup, handy when only specific files are processed.
    names_by_index = {
        position: os.path.basename(path)
        for position, path in enumerate(wav_paths)
    }
    return wav_paths, names_by_index

In [287]:
# Scan work_dir once: full_list holds the full .wav paths, idx_full_list maps index -> file name.
full_list, idx_full_list = list_file_name()

In [278]:
def display_list():
    """Print the global ``idx_full_list`` mapping as an 'Index' / 'File Name' table."""
    rows = idx_full_list.items()
    print(tabulate(rows, headers=['Index', 'File Name']))

In [294]:
def display_result(result):
    """Print transcription records as a grid-formatted table.

    Args:
        result: records to tabulate; the table headers are taken from the
            record keys (``headers='keys'``).
    """
    table = tabulate(
        result,
        headers='keys',
        tablefmt='grid',
        maxcolwidths=[None, 120],
    )
    print(table)

In [280]:
# Print the index -> file name table so indices can be matched to files.
display_list()

  Index  File Name
-------  --------------------------------------------
      0  car4.wav
      1  h5.wav
      2  jurcic-001-120912_124317_0001940_0002325.wav
      3  jurcic-001-120912_124746_0001976_0002481.wav
      4  jurcic-001-120912_125002_0001940_0002174.wav
      5  jurcic-001-120912_125244_0002021_0002372.wav
      6  jurcic-001-120912_125521_0001945_0002242.wav
      7  jurcic-001-120912_134010_0001997_0002402.wav
      8  jurcic-001-120912_134951_0002063_0002220.wav
      9  jurcic-001-120912_135053_0002037_0002411.wav
     10  jurcic-001-120912_135142_0002038_0002582.wav
     11  jurcic-001-120912_135345_0001944_0002178.wav
     12  M_0048_11y6m_2.wav
     13  M_0052_14y4m_1.wav
     14  M_0133_8y9m_1.wav
     15  OSR_fr_000_0041_8k.wav
     16  OSR_uk_000_0020_8k.wav
     17  OSR_us_000_0010_8k.wav
     18  OSR_us_000_0011_8k.wav
     19  OSR_us_000_0012_8k.wav
     20  OSR_us_000_0018_8k.wav
     21  OSR_us_000_0030_8k.wav
     22  OSR_us_000_0038_8k.wav
     23  OSR_u

# OpenAI Whisper

In [261]:
# Load the Whisper 'medium' model once; the transcription functions read this global `model`.
model = whisper.load_model('medium')

# Process a single audio file

In [301]:
def choose_one():
    """Prompt until the user enters a valid index into the global ``full_list``.

    Surrounding whitespace in the entry is ignored. Entries that are not a
    non-negative decimal integer, or that fall outside
    ``0 .. len(full_list) - 1``, are reported and the prompt is repeated.

    Returns:
        int: the selected index.

    Note:
        If ``full_list`` is empty no entry is ever in range, so the prompt
        repeats indefinitely.
    """
    while True:
        i = input(f'Please select from 0-{len(full_list)-1} to process one audio: ').strip()
        # isdecimal() accepts only characters that int() can parse; isdigit()
        # also accepts e.g. superscript digits ('²'), which made int() raise
        # ValueError and abort the prompt.
        if not i.isdecimal():
            print(f'{i} is not a digit')
            continue
        idx = int(i)
        if 0 <= idx < len(full_list):
            return idx
        print(f'{i} is not in range')

In [263]:
def one_select(select):
    """Return a one-element list holding the path at index ``select`` of ``full_list``."""
    return [full_list[select]]

In [264]:
def one_result(select):
    """Transcribe each path in ``select`` with the global ``model``.

    Args:
        select: iterable of audio file paths.

    Returns:
        list[dict]: one ``{'file_name', 'transcript'}`` record per path, where
        ``file_name`` is the path's base name and ``transcript`` is the
        ``'text'`` entry of ``model.transcribe(path)``.
    """
    return [
        {
            'file_name': os.path.basename(audio_path),
            'transcript': model.transcribe(audio_path)['text'],
        }
        for audio_path in select
    ]

# Process multiple selected audio files

In [267]:
# List the indexes of specific audio files to process
def specific_audio():
    """Prompt for a comma-separated list of indexes into the global ``full_list``.

    Every token is stripped and validated. If any token is not a non-negative
    decimal integer, or is outside ``0 .. len(full_list) - 1``, a message is
    printed for each offending token and the whole prompt is repeated.

    Returns:
        set[int]: the selected indexes, with duplicates collapsed.
    """
    while True:
        raw_entry = input(f'All duplicate digits will be counted as one\nPlease select from 0-{len(full_list)-1} which audio to process: ').split(',')
        chosen = set()
        all_valid = True
        for n in raw_entry:
            stripped = n.strip()
            # isdecimal() accepts only characters that int() can parse; isdigit()
            # also accepts e.g. superscript digits ('²'), which made int() raise
            # ValueError.
            if not stripped.isdecimal():
                print(f'{stripped} is not a digit')
                all_valid = False
            elif not 0 <= int(stripped) < len(full_list):
                print(f'{stripped} is not in range')
                all_valid = False
            else:
                chosen.add(int(stripped))
        # Only accept the entry when every token was valid; otherwise ask again.
        if all_valid:
            return chosen

In [268]:
def multi_select(specific_audio):
    """Map each index in ``specific_audio`` to its path in the global ``full_list``.

    Each index is passed through ``int()`` before the lookup.

    Returns:
        list: the selected paths, in the iteration order of ``specific_audio``.
    """
    return [full_list[int(index)] for index in specific_audio]

In [269]:
def multi_result(files):
    """Transcribe every path in ``files`` with the global ``model``.

    Args:
        files: iterable of audio file paths.

    Returns:
        list[dict]: one ``{'file_name', 'transcript'}`` record per path, in
        input order.
    """
    def _to_record(audio_path):
        # Base name for display, 'text' field of the transcription as the transcript.
        name = os.path.basename(audio_path)
        text = model.transcribe(audio_path)['text']
        return {'file_name': name, 'transcript': text}

    return list(map(_to_record, files))

# Process all audio in a folder

In [270]:
def whisper_all_files():
    """Transcribe every path in the global ``full_list`` with the global ``model``.

    Returns:
        list[dict]: one ``{'file_name', 'transcript'}`` record per path, in
        ``full_list`` order.
    """
    records = []
    for audio_path in full_list:
        record = {
            'file_name': os.path.basename(audio_path),
            'transcript': model.transcribe(audio_path)['text'],
        }
        records.append(record)
    return records

### CSV save and simple display

In [271]:
def save_as_csv(result, output='transcript_whisper.csv'):
    """Write transcription records to a UTF-8 CSV file.

    The file gets a ``Track, File Name, Transcription`` header row followed by
    one row per record, with ``Track`` numbered from 1.

    Args:
        result: iterable of dicts with ``'file_name'`` and ``'transcript'`` keys.
        output: destination path. Defaults to ``'transcript_whisper.csv'``
            (relative to the current working directory), which was previously
            hard-coded.

    Returns:
        str: the path that was written (``output``).
    """
    # newline='' lets the csv module control line endings itself.
    with open(output, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Track', 'File Name', 'Transcription'])
        writer.writerows(
            [number, record['file_name'], record['transcript']]
            for number, record in enumerate(result, start=1)
        )
    return output

In [272]:
def open_csv(result):
    """Print every row of the CSV file at path ``result`` as a list of strings."""
    with open(result, mode='r', newline='', encoding='utf-8') as handle:
        for record in csv.reader(handle):
            print(record)

In [273]:
def display_csv(result):
    """Read the CSV file at path ``result`` into a DataFrame and print it."""
    print(pd.read_csv(result))

In [None]:
# Interactive driver: choose between 1, 2, 3, transcribe accordingly,
# then save the result to CSV and display it.
print('Type 1 if you want to process one audio')
print('Type 2 if you want to select which audio to process')
print('Type 3 if you want to process all audio:\n')
choice = 0
# Refresh the listing so the menu reflects the current folder contents.
full_list, idx_full_list = list_file_name()
# The previous condition (`choice != 1 or choice != 2 or choice != 3`) was
# always true and the loop only ended through `break`; test membership instead.
while choice not in (1, 2, 3):
    print('Please choose from 1-3\n')
    try:
        choice = int(input('1, 2, 3: '))
    except ValueError:
        # Non-numeric entry: ask again instead of crashing the cell.
        continue
    # if process one audio
    if choice == 1:
        print('Here are the lists of audios\n')
        display_list()
        idx = choose_one()
        result = one_result(one_select(idx))
    # if process selected audio
    elif choice == 2:
        print('Here are the lists of audios\n')
        display_list()
        entry = specific_audio()
        result = multi_result(multi_select(entry))
    # if process all audio
    elif choice == 3:
        print('This may take a while.')
        print('Processing all audio in this list:')
        display_list()
        result = whisper_all_files()
save_as_csv(result)
display_result(result)
input('Press any to close....')