# Capstone Project

## Part 3 - Cloud Audio Transcription

In this part, each audio chunk is sent to the IBM Watson Speech to Text service; every returned transcription is collected in a list and added to the timecode table.

In [6]:
import pandas as pd
import numpy as np
import os
import shutil
import capstone

In [7]:
import json

from ibm_watson import SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

def quick_transcribe(filename, path, content_type='audio/wav', model='en-US_BroadbandModel'):
    '''
    Transcribes one audio file and saves the transcript as text.

    Sends the audio at `path` to the module-level `stt` client, takes the
    first alternative of the first result in the response, writes it to
    `filename + '.txt'` and returns it.

    filename = str, output path WITHOUT extension ('.txt' is appended)
    path = path to the audio file to transcribe
    content_type = check IBM's API documentation for further info
    model = defaults to US, check IBM's API documentation for further info

    Returns the transcript string, or None when the response holds no
    transcript (in which case the output file is created empty).
    Errors raised while opening the audio or calling the service propagate.
    '''
    # Query the service before touching the output file, so a failed request
    # does not truncate a transcript written by an earlier run.
    with open(path, 'rb') as chunk:
        response = stt.recognize(audio=chunk, content_type=content_type, model=model).get_result()

    # Only the first result / first alternative is used. A response without
    # that shape (e.g. an empty 'results' list) is treated as "no transcript";
    # any other error is left to surface instead of being silently swallowed.
    try:
        text = response['results'][0]['alternatives'][0]['transcript']
    except (KeyError, IndexError, TypeError):
        text = None

    # The context manager closes the file; no explicit close() is needed.
    with open(filename + '.txt', 'w') as output:
        if text is not None:
            output.write(text)
    return text

In [8]:
# Read the IBM Cloud credentials from a local JSON file; the context manager
# closes the file handle, which a bare open(...).read() never did.
with open('ibm.json') as credentials_file:
    keys = json.load(credentials_file)
url = keys['url']
key = keys['apikey']
#performs Auth
auth = IAMAuthenticator(key)
# Speech to Text client used by quick_transcribe, pointed at the service URL from the credentials
stt = SpeechToTextV1(authenticator=auth)
stt.set_service_url(url)

In [22]:
path = 'data/VAD/'
sources = path + 'chunks'
# os.listdir returns entries in arbitrary order, so list the directory once and
# sort it; both lists are then derived from the same ordered listing and stay
# aligned element by element.
file_names = sorted(fname for fname in os.listdir(sources) if fname.endswith('.wav'))
# Full path of every chunk, in the same order as file_names
file_list = [os.path.join(sources, fname) for fname in file_names]

In [13]:
# path already ends with '/', so join the pieces instead of concatenating
# (concatenation would give 'data/VAD//timecodes.csv').
df = pd.read_csv(os.path.join(path, 'timecodes.csv'))
# Keep only these five columns, in this order
df = df[['min','max','dur','in','out']]
df

Unnamed: 0,min,max,dur,in,out
0,35,292,256,1120.084922,9344.708496
1,293,610,316,9376.710922,19521.480077
2,611,982,370,19553.482504,31426.382682
3,983,1410,426,31458.385108,45123.421162
4,1411,1510,198,45155.423589,48323.663798
...,...,...,...,...,...
83,24635,25044,408,788379.773287,801468.765667
84,25045,25341,295,801500.768093,810973.486295
85,25342,25517,350,811005.488721,816605.913334
86,26045,26145,99,833503.194450,836703.437085


In [21]:
# Sort the chunk names in place so they are in the same order as the sorted
# file_list; both are later assigned to df as columns, row by row.
file_names.sort()
#file_names

In [15]:
#Obtaining transcriptions per file and appending file info
# Check the one-chunk-per-row assumption before any API call is made; otherwise
# a mismatch would only surface at the column assignments after the whole loop.
if not (len(file_list) == len(file_names) == len(df)):
    raise ValueError(
        'Expected one chunk per timecode row, got %d paths, %d names and %d rows'
        % (len(file_list), len(file_names), len(df))
    )

text = []
for chunk_path in file_list:
    # quick_transcribe appends '.txt' to its first argument, so pass the chunk
    # path without its extension: each chunk gets its own transcript file
    # instead of every call overwriting the same 'data/VAD/chunks.txt'.
    transcript_base = os.path.splitext(chunk_path)[0]
    transcription = quick_transcribe(transcript_base, chunk_path, model='en-AU_BroadbandModel')
    text.append(transcription)

# Assign whole columns at once (row i <-> chunk i) rather than cell by cell
df['transcription'] = text
df['file_names'] = file_names
df['file_paths'] = file_list

In [19]:
# Checking results: display the frame, which by now has the transcription,
# file_names and file_paths columns added next to the timecodes.
df

Unnamed: 0,min,max,dur,in,out,transcription,file_names,file_paths
0,35,292,256,1120.084922,9344.708496,yeah that is why they're put into your writing...,chunk_s0000.wav,data/VAD/chunks/chunk_s0000.wav
1,293,610,316,9376.710922,19521.480077,yeah it's very very very hardly yeah I don't e...,chunk_s0001.wav,data/VAD/chunks/chunk_s0001.wav
2,611,982,370,19553.482504,31426.382682,but the idea is to try to be as objective as p...,chunk_s0002.wav,data/VAD/chunks/chunk_s0002.wav
3,983,1410,426,31458.385108,45123.421162,yep so so just discussion of what you see here...,chunk_s0003.wav,data/VAD/chunks/chunk_s0003.wav
4,1411,1510,198,45155.423589,48323.663798,so when you hear the term critical thinking wh...,chunk_s0004.wav,data/VAD/chunks/chunk_s0004.wav
...,...,...,...,...,...,...,...,...
83,24635,25044,408,788379.773287,801468.765667,role play,chunk_s0083.wav,data/VAD/chunks/chunk_s0083.wav
84,25045,25341,295,801500.768093,810973.486295,if you have a good one seven north in about th...,chunk_s0084.wav,data/VAD/chunks/chunk_s0084.wav
85,25342,25517,350,811005.488721,816605.913334,here's an article I got on from Harvard Busine...,chunk_s0085.wav,data/VAD/chunks/chunk_s0085.wav
86,26045,26145,99,833503.194450,836703.437085,yep so you already,chunk_s0086.wav,data/VAD/chunks/chunk_s0086.wav


### Save Results

In [20]:
# Write the frame (index included) to a CSV file under the VAD data directory
transcription_csv = path + 'transcription.csv'
df.to_csv(transcription_csv)

>>