In [1]:
# imports

import io
import os
from google.cloud import speech_v1p1beta1
from google.cloud import speech_v1
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
from gcloud import storage
from oauth2client.service_account import ServiceAccountCredentials
import audioread

In [2]:
# constant parameters

# Local working directory that holds the audio files and the service-account
# key. It must end with a path separator because file names are appended to it
# by plain string concatenation elsewhere in the notebook.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# configurable before sharing the notebook.
path_to_directory = "/home/subbu/PRML/Internship/p1/"

# Name of the Cloud Storage bucket used for audio that is too long for
# synchronous recognition.
bucket_name = "rankingtranscript1"

# gs:// prefix of the bucket, derived from bucket_name so the two cannot drift.
path_to_bucket = "gs://" + bucket_name + "/"

# Service-account key file, stored inside the working directory.
path_to_json = path_to_directory + "rankingtranscript-96316cee08ba.json"

In [14]:
# input parameters

# Name of the audio file to transcribe; it is appended to path_to_directory
# when the file is opened or uploaded.
file_name_param = "test.wav"

# Passed as "enable_separate_recognition_per_channel" in the recognition
# config; when True the transcript lines are also prefixed with the channel tag.
separation_param = False

# Passed as "language_code" in the recognition config.
primary_language_code_param = "en"

In [11]:
# Functions specific to each task for modularity

def upload_to_bucket(file_name):
    """Upload a local audio file to the Cloud Storage bucket.

    The file is read from ``path_to_directory + file_name`` and stored in the
    bucket named by ``bucket_name`` under the object name ``file_name``. The
    storage client is created from the service-account key at ``path_to_json``.

    Args:
        file_name: Name of the file inside ``path_to_directory``; also used as
            the object name in the bucket.

    Returns:
        The blob object the file was uploaded to.
    """
    print('starting upload')
    # Use the notebook-level constants instead of repeating the key path and
    # bucket name here, so changing the configuration cell is sufficient.
    storage_client = storage.Client.from_service_account_json(path_to_json)
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(file_name)
    blob.upload_from_filename(path_to_directory + file_name)
    print('uploaded')
    return blob

def pull_from_directory(file_name):
    """Read a local audio file into an inline recognition payload.

    Args:
        file_name: Name of the file inside ``path_to_directory``.

    Returns:
        A dict ``{"content": <raw bytes of the file>}``.
    """
    # The with-statement closes the file on exit, so no explicit close() call
    # is needed.
    with io.open(path_to_directory + file_name, "rb") as audio_file:
        return {"content": audio_file.read()}

def process_audio(audio,config,client,lengthy):
    """Run speech recognition on ``audio`` and return the service response.

    Args:
        audio: Audio payload handed to the client unchanged.
        config: Recognition configuration handed to the client unchanged.
        client: Object exposing ``recognize`` and ``long_running_recognize``.
        lengthy: When truthy, the long-running call is used and its result is
            waited for; otherwise the synchronous call is used.

    Returns:
        Whatever the chosen client call produces.
    """
    if not lengthy:
        return client.recognize(config, audio)

    # Long-running path: start the operation, then block until it finishes.
    pending = client.long_running_recognize(config, audio)
    return pending.result()

def extract_output(response,separation):
    """Collect the transcript and the mean confidence from a response.

    Only the first alternative of each result is used. Results that carry no
    alternatives are skipped.

    Args:
        response: Object with a ``results`` iterable; each result exposes
            ``alternatives`` (items with ``transcript`` and ``confidence``)
            and ``channel_tag``.
        separation: When truthy, every transcript line is prefixed with
            ``"<channel_tag> : "``.

    Returns:
        A tuple ``(output, conf)``: ``output`` is the transcript with one
        newline-terminated line per result, ``conf`` is the mean confidence as
        an integer percentage (truncated). When there is nothing to
        transcribe the result is ``("", 0)`` instead of a division by zero.
    """
    lines = []
    confidence_total = 0
    counted = 0
    for result in response.results:
        if not result.alternatives:
            # Nothing was recognised for this segment.
            continue
        best = result.alternatives[0]
        confidence_total = confidence_total + best.confidence
        counted = counted + 1
        if separation:
            lines.append(str(result.channel_tag) + " : " + best.transcript + "\n")
        else:
            lines.append(best.transcript + "\n")

    if counted == 0:
        return "", 0

    conf = int((confidence_total / counted) * 100)
    return "".join(lines), conf

def remove_cloud_data(blob,lengthy):
    """Delete the uploaded blob after a long-running transcription.

    Deletion is best effort: a failure is reported on stdout and not raised.

    Args:
        blob: Object with a ``delete()`` method, or ``None`` when nothing was
            uploaded.
        lengthy: Only when truthy is a deletion attempted.

    Returns:
        None.
    """
    if not lengthy or blob is None:
        return None
    try:
        blob.delete()
        print("file removed successfully")
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        print("cannot be deleted, try clearing bucket manually")
    return None
    


In [12]:
# Main Function

def main_transcription_process(file_name_param,primary_language_code_param,separation_param):
    """Transcribe one audio file from ``path_to_directory``.

    Audio longer than 58 seconds is uploaded to the bucket and recognised with
    the long-running call; shorter audio is sent inline with the synchronous
    call. Any blob uploaded along the way is removed again, even when
    recognition fails.

    Args:
        file_name_param: Name of the audio file inside ``path_to_directory``.
        primary_language_code_param: Value for ``language_code`` in the
            recognition config.
        separation_param: Value for ``enable_separate_recognition_per_channel``;
            also controls whether transcript lines carry the channel tag.

    Returns:
        A tuple ``(output, confidence)``. ``output`` is the transcript, or the
        fixed message ``"Bad audio data, cannot be transcripted"`` when the
        confidence is below 70. ``confidence`` is an integer percentage.
    """
    client = speech_v1.SpeechClient()

    # The with-statement closes the audio file; no explicit close() is needed.
    with audioread.audio_open(path_to_directory + file_name_param) as audio_file:
        num_channels = audio_file.channels
        sample_bit_rate = audio_file.samplerate
        duration = audio_file.duration
    print("audio info :", num_channels, sample_bit_rate, duration)

    # NOTE(review): 58 s is presumably chosen to stay just under the duration
    # limit of synchronous recognition -- confirm against the API limits.
    lengthy = duration > 58

    configuration = {
        "sample_rate_hertz": sample_bit_rate,
        "audio_channel_count": num_channels,
        "enable_separate_recognition_per_channel": separation_param,
        "language_code": primary_language_code_param,
        "enable_automatic_punctuation": True,
        "use_enhanced": True,
    }

    # blob stays None on the short-audio path; previously it was left
    # unassigned there, so the clean-up call raised UnboundLocalError for
    # every short clip.
    blob = None
    try:
        if lengthy:
            blob = upload_to_bucket(file_name_param)
            audio = {"uri": path_to_bucket + file_name_param}
        else:
            audio = pull_from_directory(file_name_param)
        response = process_audio(audio, configuration, client, lengthy)
    finally:
        # Clean up the upload even if recognition raised.
        if blob is not None:
            remove_cloud_data(blob, lengthy)

    output, confidence = extract_output(response, separation_param)

    if confidence < 70:
        output = "Bad audio data, cannot be transcripted"

    return output, confidence

def data_transcriptor(file_name_param,primary_language_code_param,separation_param):
    """Transcribe a file, mapping any failure to a fixed "bad input" result.

    Args:
        file_name_param: Forwarded to ``main_transcription_process``.
        primary_language_code_param: Forwarded to ``main_transcription_process``.
        separation_param: Forwarded to ``main_transcription_process``.

    Returns:
        The ``(output, confidence)`` tuple from ``main_transcription_process``,
        or ``("Bad input, refer document for specifications", 0)`` when it
        raises an ``Exception``.
    """
    try:
        return main_transcription_process(file_name_param,primary_language_code_param,separation_param)
    except Exception as error:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate, and report the cause
        # so genuine bugs are not silently disguised as bad input.
        print("transcription failed:", repr(error))
        return "Bad input, refer document for specifications", 0
    

In [13]:
# trial run

# Show only the transcript (element 0 of the returned tuple); the confidence
# value is discarded here.
print(
    data_transcriptor(
        file_name_param=file_name_param,
        primary_language_code_param=primary_language_code_param,
        separation_param=separation_param,
    )[0]
)

audio info : 1 22050 89.62902494331065
starting upload
uploaded
file removed successfully
("In this global economy your phone need someone who understand how things work in different regions in the world.\n You need someone who understands your phone's call business values and to communicate them to different regions which has diverse business practices languages and culture norms.\n You also need someone who knows how to communicate your friend's intentions to the most effective media.\n My name is Regina and I'm that person.\n I'm just done my master's degree in corporate Communications with all the School of Business or which university my previous work experience has makes me unique. I've worked on three continents and I've had five years. I'm work experience in the areas of international marketing public relations advertising even to management and social media management.\n The industries which I've worked in include telecommunications education art design and fashion.\n And I al