# SPEECH ANALYTICS
---

## GOOGLE SPEECH TO TEXT API 

### ___Programmed Logic___

In [0]:
import io
import os

# !pip install pydub # Manipulate audio with a simple and easy high level interface - http://pydub.com/
from pydub import AudioSegment

# !pip install --upgrade google-cloud-speech
from google.cloud import speech_v1p1beta1 as speech
from google.cloud.speech_v1p1beta1 import enums
from google.cloud.speech_v1p1beta1 import types
import wave # to Convert to/open Wave format

# Access GCP Storage
from google.cloud import storage

# to convert file to wav format
import subprocess

In [0]:
# uploading the Audio to GCP in bucket created
# 1. Permissions to the storage bucket to be given to allUsers and as Owner - Need to study this

def upload_audio_to_cloud(bucket_name, source_file_name, destination_file_name):
    """Upload a local file to a Cloud Storage bucket.

    Args:
        bucket_name: Name of the bucket to look up with ``get_bucket``.
        source_file_name: Path of the local file to upload.
        destination_file_name: Object (blob) name to create inside the bucket.
    """
    target_bucket = storage.Client().get_bucket(bucket_name)
    target_bucket.blob(destination_file_name).upload_from_filename(source_file_name)

In [0]:
# Get info on the frame rate (Hertz rate, e.g. 16000 for mp3, 44100 for wav) and channels (if the audio is not mono, then convert it)

def audio_info(audio_file_name):
    """Read the frame rate and channel count from a WAV file.

    Args:
        audio_file_name: Path of the WAV file to inspect.

    Returns:
        A ``(frame_rate, channels)`` tuple as reported by the ``wave`` reader.
    """
    wav_reader = wave.open(audio_file_name, "rb")
    try:
        return wav_reader.getframerate(), wav_reader.getnchannels()
    finally:
        # Always release the file handle, even if reading the header fails.
        wav_reader.close()

In [0]:
# Converting Stereo sound to Mono Channel sound

def audio_channel_convert(audio_file_name):
    """Rewrite a WAV file as single-channel audio, overwriting it in place.

    Args:
        audio_file_name: Path of the WAV file; it is both read and written.
    """
    mono_audio = AudioSegment.from_wav(audio_file_name).set_channels(1)
    mono_audio.export(audio_file_name, format="wav")

In [0]:
# Audio file processing - the file types it can process are MP3 and WAV

def audio_file_processing():
    """Prepare the configured audio file for transcription.

    Works on the module-level configuration: reads ``audio_file_name``,
    ``filepath`` and ``audio_name``, and updates ``file_name``,
    ``frame_rate`` and ``channels``.

    Steps:
        1. If the input is an MP3, convert it to WAV with ffmpeg and point
           ``file_name`` at the new WAV file.
        2. Read the frame rate and channel count from the WAV file.
        3. If the file has more than one channel, convert it to mono in place.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    # Without this declaration the assignments below would create locals,
    # which makes every read of ``file_name`` fail with UnboundLocalError and
    # leaves the module-level ``frame_rate`` at its placeholder value.
    global file_name, frame_rate, channels

    # Converting mp3 to wav
    if audio_file_name.split('.')[-1].lower() == 'mp3':
        wav_file_name = filepath + audio_name + '.wav'
        # -y overwrites a WAV left over from a previous run so re-running the
        # notebook always converts the current MP3; check_call surfaces a
        # failed conversion instead of silently continuing.
        subprocess.check_call(['ffmpeg', '-y', '-i', file_name, wav_file_name])
        file_name = wav_file_name

    frame_rate, channels = audio_info(file_name)

    if channels > 1:
        audio_channel_convert(file_name)


In [0]:
def _speaker_turns(words_info):
    """Group consecutive words with the same speaker tag into 'speaker N: ...' lines."""
    turns = []
    current_tag = None
    current_words = []

    for word_info in words_info:
        # A change of speaker tag closes the turn collected so far.
        if current_words and word_info.speaker_tag != current_tag:
            turns.append("speaker {}: {}".format(current_tag, " ".join(current_words)))
            current_words = []
        current_tag = word_info.speaker_tag
        current_words.append(word_info.word)

    if current_words:
        turns.append("speaker {}: {}".format(current_tag, " ".join(current_words)))

    return "\n".join(turns)


def google_transcribe(audio_file_name=None):
    """Upload the audio to Cloud Storage, transcribe it and save the transcript.

    Configuration is read from module-level variables: ``bucket_name``,
    ``file_name``, ``frame_rate``, ``language``, ``model``, ``enhance``,
    ``enable_speaker_diarization``, ``speaker_count``, ``filepath`` and
    ``audio_name``.

    Args:
        audio_file_name: Optional and unused. Accepted so that calls passing
            the audio file name keep working; the file that is uploaded and
            transcribed is always the module-level ``file_name``.

    Returns:
        The transcript text. It is also stored in the module-level
        ``transcript`` and written to ``filepath + audio_name + '.txt'``
        (that file name is stored in the module-level ``transcript_filename``).
    """
    # Results are published at module level so code running after the call
    # can read ``transcript`` and ``transcript_filename``.
    global transcript, transcript_filename

    # Uploading Data to GCP Storage
    blob_name = file_name.split('/')[-1]
    upload_audio_to_cloud(bucket_name, file_name, blob_name)

    # The URI has to name the uploaded object, not the local file path.
    gcs_uri = 'gs://' + bucket_name + '/' + blob_name

    client = speech.SpeechClient()
    audio = types.RecognitionAudio(uri=gcs_uri)

    # Encoding reference:
    # https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#google.cloud.speech.v1.RecognitionConfig.AudioEncoding
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=frame_rate,
        language_code=language,
        model=model,
        use_enhanced=enhance,
        enable_speaker_diarization=enable_speaker_diarization,  # enable this if you want speaker wise results
        diarization_speaker_count=speaker_count,
    )

    # Detects speech in the audio file
    operation = client.long_running_recognize(config=config, audio=audio)
    response = operation.result(timeout=10000)

    results = response.results
    if not results:
        # Nothing was recognised; avoid indexing into an empty result list.
        text = ''
    elif enable_speaker_diarization:
        # Only the last result is used for the speaker-tagged word list
        # (per the Speech-to-Text diarization guide it covers the whole audio).
        text = _speaker_turns(results[-1].alternatives[0].words)
    else:
        text = ''.join(result.alternatives[0].transcript for result in results)

    transcript = text
    transcript_filename = audio_name + '.txt'

    # Explicit UTF-8 so transcripts in any configured language can be written.
    with open(filepath + transcript_filename, "w", encoding="utf-8") as f:
        f.write(transcript)

    return transcript


### ___Calling Function and Variables___

In [0]:
# Colab-specific step: files.upload() is used to bring the local audio file
# (and any other needed files) into the runtime; `upload` keeps whatever the
# call returns.
from google.colab import files
upload = files.upload()

In [0]:
# Setting up Global Variables
# The functions defined earlier in this notebook read (and partly update)
# these module-level names, so they must all be set before the pipeline runs.

os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="/content/elevated-glow-273009-a6eed3bf39be.json"   # setting google gcp application credential

filepath = '/content/'
audio_file_name = 'Call1.mp3'
file_name = filepath + audio_file_name
# splitext keeps file names that contain extra dots intact (e.g. "call.v2.mp3").
audio_name = os.path.splitext(audio_file_name)[0]

# Placeholders; the real values are read from the audio file during processing.
frame_rate = 0
channels = 0

bucket_name = 'piyush_singla_02'

transcript = ''
# Name of the text file the transcript is written to (inside `filepath`).
transcript_filename = audio_name + '.txt'

# https://cloud.google.com/speech-to-text/docs/languages
language = 'en-US'

# Check config for model selection
model = 'phone_call'
enhance = True

# enable if you want speaker level info
enable_speaker_diarization = False 
speaker_count = 2

# Convert the input to mono WAV and pick up its frame rate before transcribing;
# the recogniser is configured for LINEAR16 audio at `frame_rate`.
audio_file_processing()

# google_transcribe works from the module-level configuration set above.
google_transcribe()

print('Call has been processed. Output at:' + filepath + transcript_filename)