## Creating transcription helper functions

In [45]:
from pydub import AudioSegment
from pydub.playback import play
from pydub.effects import normalize
import speech_recognition as sr
import os

### Converting audio to the right format


In [3]:
# Create function to convert audio file to wav
def convert_to_wav(filename):
    """Convert an audio file to .wav and save it next to the original.

    The output path is the input path with its final extension replaced by
    ".wav". Only the last extension is swapped, so dots elsewhere in the
    path (e.g. "./dataset/call.v2.mp3") are preserved.

    Args:
        filename: Path to the source audio file, passed to
            AudioSegment.from_file.

    Returns:
        None. The converted audio is exported to the new path and a
        progress message is printed.
    """
    # Import audio file
    audio = AudioSegment.from_file(filename)

    # Create new filename. os.path.splitext strips only the final extension;
    # filename.split(".")[0] would cut at the FIRST dot and mangle paths such
    # as "./dataset/call.mp3" (-> ".wav") or "my.call.mp3" (-> "my.wav").
    new_filename = os.path.splitext(filename)[0] + ".wav"

    # Announce the conversion, then export file as .wav
    print(f"Converting {filename} to {new_filename}...")
    audio.export(new_filename, format="wav")

In [4]:
# Test the function: convert the sample MP3 call recording to .wav
# (convert_to_wav prints the source and destination paths)
convert_to_wav("dataset/ex4_call_1_stereo_mp3.mp3")

Converting dataset/ex4_call_1_stereo_mp3.mp3 to dataset/ex4_call_1_stereo_mp3.wav...


### Finding PyDub stats

In [5]:
def show_pydub_stats(filename):
    """Print basic properties of an audio file and return its AudioSegment.

    Prints the channel count, sample width, frame rate, frame width and
    length in milliseconds, one per line, then returns the loaded segment.
    """
    # Load the file into an AudioSegment
    segment = AudioSegment.from_file(filename)

    # Pair every label with its value, then print them in a fixed order
    stats = (
        ("Channels", segment.channels),
        ("Sample width", segment.sample_width),
        ("Frame rate (sample rate)", segment.frame_rate),
        ("Frame width", segment.frame_width),
        ("Length (ms)", len(segment)),
    )
    for label, value in stats:
        print(f"{label}: {value}")

    return segment

In [6]:
# Try the function: print the stats of the converted .wav file and keep the
# returned AudioSegment
call_1_audio_segment = show_pydub_stats("dataset/ex4_call_1_stereo_mp3.wav")

Channels: 1
Sample width: 2
Frame rate (sample rate): 32000
Frame width: 2
Length (ms): 54888


### Transcribing audio with one line

In [8]:
def transcribe_audio(filename):
    """Transcribe a .wav audio file and return the recognized text."""
    # Recognizer that performs the speech-to-text call
    recognizer = sr.Recognizer()

    # Open the file as an audio source and record it into audio data
    with sr.AudioFile(filename) as source:
        audio_data = recognizer.record(source)

    # Run recognize_google on the recorded audio and hand back its result
    transcript = recognizer.recognize_google(audio_data)
    return transcript

In [9]:
# Test the function on the converted call recording and print the transcript
print(transcribe_audio("dataset/ex4_call_1_stereo_mp3.wav"))

hello welcome to Acme Studio support lawn mower name is Daniel how can I best help you hey Daniel this is John Audrey


## Sentiment analysis on spoken language text

In [16]:
import nltk
from nltk.tokenize import sent_tokenize
# Fetch the NLTK resources used below: "punkt" backs sent_tokenize and
# "vader_lexicon" backs SentimentIntensityAnalyzer
nltk.download("punkt")
nltk.download("vader_lexicon")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\gstankev\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\gstankev\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [12]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [14]:
# Create a VADER sentiment analyzer (also used by the following cells)
sid = SentimentIntensityAnalyzer()

# Score one clearly negative sentence as a sanity check; polarity_scores
# returns a dict with 'neg', 'neu', 'pos' and 'compound' entries
print(sid.polarity_scores("This customer service is terrible!"))

{'neg': 0.459, 'neu': 0.541, 'pos': 0.0, 'compound': -0.5255}


In [None]:
# Transcribe channel 2 of call 3 and show the text.
# NOTE(review): this path has no "dataset/" prefix, unlike the other audio
# files read in this notebook — confirm the file location.
call_3_channel_2_text = transcribe_audio("call_3_channel_2.wav")
print(call_3_channel_2_text)

In [None]:
# Score the whole call 3 channel 2 transcript as a single piece of text
sid.polarity_scores(call_3_channel_2_text)

In [17]:
# Punctuated excerpt of call 3 (the variable name suggests it came from a
# paid transcription API — TODO confirm the source)
call_3_paid_api_text = "Okay. Yeah. Hi, Diane. This is paid on this call and ..."

# Split the text into sentences and score each one separately
for sentence in sent_tokenize(call_3_paid_api_text):
    print(sentence)
    print(sid.polarity_scores(sentence))

Okay.
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.2263}
Yeah.
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.296}
Hi, Diane.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
This is paid on this call and ...
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


### Analyzing sentiment of a phone call


In [18]:
# Create SentimentIntensityAnalyzer instance
sid = SentimentIntensityAnalyzer()

# Let's try it on one of our phone calls
call_2_text = transcribe_audio("dataset/ex4_call_2_stereo_native.wav")

# Display text and sentiment polarity scores
# (the whole transcript is scored at once, not sentence by sentence)
print(call_2_text)
print(sid.polarity_scores(call_2_text))

hello my name is Daniel thank you for calling Acme Studios how can I best help you from you guys and extremely happy with it I'll just go to East Normandy Street but I'm just going to learn a little bit more about the message Frank I had but I thought you on the corner of Edward and Elizabeth according to Google according to the maps but some would you be able to help me in some way because I think I actually walked straight past your shop Yes she'll thing well thank you Sally that's good to hear you're enjoying it let me let me find out where the nearest stories for you
{'neg': 0.0, 'neu': 0.749, 'pos': 0.251, 'compound': 0.9842}


### Sentiment analysis on formatted text

In [19]:
# Create SentimentIntensityAnalyzer instance
sid = SentimentIntensityAnalyzer()

# Transcribe customer channel of call 2
# (the transcript is reused by the next two cells)
call_2_channel_2_text = transcribe_audio("dataset/ex4_call_2_channel_2_formatted.wav")

In [20]:
# Display text and sentiment polarity scores for the full channel 2 transcript
print(call_2_channel_2_text)
print(sid.polarity_scores(call_2_channel_2_text))

all hi Daniel my name is Celie I've recently purchased a smartphone from you guys and extremely happy with it I'll just go to East nobody street but I'm just going to learn a little bit more about the message Frank I have Google like Haitian but I'm finding it I thought you on the corner of Edward and Elizabeth according to Google according to the maps but some would you be able to help me in some way because I think I actually walked straight past yourself
{'neg': 0.0, 'neu': 0.87, 'pos': 0.13, 'compound': 0.8921}


In [22]:
# Create SentimentIntensityAnalyzer instance
sid = SentimentIntensityAnalyzer()

# Split call 2 channel 2 into sentences and score each.
# The transcript contains no sentence-ending punctuation, so sent_tokenize
# yields it as a single sentence (see the output below).
for sentence in sent_tokenize(call_2_channel_2_text):
    print(sentence)
    print(sid.polarity_scores(sentence))

all hi Daniel my name is Celie I've recently purchased a smartphone from you guys and extremely happy with it I'll just go to East nobody street but I'm just going to learn a little bit more about the message Frank I have Google like Haitian but I'm finding it I thought you on the corner of Edward and Elizabeth according to Google according to the maps but some would you be able to help me in some way because I think I actually walked straight past yourself
{'neg': 0.0, 'neu': 0.87, 'pos': 0.13, 'compound': 0.8921}


In [24]:
# Punctuated transcript used for the sentence-level scoring in the next cell.
# NOTE(review): despite the "call_2_channel_2" name, this text overlaps with
# call_3_paid_api_text above and appears to contain both speakers — confirm
# which call/channel it belongs to.
call_2_channel_2_paid_api_text = "Hello and welcome to acme studios. My name's Daniel. How can I best help you? Hi Diane. This is paid on this call up to see the status of my, I'm proctor mortars at three weeks ago, and then service is terrible. Okay, Peter, sorry to hear about that. Hey, Peter, before we go on, do you mind just, uh, is there something going on with your microphone? I can't quite hear you. Is this any better? Yeah, that's much better. And sorry, what was, what was it that you said when you first first started speaking?  So I ordered a product from you guys three weeks ago and, uh, it's, it's currently on July 1st and I haven't received a provocative, again, three weeks to a full four weeks down line. This service is terrible. Okay. Well, what's your order id? I'll, uh, I'll start looking into that for you. Six, nine, eight, seven five. Okay. Thank you."

In [25]:
# Create SentimentIntensityAnalyzer instance
sid = SentimentIntensityAnalyzer()

# Split channel 2 paid text into sentences and score each.
# This text is punctuated, so sent_tokenize separates the sentences and each
# one gets its own polarity scores.
for sentence in sent_tokenize(call_2_channel_2_paid_api_text):
    print(sentence)
    print(sid.polarity_scores(sentence))

Hello and welcome to acme studios.
{'neg': 0.0, 'neu': 0.625, 'pos': 0.375, 'compound': 0.4588}
My name's Daniel.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
How can I best help you?
{'neg': 0.0, 'neu': 0.303, 'pos': 0.697, 'compound': 0.7845}
Hi Diane.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
This is paid on this call up to see the status of my, I'm proctor mortars at three weeks ago, and then service is terrible.
{'neg': 0.114, 'neu': 0.886, 'pos': 0.0, 'compound': -0.4767}
Okay, Peter, sorry to hear about that.
{'neg': 0.159, 'neu': 0.61, 'pos': 0.232, 'compound': 0.1531}
Hey, Peter, before we go on, do you mind just, uh, is there something going on with your microphone?
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
I can't quite hear you.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Is this any better?
{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}
Yeah, that's much better.
{'neg': 0.0, 'neu': 0.282, 'pos': 0.718, 'compound': 0.

## Named entity recognition on transcribed text

In [34]:
import spacy
from spacy.pipeline import EntityRuler

In [27]:
# Load spaCy's "en_core_web_sm" pipeline
nlp =  spacy.load("en_core_web_sm")

In [28]:
# Run the pipeline on a sample sentence that mentions a date, a city, an
# order number and a person's name
doc = nlp("I'd like to talk about a smartphone I ordered on July 31st from your Sydney store, my order number is 40939440. I spoke to Georgia about it last week.")

In [29]:
# Print each token's text with its character offset (token.idx) in the input string
for token in doc:
    print(token.text, token.idx)

I 0
'd 1
like 4
to 9
talk 12
about 17
a 23
smartphone 25
I 36
ordered 38
on 46
July 49
31st 54
from 59
your 64
Sydney 69
store 76
, 81
my 83
order 86
number 92
is 99
40939440 102
. 110
I 112
spoke 114
to 120
Georgia 123
about 131
it 137
last 140
week 145
. 149


In [31]:
# Print the sentences spaCy detected in the doc, one per line
for sentence in doc.sents:
    print(sentence)

I'd like to talk about a smartphone I ordered on July 31st from your Sydney store, my order number is 40939440.
I spoke to Georgia about it last week.


In [32]:
# Print each named entity with its label.
# In the output below the order number 40939440 is labelled DATE and the
# name Georgia is labelled GPE (presumably not the intended labels).
for entity in doc.ents:
    print(entity.text, entity.label_)

July 31st DATE
Sydney GPE
40939440 DATE
Georgia GPE
last week DATE


In [35]:
# List the (name, component) pairs that make up the loaded pipeline
print(nlp.pipeline)

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec object at 0x00000218A2DC9BF8>), ('tagger', <spacy.pipeline.tagger.Tagger object at 0x00000218AF53CFC0>), ('parser', <spacy.pipeline.dep_parser.DependencyParser object at 0x00000218AF425660>), ('attribute_ruler', <spacy.pipeline.attributeruler.AttributeRuler object at 0x00000218B06909C8>), ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer object at 0x00000218B068B288>), ('ner', <spacy.pipeline.ner.EntityRecognizer object at 0x00000218AF425798>)]


In [37]:
# In spaCy v3, pipeline components are created and registered by name through
# nlp.add_pipe(); a standalone EntityRuler(nlp) instance is never attached to
# the pipeline, so its patterns would not affect the results of nlp(...).
# Reuse the existing ruler if this cell has already been run, because adding
# a second component with the same name raises an error.
if "entity_ruler" in nlp.pipe_names:
    ruler = nlp.get_pipe("entity_ruler")
else:
    # Placed before "ner" so the rule-based matches are set first and the
    # statistical entity recognizer works around them.
    ruler = nlp.add_pipe("entity_ruler", before="ner")

# Label every occurrence of "smartphone" as a PRODUCT entity
ruler.add_patterns([{"label":"PRODUCT","pattern":"smartphone"}])

In [41]:
# Disabled: this passes a component object to nlp.add_pipe, which is the
# spaCy v2 calling style. NOTE(review): the pipeline printed above looks like
# spaCy v3, where add_pipe expects the component's registered string name
# (e.g. "entity_ruler") — confirm before re-enabling.
#nlp.add_pipe(ruler, before="ner")
#nlp.pipeline

### Named entity recognition in spaCy

In [46]:
# Load the call 4 channel 2 recording; ending the cell with the bare name
# makes the notebook display the AudioSegment
wav_file = AudioSegment.from_file(file="dataset/ex4_call_4_channel_2_formatted.wav", format="wav")
wav_file

In [42]:
# Transcribe call 4 channel 2
call_4_channel_2_text = transcribe_audio("dataset/ex4_call_4_channel_2_formatted.wav")

# Create a spaCy language model instance
# (this rebinds nlp to a freshly loaded pipeline, replacing the one loaded earlier)
nlp = spacy.load("en_core_web_sm")

In [43]:
# Create a spaCy doc with call 4 channel 2 text
# (this rebinds doc, replacing the sample-sentence doc created earlier)
doc = nlp(call_4_channel_2_text)

# Check the type of doc
print(type(doc))

<class 'spacy.tokens.doc.Doc'>


In [44]:
# Print each token of the call 4 transcript with its character offset (token.idx)
for token in doc:
    print(token.text, token.idx)

hi 0
Daniel 3
my 10
name 13
is 18
Ann 21
and 25
I 29
've 30
recently 34
just 43
purchased 48
a 58
smart 60
front 66
buying 72
from 79
you 84
and 88
I 92
'm 93
very 96
happy 101
with 107
the 112
product 116
I 124
'd 125
like 128
to 133
order 136
another 142
one 150
from 154
my 159
friend 162
believes 169
in 178
Sydney 181
and 188
have 192
it 197
delivered 200
I 210
'm 211
pretty 214
sure 221
it 226
's 228
model 231
315 237
I 241
can 243
check 247
that 253
for 258
you 262
and 266
I 270
'll 271
give 275
you 280
more 284
details 289
if 297
you 300
'd 303
like 306
to 311
take 314
my 319
details 322
and 330
I 334
I 336
will 338
also 343
give 348
you 353
the 357
address 361
thank 369
you 375
excellent 379


In [47]:
# Show sentences in doc
# (the transcript itself contains no punctuation — see the token list above)
for sentence in doc.sents:
    print(sentence)

hi Daniel my name is Ann and I've recently just purchased a smart front buying from you
and I'm very happy with the product I'd like to order another one from my friend believes in Sydney and have it delivered I'm pretty sure it's model 315 I can check that for you
and I'll give you more details if you'd like to take my details and I I will also give you the address thank you excellent


In [48]:
# Show named entities and their labels
# (entity.label_ is the label's string name, e.g. PERSON or GPE in the output below)
for entity in doc.ents:
    print(entity.text, entity.label_)

Daniel PERSON
Sydney GPE
315 CARDINAL


## Classifying transcribed speech with Sklearn

In [49]:
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split

### Preparing audio files for text classification


In [None]:
# Disabled code kept for reference: it is wrapped in a string literal so it
# does not run. NOTE(review): post_purchase, pre_purchase and folder are not
# defined in the visible part of the notebook — define them before re-enabling.
'''

# Convert post purchase
for file in post_purchase:
    print(f"Converting {file} to .wav...")
    convert_to_wav(file)

# Convert pre purchase
for file in pre_purchase:
    print(f"Converting {file} to .wav...")
    convert_to_wav(file)
    
    

def create_text_list(folder):
  # Create empty list
  text_list = []
  
  # Go through each file
  for file in folder:
    # Make sure the file is .wav
    if file.endswith(".wav"):
      print(f"Transcribing file: {file}...")
      
      # Transcribe audio and append text to list
      text_list.append(transcribe_audio(file))   
  return text_list

create_text_list(folder)


'''

In [50]:
# Load the already-transcribed customer calls from CSV
# (the preview below shows "label" and "text" columns)
customer_call_transcriptions = pd.read_csv("dataset/customer_call_transcriptions.csv")

In [52]:
# Preview the first rows.
# NOTE(review): the "Unnamed: 0" column in the output looks like a saved
# index — consider index_col=0 when reading the CSV; confirm.
customer_call_transcriptions.head()

Unnamed: 0,label,text
0,pre_purchase,how's it going Arthur I just placed an order w...
1,post_purchase,yeah hello I'm just wondering if I can speak t...
2,post_purchase,hey I receive my order but it's the wrong size...
3,pre_purchase,hi David I just placed an order online and I w...
4,post_purchase,hey I bought something from your website the o...


In [55]:
# Build the text classification pipeline:
# raw text -> token counts -> TF-IDF weighting -> multinomial Naive Bayes
text_classifier = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('classifier', MultinomialNB()),
])

In [61]:
# Hold out half of the transcriptions for evaluation. random_state pins the
# shuffle so the split, and therefore the accuracy reported below, is the
# same on every run instead of changing each time the cell is executed.
X_train, X_test, y_train, y_test = train_test_split(customer_call_transcriptions["text"],
                                                    customer_call_transcriptions["label"],
                                                    test_size=0.5,
                                                    random_state=42)

In [62]:
# Fit the whole pipeline on the training transcriptions and their labels
text_classifier.fit(X_train, y_train)

Pipeline(steps=[('vectorizer', CountVectorizer()),
                ('tfidf', TfidfTransformer()),
                ('classifier', MultinomialNB())])

In [63]:
# Evaluate the MultinomialNB model
predicted = text_classifier.predict(X_test)
# Accuracy = share of test predictions equal to the true label, as a percentage
accuracy = 100 * np.mean(predicted == y_test)
print(f'The model is {accuracy}% accurate')

The model is 92.15686274509804% accurate
