# Speech Recognition by Data Camp
## Part 4 : Processing text transcribed from spoken language  

In [2]:
# Import AudioSegment from Pydub
from pydub import AudioSegment

# import the SpeechRecognition library
import speech_recognition as sr

### Convert audio file to wav

In [3]:
# Create function to convert audio file to wav
def convert_to_wav(filename):
    """Takes an audio file of non .wav format and converts to .wav"""
    
    folder = "/".join(filename.split("/")[:-1])
    # Import audio file
    audio = AudioSegment.from_file(filename)

    # Create new filename
    new_filename = folder+"/"+ filename.split("/")[-1].split(".")[0] + ".wav"
    
    # Export file as .wav
    audio.export(new_filename, format="wav")
    print(f"Converting {filename} to {new_filename}...")


### get attributes

In [4]:
def show_pydub_stats(filename):
    """Returns different audio attributes related to an audio file."""
    # Create AudioSegment instance
    audio_segment = AudioSegment.from_file(filename)
    
    # Print audio attributes and return AudioSegment instance
    print(f"Channels: {audio_segment.channels}")
    print(f"Sample width: {audio_segment.sample_width}")
    print(f"Frame rate (sample rate): {audio_segment.frame_rate}")
    print(f"Frame width: {audio_segment.frame_width}")
    print(f"Length (ms): {len(audio_segment)}")
    return audio_segment


### Transcribe Audio to Text

In [5]:
def transcribe_audio(filename):
    """Takes a .wav format audio file and transcribes it to text."""
    # Setup a recognizer instance
    recognizer = sr.Recognizer()

    # Import the audio file and convert to audio data
    audio_file = sr.AudioFile(filename)
    with audio_file as source:
        audio_data = recognizer.record(source)

    # Return the transcribed text
    return recognizer.recognize_google(audio_data)


### Transcribe helper

In [6]:
# Convert mp3 file to wav
convert_to_wav("./data_part_4/ex4_call_1_stereo_formatted_mp3.mp3")

# Check the stats of new file+
call_1_audio_segment = show_pydub_stats("./data_part_4/ex4_call_1_stereo_formatted_mp3.wav")


# Split call_1 to mono
call_1_split = call_1_audio_segment.split_to_mono()

# Export channel 2 (the customer channel)
call_1_split[1].export("./data_part_4/call_1_channel_2.wav",
                       format="wav")


# Transcribe the single channel
print(transcribe_audio("./data_part_4/call_1_channel_2.wav"))

Converting ./data_part_4/ex4_call_1_stereo_formatted_mp3.mp3 to ./data_part_4/ex4_call_1_stereo_formatted_mp3.wav...
Channels: 2
Sample width: 2
Frame rate (sample rate): 32000
Frame width: 4
Length (ms): 54888
hey Daniel this is John I've recently bought a smartphone from you are 3 weeks ago and already having issues that one second grandma serial number it is for 177 I'm very displays how long do you reckon it's going to take me about an hour now right I'm just just really really really really just I've been trying to contact the ports and pass past 3-4 days now and have been put on hold Morgan and not really happy I can't get this issue fixed as fast as possible


### Analyzing sentiment of a phone call

In [7]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

try:
    # Create SentimentIntensityAnalyzer instance
    sid = SentimentIntensityAnalyzer()
except Exception:
    import nltk
    nltk.download('vader_lexicon')
    # Create SentimentIntensityAnalyzer instance
    sid = SentimentIntensityAnalyzer()


# Let's try it on one of our phone calls
call_2_text = transcribe_audio("./data_part_4/ex4_call_2_stereo_native.wav")

# Display text and sentiment polarity scores
print(call_2_text)
print(sid.polarity_scores(call_2_text))


hello my name is Daniel thank you for calling acne Studios how can I best help you are hi Daniel my name is Sally I've recently purchased a smart phone from you guys and extremely happy with it but I just got to learn a little bit more about the message bank OK Google location but I'm finding it hard I got you on the corner of Edward and Elizabeth according to Google according to the maps but damn would you be able to help me in some way because I think I actually walk straight past your shop yeah sure thing I'll thank you Sally that's good to hear you're enjoying it let me let me find out where the nearest stories for you
{'neg': 0.035, 'neu': 0.706, 'pos': 0.259, 'compound': 0.9844}


### Sentiment analysis on formatted text

In [8]:
# Transcribe customer channel of call 2
call_2_channel_2_text = transcribe_audio("./data_part_4/ex4_call_2_channel_2_formatted.wav")

# Display text and sentiment polarity scores
print(call_2_channel_2_text)
print(sid.polarity_scores(call_2_channel_2_text))



oh hi Daniel my name is belly I've recently purchased a smartphone from you guys and extremely happy with them I've just got an issue but I've just got to learn a little bit more about the message Frank I had Google the location but I'm finding it hard I don't you want the corner of Edward and Elizabeth according to Google according to the maps but damn would you be able to help me in some way because I think I actually walk straight past your shop
{'neg': 0.071, 'neu': 0.836, 'pos': 0.092, 'compound': 0.443}


### Sentiment analysis on formatted text sentence by sentence  

call_2_channel_2_text doesn't contain each sentences  
call_2_channel_2_paid_api_text contains each sentences

In [9]:
# Import sent_tokenize from nltk
from nltk import sent_tokenize
import nltk
try:
    sent_tokenize(call_2_channel_2_text)
except Exception:
    nltk.download('punkt')

# Split call 2 channel 2 into sentences and score each
for sentence in sent_tokenize(call_2_channel_2_text):
    print(sentence)
    print(sid.polarity_scores(sentence))

oh hi Daniel my name is belly I've recently purchased a smartphone from you guys and extremely happy with them I've just got an issue but I've just got to learn a little bit more about the message Frank I had Google the location but I'm finding it hard I don't you want the corner of Edward and Elizabeth according to Google according to the maps but damn would you be able to help me in some way because I think I actually walk straight past your shop
{'neg': 0.071, 'neu': 0.836, 'pos': 0.092, 'compound': 0.443}


In [10]:
call_2_channel_2_paid_api_text = """Hello and welcome to acme studios. 
My name's Daniel. How can I best help you? Hi Diane.
This is paid on this call up to see the status of my, 
I'm proctor mortars at three weeks ago, 
and then service is terrible. Okay, Peter, 
sorry to hear about that. Hey, Peter, 
before we go on, do you mind just, uh, 
is there something going on with your microphone? 
I can't quite hear you. Is this any better? 
Yeah, that's much better. And sorry, what was, 
what was it that you said when you first first started speaking?
So I ordered a product from you guys three weeks ago and, uh, it's, 
it's currently on July 1st and I haven't received a provocative, again, 
three weeks to a full four weeks down line. This service is terrible. Okay. 
Well, what's your order id? I'll, uh, I'll start looking into that for you. 
Six, nine, eight, seven five. Okay. Thank you."""

# Split call 2 channel 2 into sentences and score each
for sentence in sent_tokenize(call_2_channel_2_paid_api_text):
    print(sentence)
    print(sid.polarity_scores(sentence))

Hello and welcome to acme studios.
{'neg': 0.0, 'neu': 0.625, 'pos': 0.375, 'compound': 0.4588}
My name's Daniel.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
How can I best help you?
{'neg': 0.0, 'neu': 0.303, 'pos': 0.697, 'compound': 0.7845}
Hi Diane.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
This is paid on this call up to see the status of my, 
I'm proctor mortars at three weeks ago, 
and then service is terrible.
{'neg': 0.114, 'neu': 0.886, 'pos': 0.0, 'compound': -0.4767}
Okay, Peter, 
sorry to hear about that.
{'neg': 0.159, 'neu': 0.61, 'pos': 0.232, 'compound': 0.1531}
Hey, Peter, 
before we go on, do you mind just, uh, 
is there something going on with your microphone?
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
I can't quite hear you.
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
Is this any better?
{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}
Yeah, that's much better.
{'neg': 0.0, 'neu': 0.282, 'pos': 0.718, 'compound

### Named entity recognition in spaCy

In [7]:
import spacy

# Transcribe call 4 channel 2
call_4_channel_2_text = transcribe_audio("./data_part_4/ex4_call_4_channel_2_formatted.wav")

# Create a spaCy language model instance
nlp = spacy.load("en_core_web_sm")

# Create a spaCy doc with call 4 channel 2 text
doc = nlp(call_4_channel_2_text)

# Check the type of doc
print("type(doc) : ",type(doc))

type(doc) <class 'spacy.tokens.doc.Doc'>
awful 0
I 6
Daniel 8
my 15
name 18
is 23
Ian 26
and 30
I 34
've 35
recently 39
just 48
purchased 53
a 63
smart 65
phone 71
from 77
you 82
and 86
I 90
'm 91
very 94
happy 99
with 105
the 110
product 114
I 122
'd 123
like 126
to 131
order 134
another 140
one 148
for 152
my 156
friend 159
who 166
lives 170
in 176
Sydney 179
and 186
have 190
it 195
delivered 198
I 208
'm 209
pretty 212
sure 219
it 224
's 226
model 229
315 235
I 239
can 241
check 245
that 251
for 256
you 260
and 264
I 268
'll 269
give 273
you 278
my 282
details 285
and 293
if 297
you 300
'd 303
like 306
to 311
take 314
my 319
details 322
and 330
I 334
will 336
also 341
give 346
you 351
the 355
address 359
thank 367
you 373
excellent 377


### Show Token in doc

In [10]:
# Show tokens in doc
for token in doc:
    print(token.text, token.idx)


awful 0
I 6
Daniel 8
my 15
name 18
is 23
Ian 26
and 30
I 34
've 35
recently 39
just 48
purchased 53
a 63
smart 65
phone 71
from 77
you 82
and 86
I 90
'm 91
very 94
happy 99
with 105
the 110
product 114
I 122
'd 123
like 126
to 131
order 134
another 140
one 148
for 152
my 156
friend 159
who 166
lives 170
in 176
Sydney 179
and 186
have 190
it 195
delivered 198
I 208
'm 209
pretty 212
sure 219
it 224
's 226
model 229
315 235
I 239
can 241
check 245
that 251
for 256
you 260
and 264
I 268
'll 269
give 273
you 278
my 282
details 285
and 293
if 297
you 300
'd 303
like 306
to 311
take 314
my 319
details 322
and 330
I 334
will 336
also 341
give 346
you 351
the 355
address 359
thank 367
you 373
excellent 377


### Show sentences in doc

In [11]:
# Show sentences in doc
for sentence in doc.sents:
    print(sentence)

awful I Daniel my name is Ian
and I've recently just purchased a smart phone from you
and I'm very happy with the product I'd like to order another one for my friend who lives in Sydney and
have it delivered
I'm pretty sure
it's model 315
I can check that for you
and I'll give you my details
and if you'd like to take my details and I will also give you the address thank you excellent


### Show named entities and their labels

In [12]:
# Show named entities and their labels
for entity in doc.ents:
    print(entity.text, entity.label_)

Daniel PERSON
Ian PERSON
Sydney GPE
315 CARDINAL


### Creating a custom named entity in spaCy

In [16]:
# Import EntityRuler class
from spacy.pipeline import EntityRuler

# Create EntityRuler instance
ruler = EntityRuler(nlp)

# Define pattern for new entity
ruler.add_patterns([{"label": "PRODUCT", "pattern": "smartphone"}])

# Update existing pipeline
nlp.add_pipe(ruler, before="ner")

# Test new entity
for entity in doc.ents:
    print(entity.text, entity.label_)

Daniel PERSON
Ian PERSON
Sydney GPE
315 CARDINAL


### Preparing audio files for text classification  

#### We only have 2 provided audio files, one before, one after


In [17]:
post_purchase=["./data_part_4/post-purchase-audio-27.wav"]
pre_purchase=["./data_part_4/pre-purchase-audio-25.wav"]

# Convert post purchase
for file in post_purchase:
    print(f"Converting {file} to .wav...")
    convert_to_wav(file)

# Convert pre purchase
for file in pre_purchase:
    print(f"Converting {file} to .wav...")
    convert_to_wav(file)

Converting ./data_part_4/post-purchase-audio-27.wav to .wav...
Converting ./data_part_4/post-purchase-audio-27.wav to ./data_part_4/post-purchase-audio-27.wav...
Converting ./data_part_4/pre-purchase-audio-25.wav to .wav...
Converting ./data_part_4/pre-purchase-audio-25.wav to ./data_part_4/pre-purchase-audio-25.wav...


### Transcribing phone call excerpts

In [22]:
def create_text_list(folder):
    # Create empty list
    text_list = []
    # Go through each file
    for file in folder:
        # Make sure the file is .wav
        if file.endswith(".wav"):
            print(f"Transcribing file: {file}...")

            # Transcribe audio and append text to list
            text_list.append(transcribe_audio(file))   
    return text_list



In [26]:
# Transcribe post and pre purchase text
post_purchase_text = create_text_list(post_purchase)
pre_purchase_text  = create_text_list(pre_purchase)

# Inspect the first transcription of post purchase
print(post_purchase_text[0])

Transcribing file: ./data_part_4/post-purchase-audio-27.wav...
Transcribing file: ./data_part_4/pre-purchase-audio-25.wav...
I'm calling to talk about a package I got yesterday it's I got it but I need to I need some help with setting it up


### Organizing transcribed phone call data

In [37]:
import pandas as pd

train_post_purchase_text = post_purchase_text * 100
train_pre_purchase_text = pre_purchase_text * 100

# Make dataframes with the text
post_purchase_df = pd.DataFrame({"label": "post_purchase",
                                 "text": train_post_purchase_text})
pre_purchase_df = pd.DataFrame({"label": "pre_purchase",
                                "text": train_pre_purchase_text})

# Combine DataFrames
df = pd.concat([post_purchase_df, pre_purchase_df])

# Print the combined DataFrame
print(df.head())

           label                                               text
0  post_purchase  I'm calling to talk about a package I got yest...
1  post_purchase  I'm calling to talk about a package I got yest...
2  post_purchase  I'm calling to talk about a package I got yest...
3  post_purchase  I'm calling to talk about a package I got yest...
4  post_purchase  I'm calling to talk about a package I got yest...


### Create a spoken language text classifier

In [38]:
# Import text classification packages
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split

In [44]:
# Split data into train and test sets

X_train, X_test, y_train, y_test = train_test_split( df["text"], df["label"], test_size=0.3)

In [49]:
# Build the text_classifier as an sklearn pipeline
text_classifier = Pipeline([
    ('vectorizer', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('classifier', MultinomialNB()),
])

# Fit the classifier pipeline on the training data
text_classifier.fit(X_train, y_train)

Pipeline(steps=[('vectorizer', CountVectorizer()),
                ('tfidf', TfidfTransformer()),
                ('classifier', MultinomialNB())])

In [53]:
# Make predictions and compare them to test labels
predictions = text_classifier.predict(X_test)
accuracy = 100 * np.mean(predictions == y_test)
print(f"The model is {accuracy:.2f}% accurate.")

The model is 100.00% accurate.
