In [1]:
!pip install SpeechRecognition
!pip install pydub
!pip install spacy
!python3 -m spacy download en_core_web_sm


Collecting SpeechRecognition
  Downloading speechrecognition-3.14.5-py3-none-any.whl.metadata (30 kB)
Downloading speechrecognition-3.14.5-py3-none-any.whl (32.9 MB)
   ---------------------------------------- 0.0/32.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/32.9 MB 991.0 kB/s eta 0:00:34
    --------------------------------------- 0.6/32.9 MB 9.4 MB/s eta 0:00:04
   -- ------------------------------------- 1.9/32.9 MB 17.8 MB/s eta 0:00:02
   --- ------------------------------------ 2.8/32.9 MB 20.1 MB/s eta 0:00:02
   ----- ---------------------------------- 4.3/32.9 MB 21.3 MB/s eta 0:00:02
   ------ --------------------------------- 5.4/32.9 MB 21.5 MB/s eta 0:00:02
   -------- ------------------------------- 6.9/32.9 MB 23.2 MB/s eta 0:00:02
   --------- ------------------------------ 7.4/32.9 MB 21.6 MB/s eta 0:00:02
   ---------- ----------------------------- 8.6/32.9 MB 23.0 MB/s eta 0:00:02
   ----------- ---------------------------- 9.5/32.9 MB 22.4 M

ERROR: Could not install packages due to an OSError: [WinError 2] The system cannot find the file specified: 'C:\\Python312\\Scripts\\sprc.exe' -> 'C:\\Python312\\Scripts\\sprc.exe.deleteme'



Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


Python was not found; run without arguments to install from the Microsoft Store, or disable this shortcut from Settings > Apps > Advanced app settings > App execution aliases.


In [2]:
import pandas as pd

import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import speech_recognition as sr
from pydub import AudioSegment

import spacy


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\sapan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


## Download the audio file 

In [12]:
import requests

url = "https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav"
output_file = "sample_customer_call.wav"

# Fetch the sample recording. The timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() stops the cell on an HTTP error so
# that an error page is never written to disk under a .wav name.
response = requests.get(url, timeout=30)
response.raise_for_status()
with open(output_file, "wb") as f:
    f.write(response.content)

# Report the path actually written instead of repeating the literal.
print(f"Audio file downloaded as {output_file}")


Audio file downloaded as sample_customer_call.wav


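**Convert the WAV → PCM WAV before transcription**

_Note: the cell below only creates the speech recognizer. The conversion and transcription cells are not shown here; the analysis that follows starts from the saved transcripts in `customer_call.csv`._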

In [8]:
# Create the speech_recognition Recognizer instance (sr is the alias set in the
# imports cell). No later cell visible here uses it.
recognizer = sr.Recognizer()

In [16]:
# Load the call transcripts and their sentiment labels from a local CSV.
# NOTE(review): no visible cell writes customer_call.csv — presumably it is
# produced by transcription cells that are not shown; confirm the file exists
# before a Restart & Run All.
df = pd.read_csv("customer_call.csv")

In [17]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,index,text,sentiment_label
0,0,how's it going Arthur I just placed an order w...,negative
1,1,yeah hello I'm just wondering if I can speak t...,neutral
2,2,hey I receive my order but it's the wrong size...,negative
3,3,hi David I just placed an order online and I w...,neutral
4,4,hey I bought something from your website the o...,negative


**Initialize VADER sentiment model**

VADER is a lexicon- and rule-based sentiment model (not deep learning).
It outputs four scores:

- `pos`, `neu`, `neg`: each between 0 and 1
- `compound`: an overall sentiment score from -1 (most negative) to +1 (most positive)

In [18]:
# Single shared VADER analyzer; find_sentiment below reads it as a global.
# The vader_lexicon resource is downloaded in the imports cell.
sid = SentimentIntensityAnalyzer()

In [19]:
# Analyze sentiment by evaluating compound score generated by Vader SentimentIntensityAnalyzer
def find_sentiment(text, threshold=0.05):
    """Label ``text`` as 'positive', 'negative' or 'neutral' using VADER.

    The label is derived from the ``compound`` score returned by the
    module-level analyzer ``sid``.

    Args:
        text: The text to score; passed unchanged to ``sid.polarity_scores``.
        threshold: Half-width of the neutral band. A compound score at or
            above ``threshold`` is positive, at or below ``-threshold`` is
            negative, and anything strictly in between is neutral. The
            default of 0.05 reproduces the previously hard-coded cut-offs.

    Returns:
        One of the strings 'positive', 'negative' or 'neutral'.
    """
    compound_score = sid.polarity_scores(text)['compound']

    # The positive check runs first, so with threshold=0 a score of exactly 0
    # is reported as 'positive'.
    if compound_score >= threshold:
        return 'positive'
    if compound_score <= -threshold:
        return 'negative'
    return 'neutral'

**Apply predictions to each row**

In [20]:
# Score the text column directly. Only "text" is needed, so a column-wise
# Series.apply avoids building a row object per record (axis=1) and matches how
# the named_entities column is computed further down.
df['sentiment_predicted'] = df['text'].apply(find_sentiment)


In [21]:
# Count the rows whose label is 'positive' and whose prediction matches it.
is_positive = df['sentiment_label'] == 'positive'
is_correct = df['sentiment_predicted'] == df['sentiment_label']
true_positive = int((is_correct & is_positive).sum())


## TASK 3 — Named Entity Recognition (NER)

**Load spaCy model**

In [22]:
# Load the "en_core_web_sm" spaCy pipeline once; the same nlp object is reused
# below for entity extraction and for the similarity query.
nlp = spacy.load("en_core_web_sm")

In [23]:
def extract_entities(text):
    """Return the surface text of every entity the global ``nlp`` finds in ``text``.

    The result is a list of strings in the order the entities appear in
    ``doc.ents``; it is empty when no entity is found.
    """
    return [span.text for span in nlp(text).ents]


In [24]:
# Run extract_entities on every transcript; each cell of the new column holds
# that row's list of entity strings (an empty list when none are found).
df['named_entities'] = df['text'].apply(extract_entities)


In [25]:
# Preview the frame with the sentiment_predicted and named_entities columns added.
df.head()

Unnamed: 0,index,text,sentiment_label,sentiment_predicted,named_entities
0,0,how's it going Arthur I just placed an order w...,negative,negative,[Arthur]
1,1,yeah hello I'm just wondering if I can speak t...,neutral,positive,[yesterday]
2,2,hey I receive my order but it's the wrong size...,negative,negative,[]
3,3,hi David I just placed an order online and I w...,neutral,neutral,[David]
4,4,hey I bought something from your website the o...,negative,neutral,[]


**Find most frequent entity overall**

In [26]:
# Flatten the per-row entity lists into a single list of entity strings.
all_entities = [ent for entities in df['named_entities'] for ent in entities]
entities_df = pd.DataFrame(all_entities, columns=['entity'])

# Count occurrences, most frequent first. The column names are set explicitly
# rather than relying on the defaults produced by value_counts().reset_index().
entities_counts = (
    entities_df['entity']
    .value_counts()
    .rename_axis('entity')
    .reset_index(name='count')
)

# Guard the lookup: when no row produced an entity the counts frame is empty
# and .iloc[0] would raise IndexError, so fall back to None.
most_freq_ent = entities_counts['entity'].iloc[0] if not entities_counts.empty else None
most_freq_ent

## TASK 4 — Find most similar complaint (Semantic Similarity)

**Process each call into a spaCy Doc**

In [28]:
# Parse every transcript with the spaCy pipeline; nlp is itself callable, so it
# is handed to apply directly.
df['processed_text'] = df['text'].apply(nlp)

In [29]:
# Free-text query that every call is compared against. It is parsed with the
# same nlp pipeline as the transcripts, giving a Doc to call .similarity() on.
input_query = "wrong package delivery"
processed_query = nlp(input_query)


In [30]:
# Similarity of the query Doc to each transcript Doc (higher = more similar).
# NOTE(review): the stray "lambda text: ..." line in this cell's recorded output
# looks like the tail of a Python warning. en_core_web_sm ships without static
# word vectors (see spaCy's vectors & similarity docs), so these scores may be
# unreliable — consider a pipeline with vectors such as en_core_web_md; confirm.
df['similarity'] = df['processed_text'].apply(
    lambda text: processed_query.similarity(text)
)


  lambda text: processed_query.similarity(text)


In [31]:
# Order the calls from most to least similar to the query. df is rebound to the
# sorted frame, so every later cell sees this ordering.
df = df.sort_values('similarity', ascending=False)
most_similar_text = df.iloc[0]["text"]
print("Most similar text: ", most_similar_text)


Most similar text:  wrong package delivered


In [32]:
# Evaluate the VADER predictions against the labelled sentiment.
# confusion_matrix is imported here but not used in this cell.
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision / recall / F1. In the run recorded below, the 'positive'
# class has a support of only 2 and a precision of 0.07, so read its scores
# with care.
print(classification_report(df["sentiment_label"], df["sentiment_predicted"]))


              precision    recall  f1-score   support

    negative       0.75      0.56      0.64        43
     neutral       0.70      0.49      0.58        57
    positive       0.07      1.00      0.12         2

    accuracy                           0.53       102
   macro avg       0.51      0.68      0.45       102
weighted avg       0.71      0.53      0.59       102



In [33]:
# df was sorted by similarity in an earlier cell, so this shows the five calls
# closest to the query rather than the first five rows of the CSV.
df.head()

Unnamed: 0,index,text,sentiment_label,sentiment_predicted,named_entities,processed_text,similarity
81,81,wrong package delivered,negative,negative,[],"(wrong, package, delivered)",0.526012
61,61,I just placed an order and I was wondering if ...,neutral,neutral,[],"(I, just, placed, an, order, and, I, was, wond...",0.295107
95,95,yeah hi Tommy I just placed an order with you ...,negative,negative,[Tommy],"(yeah, hi, Tommy, I, just, placed, an, order, ...",0.269931
72,72,I just ordered the new remote control car off ...,neutral,neutral,[],"(I, just, ordered, the, new, remote, control, ...",0.253708
14,14,I've just bought a product new guys and I want...,neutral,positive,[Caesar],"(I, 've, just, bought, a, product, new, guys, ...",0.24243
