### Contact Sentiment Analysis

The purpose of this script will be to take in dummy contact data and grade the sentiment for use in broader analytics tasks.

* Disclaimer: All data used with the following analysis is dummy data fabricated for use.

In [None]:
# -----------------------------
# Import Packages
# -----------------------------

import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Download VADER lexicon (run once)
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\bryce\AppData\Roaming\nltk_data...


True

In [None]:
# -----------------------------
# Load Data
# -----------------------------

df = pd.read_csv('input_data')

df.head()

Unnamed: 0,call_id,call_start_datetime,call_end_datetime,contact_category,transcript
0,CALL_0001,2025-01-01T09:00:00,2025-01-01T09:06:00,technical support,"Customer: Hi there, I’m calling because everyt..."
1,CALL_0002,2025-01-01T09:20:00,2025-01-01T09:24:00,technical support,"Customer: Hey, I’m calling to confirm some inf..."
2,CALL_0003,2025-01-01T09:40:00,2025-01-01T09:50:00,general enquiry,"Customer: Hello, I just wanted to check a smal..."
3,CALL_0004,2025-01-01T10:00:00,2025-01-01T10:15:00,billing enquiry,Customer: I’m angry because this keeps happeni...
4,CALL_0005,2025-01-01T10:20:00,2025-01-01T10:25:00,service activation,Customer: I’m extremely unhappy with the servi...


In [5]:
# -----------------------------
# Initialise VADER
# -----------------------------
sia = SentimentIntensityAnalyzer()

# -----------------------------
# Sentiment scoring function
# -----------------------------
def get_sentiment_scores(text):
    scores = sia.polarity_scores(text)
    return pd.Series({
        "sentiment_neg": scores["neg"],
        "sentiment_neu": scores["neu"],
        "sentiment_pos": scores["pos"],
        "sentiment_compound": scores["compound"]
    })

In [None]:
# -----------------------------
# Apply sentiment analysis
# -----------------------------
sentiment_scores = df["transcript"].apply(get_sentiment_scores)
df = pd.concat([df, sentiment_scores], axis=1)

# -----------------------------
# Sentiment classification
# -----------------------------
def classify_sentiment(compound):
    if compound <= -0.5:
        return "very negative"
    elif compound < -0.05:
        return "negative"
    elif compound <= 0.05:
        return "neutral"
    elif compound < 0.5:
        return "positive"
    else:
        return "very positive"

df["sentiment_label"] = df["sentiment_compound"].apply(classify_sentiment)

df.head()

# # -----------------------------
# # Interpretation
# # -----------------------------

# polarity_scores() will always output 4 scores: neg, neu, pos and compound
# sentiment neg, neu and pos all sum to 1
# compound normalises the 3 scores ranging from -1 (very negative) to 1 (very positive)


Unnamed: 0,call_id,call_start_datetime,call_end_datetime,contact_category,transcript,sentiment_neg,sentiment_neu,sentiment_pos,sentiment_compound,sentiment_label
0,CALL_0001,2025-01-01T09:00:00,2025-01-01T09:06:00,technical support,"Customer: Hi there, I’m calling because everyt...",0.0,0.937,0.063,0.2732,positive
1,CALL_0002,2025-01-01T09:20:00,2025-01-01T09:24:00,technical support,"Customer: Hey, I’m calling to confirm some inf...",0.08,0.722,0.198,0.5574,very positive
2,CALL_0003,2025-01-01T09:40:00,2025-01-01T09:50:00,general enquiry,"Customer: Hello, I just wanted to check a smal...",0.0,0.66,0.34,0.9246,very positive
3,CALL_0004,2025-01-01T10:00:00,2025-01-01T10:15:00,billing enquiry,Customer: I’m angry because this keeps happeni...,0.187,0.813,0.0,-0.7695,very negative
4,CALL_0005,2025-01-01T10:20:00,2025-01-01T10:25:00,service activation,Customer: I’m extremely unhappy with the servi...,0.176,0.824,0.0,-0.7178,very negative
