In [12]:
import pandas as pd
import re
from collections import Counter
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk import ne_chunk, pos_tag, word_tokenize
from nltk.tree import Tree

df_ = pd.read_excel("<customer_chats>.xlsx")

In [15]:
def preprocess_text(text):
    """Lower-case ``text`` and replace every non-word character with a space.

    Apostrophes (straight ``'`` and curly ``’``) are non-word characters, so
    "can't" becomes "can t".

    :param text: Message string to normalise.
    :return: The normalised string (same length as the input).
    """
    return re.sub(r'\W', ' ', text.lower())


def calculate_help_request_rate(texts):
    """Return the fraction of messages that contain a help-request phrase.

    Matching is a plain substring test on the normalised message, so a keyword
    also matches inside longer words (e.g. 'troubleshoot' in 'troubleshooting').

    :param texts: Iterable of chat messages (list, generator, pandas Series, ...).
        Entries that are not strings (e.g. ``None`` or ``NaN``) never match but
        still count towards the total.
    :return: Number of matching messages divided by the number of messages,
        or 0 when there are no messages.
    """
    # Extend this list with more keywords/phrases that indicate a request for help
    keywords = [
        'please assist', 'having trouble', 'need help', 'support request',
        'help needed', 'assistance required', 'can’t figure out', 'stuck with',
        'issue with', 'problem with', 'trouble with', 'unable to', 'difficulty with',
        'can you help', 'help me with', 'question about', 'inquiry about', 'struggling with',
        'challenged by', 'facing an issue', 'how do i', 'how to', 'there’s a problem',
        'it’s not working', 'doesn’t work', 'can’t access', 'need information on',
        'seeking guidance', 'require assistance', 'technical support', 'customer support',
        'service desk', 'help desk', 'not functioning', 'error with', 'failure with',
        'malfunctioning', 'defective', 'not able to', 'how can i', 'assist me with',
        'guidance on', 'advice on', 'help with', 'troubleshoot', 'fixing', 'resolving'
    ]

    # Normalise the keywords exactly like the messages. preprocess_text() turns
    # apostrophes into spaces, so a raw keyword such as 'can’t access' could
    # otherwise never be found in a cleaned message.
    normalized_keywords = [preprocess_text(keyword) for keyword in keywords]

    # Materialise once: supports generators and avoids the ambiguous truth
    # value of a pandas Series in an emptiness check.
    messages = list(texts)
    if not messages:
        return 0

    # Count messages containing at least one help-related keyword/phrase
    count = 0
    for message in messages:
        if not isinstance(message, str):
            continue  # missing values cannot be a help request
        clean_text = preprocess_text(message)
        if any(keyword in clean_text for keyword in normalized_keywords):
            count += 1

    return count / len(messages)

# Example usage: score a handful of hand-written messages
texts = [
    "I'm having trouble logging in, can you assist?",
    "How do I reset my password?",
    "Everything works great, thanks!",
    "I need help with my account settings.",
    "Who can I speak to about billing issues?",
]

# Share of the sample messages flagged as help requests
help_request_rate = calculate_help_request_rate(texts=texts)
print(f'Help Request Rate: {help_request_rate}')


Help Request Rate: 0.4


In [23]:
## more granular help request rates and categorizing help requests

# Keyword/phrase lists for spotting help requests, grouped by category name
example_keywords_forhelp = {
    # --- General help requests ---
    "Direct Requests for Help": [
        "need help",
        "assistance required",
        "can you help",
        "seeking help",
        "please assist",
        "support needed",
        "help needed",
        "requesting assistance",
    ],
    "Questions Indicating Need for Help": [
        "how can I",
        "what should I do",
        "can someone explain",
        "I don't understand",
        "I'm struggling with",
        "I'm not sure how to",
        "could you guide",
    ],
    "Descriptive Terms of Difficulty": [
        "having trouble",
        "facing an issue",
        "problem with",
        "difficulties with",
        "challenge in",
        "stuck on",
        "trouble with",
        "can't figure out",
    ],
    # --- Technical help requests ---
    "Technical Issues": [
        "error",
        "issue",
        "bug",
        "problem",
        "technical support",
        "not working",
        "malfunction",
        "crash",
        "glitch",
        "troubleshoot",
    ],
    "Software/Hardware Terms": [
        "software",
        "application",
        "app",
        "system",
        "device",
        "hardware",
        "tool",
        "program",
    ],
}

# Function to check for help request keywords in a list of messages
def identify_help_requests(messages, keyword_dict):
    """Return the messages that contain at least one help-request keyword.

    Keywords are matched case-insensitively as whole words/phrases (regex
    ``\\b`` boundaries on both sides).

    :param messages: Iterable of message strings; non-string entries are skipped.
    :param keyword_dict: Mapping of category name -> list of keywords/phrases.
    :return: List of flagged messages in input order. A message is added once
        no matter how many keywords or categories it matches.
    """
    # Flatten all categories; empty keywords are dropped because an empty
    # alternative would match at every word boundary.
    keywords = [
        keyword
        for category_keywords in keyword_dict.values()
        for keyword in category_keywords
        if keyword
    ]
    if not keywords:
        return []

    # One alternation compiled once instead of one regex per keyword per message
    pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(keyword) for keyword in keywords) + r")\b",
        re.IGNORECASE,
    )

    return [
        message
        for message in messages
        if isinstance(message, str) and pattern.search(message)
    ]


In [None]:
def calculate_normalized_utilization(resource_traffic, total_traffic):
    """
    Express each resource's traffic as a share of the total traffic.

    :param resource_traffic: Dictionary mapping resource names to their traffic numbers.
    :param total_traffic: Total traffic number for the same period.
    :return: Dictionary mapping each resource name to traffic / total_traffic
             (every value is 0 when total_traffic is 0).
    """
    # Nothing to divide by: report 0 for every resource
    if total_traffic == 0:
        return dict.fromkeys(resource_traffic, 0)

    shares = {}
    for name, hits in resource_traffic.items():
        shares[name] = hits / total_traffic
    return shares

# Example usage
resource_traffic = {
    'veteran support services': 200,
    'resource for ex-convicts': 150,
    'job search tool': 300,
    'career interest assessment': 200,
}
total_traffic = 10000  # Total visits on the website

# Each resource's visits as a fraction of all website visits
normalized_utilization = calculate_normalized_utilization(
    resource_traffic=resource_traffic,
    total_traffic=total_traffic,
)
print(normalized_utilization)


In [32]:
# Take the "TEXT" column of df_ as a plain Python list, replacing missing
# values with a single space first. This rebinds the module-level name `texts`.
texts = df_["TEXT"].fillna(" ").tolist()
# Bare last expression so the notebook displays the rate for the full data set.
calculate_help_request_rate(texts) # recorded result: ~0.0065 help request rate for the chatbot messages

0.00650525424381231

In [33]:

## nltk version for service specific metrics
def extract_services_and_sentiment(texts):
    """Group sentiment scores by the named entities mentioned in each text.

    :param texts: Iterable of message strings.
    :return: Dict mapping each entity returned by ``extract_named_entities``
        to a list with the ``polarity_scores`` result of every text that
        mentions it (one entry per mention).
    """
    analyzer = SentimentIntensityAnalyzer()
    scores_by_entity = {}

    for message in texts:
        # Score the whole message once; every entity found in it shares that result
        scores = analyzer.polarity_scores(message)
        for name in extract_named_entities(message):
            if name not in scores_by_entity:
                scores_by_entity[name] = []
            scores_by_entity[name].append(scores)

    return scores_by_entity

# generic Named Entity Recognition to identify services - not spacy
def extract_named_entities(text):
    """Return the named-entity phrases NLTK finds in ``text``.

    :param text: Raw message string.
    :return: List of entity strings, one per entity chunk, with the tokens of
        a multi-word entity joined by single spaces.
    """
    # binary=True makes ne_chunk label every entity chunk 'NE'. With the
    # default (binary=False) chunks carry type labels such as PERSON or
    # ORGANIZATION, so the 'NE' filter below would never match anything.
    chunked = ne_chunk(pos_tag(word_tokenize(text)), binary=True)
    return [
        # leaves() yields (token, pos_tag) pairs; keep only the tokens
        " ".join(token for token, _tag in chunk.leaves())
        for chunk in chunked
        if isinstance(chunk, Tree) and chunk.label() == 'NE'
    ]


In [43]:
## spacy version for service specific metrics

import spacy
from collections import defaultdict
from nltk.sentiment import SentimentIntensityAnalyzer

# Load the spaCy model for NER
# The pipeline is looked up by the name "en_core_web_md" and bound to the
# module-level name `nlp`, which analyze_service_specific_metrics calls.
# NOTE(review): presumably the model package must already be installed in the
# kernel's environment — confirm.
nlp = spacy.load("en_core_web_md")

# Sentiment Analyzer
# Module-level instance bound to `sia`; analyze_service_specific_metrics reads
# the 'compound' value of its polarity_scores() result.
sia = SentimentIntensityAnalyzer()


## for keyword matching
def get_match(text, rex):
    """Return every match of ``rex`` in ``text``.

    :param text: String to search.
    :param rex: Either a single regex pattern string, or a list/tuple/set/
        frozenset of patterns that are OR-ed together inside one capturing
        group. Items are used as regex fragments and are NOT escaped.
    :return: The ``re.findall`` result for the (combined) pattern; an empty
        list when a collection contains no non-empty pattern.
    """
    if isinstance(rex, (list, tuple, set, frozenset)):
        alternatives = [term for term in rex if term]
        if not alternatives:
            # An empty alternation '()' matches the empty string at every
            # position and would return len(text) + 1 empty "matches".
            return []
        rex = '(' + '|'.join(alternatives) + ')'
    return re.findall(rex, text)


def analyze_service_specific_metrics(feedback_texts, match_terms_services=("CareerOneStop",)):
    """Average the sentiment of feedback texts per service they mention.

    A service is any spaCy entity labelled ORG, PRODUCT or SERVICE, plus any
    match of ``match_terms_services`` found by ``get_match``. Each text's
    compound sentiment score is credited once to every distinct service it
    mentions.

    :param feedback_texts: Iterable of feedback strings.
    :param match_terms_services: Regex pattern string or collection of patterns
        for services NER may miss. ``None`` or an empty value disables keyword
        matching.
    :return: Dict mapping service name -> mean compound sentiment score, with
        keys in first-seen order.
    """
    # Sentiment scores collected per service
    service_sentiments = defaultdict(list)

    for text in feedback_texts:
        # Perform NER to identify services mentioned in the text
        doc = nlp(text)
        # NOTE(review): 'SERVICE' is kept from the original label list; it may
        # not be a label this model emits — confirm.
        ner_services = [ent.text for ent in doc.ents if ent.label_ in ('ORG', 'PRODUCT', 'SERVICE')]

        # Skip keyword matching when no terms are given: an empty pattern
        # matches the empty string everywhere and would create a '' service.
        term_services = get_match(text, rex=match_terms_services) if match_terms_services else []

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result's key order is the same on every run (a set's is not).
        services = [service for service in dict.fromkeys(ner_services + term_services) if service]

        # Calculate sentiment score for the text
        sentiment_score = sia.polarity_scores(text)['compound']

        # Assign sentiment score to identified services
        for service in services:
            service_sentiments[service].append(sentiment_score)

    # Every stored list has at least one score, so the mean is always defined
    return {service: sum(scores) / len(scores) for service, scores in service_sentiments.items()}

# Example usage: mixed positive and negative feedback naming different services
feedback_texts = [
    "CareerOneStop is a great resource! ",
    "I love the new features in the Job Search tool, very user-friendly!",
    "The career videos were not helpful at all, very disappointing.",
    "Had a great experience with the Customer Support team, very helpful!",
    "The payment gateway is always down, it's frustrating.",
    "Business Finder is terrible. I can't find anything",
]

# Mean compound sentiment per detected service
service_metrics = analyze_service_specific_metrics(feedback_texts=feedback_texts)
print(service_metrics)


{'CareerOneStop': 0.6588, 'Job Search': 0.6696, 'Customer Support': 0.8803, 'Business Finder': -0.4767}
