In [46]:
import nltk

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from gensim import corpora, models
import gensim
import pandas as pd

In [47]:
# Review dataset, read from a CSV located relative to the working directory.
REVIEWS_CSV = 'Vodaphone_review_dataset.csv'
df_new = pd.read_csv(REVIEWS_CSV)

# Preview the first rows to confirm the columns loaded as expected.
df_new.head()

Unnamed: 0,Rating,Review,Name,Location,Date,Clean_Review,Sentiment,Sentiment Label,Churn_Keyword,Churn,Review_Date,Review_Time,Weekday,Month,Review_Length,Word_Count
0,1,"Trying to buy broadband through Uswitch, then ...",Alfie Calas,GB,2025-06-05 22:03:24+00:00,trying to buy broadband through uswitch then h...,-0.02,neutral,1,1,2025-06-05,22:03:24,Thursday,June,280,48
1,5,Sona did a great job either my trade in and co...,Julliette,GB,2025-06-05 21:51:17+00:00,sona did a great job either my trade in and co...,0.7,positive,0,0,2025-06-05,21:51:17,Thursday,June,68,13
2,1,"One of the worst , if not the worst when it co...",Vlad Ureche,GB,2025-06-05 21:28:36+00:00,one of the worst if not the worst when it come...,-0.2,neutral,0,1,2025-06-05,21:28:36,Thursday,June,154,34
3,1,How those people get 4.6 rate that’s a joke my...,Adam Farbotko,GB,2025-06-05 21:14:08+00:00,how those people get rate thats a joke my full...,0.275,positive,0,1,2025-06-05,21:14:08,Thursday,June,240,44
4,1,Held to ransom by a ‘reputable’ company. Purch...,Chloe,GB,2025-06-05 20:24:59+00:00,held to ransom by a reputable company purchase...,-0.003634,neutral,1,1,2025-06-05,20:24:59,Thursday,June,1620,320


In [48]:
# Split the reviews on the (case-insensitive) sentiment label.
# NOTE(review): everything that is not labelled 'positive' -- including
# 'neutral' and unlabeled rows -- ends up in complaints_df; confirm that this
# is the intended definition of a complaint.
sentiment_labels = df_new['Sentiment Label'].str.lower()
is_positive = sentiment_labels == 'positive'

complaints_df = df_new.loc[~is_positive].copy()

praise_df = df_new.loc[is_positive].copy()


In [49]:
# Fetch the NLTK data packages used below: 'stopwords' backs stopwords.words(),
# and 'punkt_tab' is presumably the tokenizer data word_tokenize relies on.
# Re-running is harmless: already-installed packages are reported as up-to-date.
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\deela\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\deela\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [50]:
# Preprocessing text

# English stop words held in a set so the membership test in preprocess() is O(1).
stop_words = set(stopwords.words('english'))
def preprocess(text):
    """Tokenise a review into lowercase alphabetic tokens without stop words.

    Parameters
    ----------
    text : str or None
        Review text. Anything that is not a string (``None``, ``NaN`` from
        pandas, numbers, ...) is treated as an empty review.

    Returns
    -------
    list of str
        Tokens of the lowercased text that are purely alphabetic and not in
        the module-level ``stop_words`` set; ``[]`` for non-string input.
    """
    # pandas represents missing text as NaN (a float), which has no .lower();
    # guarding here keeps every caller safe, not only the ones that pre-check.
    if not isinstance(text, str):
        return []
    tokens = word_tokenize(text.lower())
    return [word for word in tokens if word.isalpha() and word not in stop_words]

def _tokens_or_empty(review):
    """Tokenise one review, mapping missing values to an empty token list."""
    if pd.isnull(review):
        return []
    return preprocess(review)


# One token list per complaint review, stored alongside the original columns.
complaints_df['tokens'] = complaints_df['Clean_Review'].map(_tokens_or_empty)


In [51]:
# Build the gensim vocabulary from the token lists, then encode every review
# as a bag-of-words vector against that vocabulary.
token_lists = complaints_df['tokens']
dictionary = corpora.Dictionary(token_lists)
corpus = list(map(dictionary.doc2bow, token_lists))


In [52]:
# Fit the LDA topic model on the complaint corpus.
# Five topics is a starting point; the seed is fixed so reruns give the same topics.
N_TOPICS = 5

lda_model = models.LdaModel(
    corpus,
    num_topics=N_TOPICS,
    id2word=dictionary,
    passes=10,
    random_state=42,
)


In [53]:
# Show the ten highest-weighted words of every topic.
# The printed number is the model's 0-based topic id plus one, so "Topic 1"
# here is topic id 0 everywhere else in the notebook.
topics = lda_model.print_topics(num_words=10)
for topic_id, weighted_terms in topics:
    print("Topic {}: {}".format(topic_id + 1, weighted_terms))

Topic 1: 0.008*"th" + 0.006*"tom" + 0.006*"fibre" + 0.005*"jack" + 0.005*"explained" + 0.005*"helpful" + 0.005*"vodafone" + 0.004*"manoj" + 0.004*"connect" + 0.003*"cable"
Topic 2: 0.023*"vodafone" + 0.012*"service" + 0.010*"customer" + 0.009*"would" + 0.008*"get" + 0.008*"broadband" + 0.008*"contract" + 0.008*"told" + 0.008*"phone" + 0.006*"time"
Topic 3: 0.038*"phone" + 0.034*"new" + 0.032*"helpful" + 0.021*"helped" + 0.014*"sim" + 0.013*"upgrade" + 0.010*"thank" + 0.009*"help" + 0.009*"store" + 0.009*"everything"
Topic 4: 0.023*"vodafone" + 0.014*"service" + 0.010*"phone" + 0.009*"customer" + 0.009*"broadband" + 0.008*"would" + 0.008*"contract" + 0.007*"call" + 0.006*"new" + 0.006*"time"
Topic 5: 0.069*"helpful" + 0.019*"store" + 0.016*"extremely" + 0.014*"thank" + 0.013*"really" + 0.012*"us" + 0.011*"knowledgeable" + 0.011*"today" + 0.011*"service" + 0.010*"vodafone"


In [54]:
# Dominant-topic lookup for a single tokenised document

def get_topic(doc):
    """Return the id of the most probable LDA topic for a tokenised document.

    ``doc`` is a list of tokens; it is encoded with the module-level
    ``dictionary`` and scored by the module-level ``lda_model``. When several
    topics share the highest probability, the first one reported wins.
    """
    scored_topics = lda_model.get_document_topics(dictionary.doc2bow(doc))
    topic_id, _probability = max(scored_topics, key=lambda pair: pair[1])
    return topic_id

In [55]:
# Label every complaint with the id of its dominant LDA topic (see get_topic).
complaints_df['Topic'] = complaints_df['tokens'].apply(get_topic)

In [56]:
# Check that the new 'tokens' and 'Topic' columns were added to the complaints.
complaints_df.head()

Unnamed: 0,Rating,Review,Name,Location,Date,Clean_Review,Sentiment,Sentiment Label,Churn_Keyword,Churn,Review_Date,Review_Time,Weekday,Month,Review_Length,Word_Count,tokens,Topic
0,1,"Trying to buy broadband through Uswitch, then ...",Alfie Calas,GB,2025-06-05 22:03:24+00:00,trying to buy broadband through uswitch then h...,-0.02,neutral,1,1,2025-06-05,22:03:24,Thursday,June,280,48,"[trying, buy, broadband, uswitch, multiple, er...",3
2,1,"One of the worst , if not the worst when it co...",Vlad Ureche,GB,2025-06-05 21:28:36+00:00,one of the worst if not the worst when it come...,-0.2,neutral,0,1,2025-06-05,21:28:36,Thursday,June,154,34,"[one, worst, worst, comes, signal, get, n, mes...",1
4,1,Held to ransom by a ‘reputable’ company. Purch...,Chloe,GB,2025-06-05 20:24:59+00:00,held to ransom by a reputable company purchase...,-0.003634,neutral,1,1,2025-06-05,20:24:59,Thursday,June,1620,320,"[held, ransom, reputable, company, purchased, ...",1
12,5,Mohamed was brillant and fixed my phone within...,Romy Aitken,GB,2025-06-05 18:46:06+00:00,mohamed was brillant and fixed my phone within...,0.1,neutral,0,0,2025-06-05,18:46:06,Thursday,June,97,18,"[mohamed, brillant, fixed, phone, within, mins...",3
13,1,This is the complaint I submitted to Vodafone:...,Adriana Castorina,GB,2025-06-05 18:44:57+00:00,this is the complaint i submitted to vodafone ...,0.067885,neutral,1,1,2025-06-05,18:44:57,Thursday,June,1513,295,"[complaint, submitted, vodafone, move, flat, o...",3


In [57]:
# Positive-labelled reviews; these were not tokenised or assigned a topic.
praise_df.head()

Unnamed: 0,Rating,Review,Name,Location,Date,Clean_Review,Sentiment,Sentiment Label,Churn_Keyword,Churn,Review_Date,Review_Time,Weekday,Month,Review_Length,Word_Count
1,5,Sona did a great job either my trade in and co...,Julliette,GB,2025-06-05 21:51:17+00:00,sona did a great job either my trade in and co...,0.7,positive,0,0,2025-06-05,21:51:17,Thursday,June,68,13
3,1,How those people get 4.6 rate that’s a joke my...,Adam Farbotko,GB,2025-06-05 21:14:08+00:00,how those people get rate thats a joke my full...,0.275,positive,0,1,2025-06-05,21:14:08,Thursday,June,240,44
5,5,Excellent customer service. Vinnie was very he...,Farida Ariori,GB,2025-06-05 20:05:30+00:00,excellent customer service vinnie was very hel...,0.6,positive,0,0,2025-06-05,20:05:30,Thursday,June,62,9
6,5,Excellent service from Rushabh! He explained e...,kawater alismaeel,GB,2025-06-05 19:52:59+00:00,excellent service from rushabh he explained ev...,0.433333,positive,0,0,2025-06-05,19:52:59,Thursday,June,250,42
7,5,Steven listened then provided a number of opti...,Dylan Owen,GB,2025-06-05 19:43:30+00:00,steven listened then provided a number of opti...,0.35,positive,0,0,2025-06-05,19:43:30,Thursday,June,175,30


In [58]:
# Raw (uncleaned) review text for the full dataset, for comparison with Clean_Review.
df_new.Review

0       Trying to buy broadband through Uswitch, then ...
1       Sona did a great job either my trade in and co...
2       One of the worst , if not the worst when it co...
3       How those people get 4.6 rate that’s a joke my...
4       Held to ransom by a ‘reputable’ company. Purch...
                              ...                        
9974    absolutely awful, been with vodaphone for 4 mo...
9975    Had a great experience- until we decided to mo...
9976    Karan and Isaac were both amazing. Give them a...
9977                          Thank you mani your amazing
9978    16 Years of Loyalty, But the Last 1.5 Years Ha...
Name: Review, Length: 9979, dtype: object

In [59]:
# Defining the intent label and canned reply for each topic.
# Each row is (topic id, intent label, reply); the ids are the values that the
# 'Topic' column is mapped through when responses are assigned.
_topic_catalogue = (
    (
        0,
        "Router or WiFi Issue",
        "It seems you're having trouble with your router or WiFi. Let's work together to get that sorted quickly.",
    ),
    (
        1,
        "Staff and Service Feedback",
        "Thanks for your feedback on our staff and service. We’ll review your concerns with the relevant team.",
    ),
    (
        2,
        "Phone or SIM Setup",
        "Appreciate you sharing your experience—let’s make sure your new phone or SIM setup is working as expected.",
    ),
    (
        3,
        "Broadband or Internet Problem",
        "We're sorry about the broadband or internet issues you've faced. We’re investigating and working to improve reliability.",
    ),
    (
        4,
        "Contract or Plan Concern",
        "It looks like there’s frustration with your contract or service plan. We’ll help clarify and resolve that for you.",
    ),
)

topic_intents = {topic_id: intent for topic_id, intent, _ in _topic_catalogue}

topic_responses = {topic_id: reply for topic_id, _, reply in _topic_catalogue}



In [60]:
# Mapping topics to responses
# Topic ids without an entry in topic_responses become NaN in the new column.

complaints_df['Response'] = complaints_df['Topic'].map(topic_responses)


In [61]:
# Inspect the canned reply assigned to each complaint.
complaints_df.Response

0       We're sorry about the broadband or internet is...
2       Thanks for your feedback on our staff and serv...
4       Thanks for your feedback on our staff and serv...
12      We're sorry about the broadband or internet is...
13      We're sorry about the broadband or internet is...
                              ...                        
9971    It looks like there’s frustration with your co...
9973    Appreciate you sharing your experience—let’s m...
9974    Thanks for your feedback on our staff and serv...
9975    Thanks for your feedback on our staff and serv...
9978    Thanks for your feedback on our staff and serv...
Name: Response, Length: 3139, dtype: object

In [62]:
def generate_response(text, complaints_df):
    """Return a canned customer-service reply for a single review.

    The review's sentiment is looked up by exact match of ``text`` against the
    ``'Clean_Review'`` column. A positive review gets a thank-you message; any
    other review is tokenised, scored by the module-level ``lda_model`` and
    answered with the entry of ``topic_responses`` for its dominant topic.

    Parameters
    ----------
    text : str
        Cleaned review text. Non-string or blank input yields the generic
        fallback reply instead of raising.
    complaints_df : pandas.DataFrame
        Frame with ``'Clean_Review'`` and ``'Sentiment Label'`` columns used
        for the sentiment lookup. If it holds no positive rows, the thank-you
        branch can never be taken.

    Returns
    -------
    str
        The reply text. ``"Thank you for reaching out."`` is returned whenever
        no topic can be determined or the topic has no configured response.
    """
    fallback = "Thank you for reaching out."

    # Look up sentiment from the dataset; only the first exact match is used.
    matches = complaints_df.loc[complaints_df['Clean_Review'] == text, 'Sentiment Label']
    label = matches.iloc[0] if not matches.empty else None
    # A missing label is NaN (a float) and has no .lower(); treat it like an
    # unknown review and fall back to neutral.
    sentiment = label.lower() if isinstance(label, str) else "neutral"

    if sentiment == "positive":
        return "Thanks for your feedback! We're glad you're happy with our service 😊"

    # Nothing to analyse: avoid calling the tokenizer on None/NaN/blank text.
    if not isinstance(text, str) or not text.strip():
        return fallback

    bow = dictionary.doc2bow(preprocess(text))
    if not bow:
        # No in-vocabulary tokens: the model would only echo its prior, so a
        # topic-specific reply would be arbitrary.
        return fallback

    topic_probs = lda_model.get_document_topics(bow)
    if not topic_probs:
        # Every topic fell below the model's minimum probability.
        return fallback

    dominant_topic = max(topic_probs, key=lambda x: x[1])[0]
    return topic_responses.get(dominant_topic, fallback)


In [74]:
# Draw one complaint review to demo generate_response on.
# Missing reviews are dropped so the demo never receives NaN, and the seed is
# fixed so Restart & Run All reproduces the same sample and response.
sample_review = (
    complaints_df['Clean_Review']
    .dropna()
    .sample(1, random_state=42)
    .to_list()
)

In [75]:
# Show the sampled review (a one-element list).
sample_review


['the staff were able to come up with a solution to a problem of getting my fibre account transferred to my new address that had been deleted by a member of staff from the call centreamber really went above and beyond to set up a new line to the house new fibre contract and unlimited data for my mobile so i wont have to suffer too much while the new fibre contract is processedso a big thank you to amber and the very tolerant deputy manager']

In [73]:
# Generate the canned reply for the sampled review; the same frame is used for
# the sentiment lookup inside generate_response.
response = generate_response(sample_review[0], complaints_df)
print("Generated Response:", response)

Generated Response: We're sorry about the broadband or internet issues you've faced. We’re investigating and working to improve reliability.


In [66]:
# Persist the vocabulary; the saved LDA model's word ids refer to this dictionary.
dictionary.save("lda_dictionary.dict")


In [67]:

# Persist the trained LDA model to the working directory for later reuse.
lda_model.save("lda_model")
