In [None]:
import nltk

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from gensim import corpora, models
import gensim
import pandas as pd

In [None]:
# Read the review dataset from the CSV file in the working directory.
df_new = pd.read_csv('Vodaphone_review_dataset.csv')

# Preview the first five rows.
df_new.head(n=5)

In [None]:
# Split the reviews by sentiment label (compared case-insensitively):
# rows labelled "positive" are praise, every other row is a complaint.
is_positive = df_new['Sentiment Label'].str.lower() == 'positive'

complaints_df = df_new[~is_positive].copy()
praise_df = df_new[is_positive].copy()


In [None]:
# Download the NLTK resources requested by this notebook.
for resource in ('punkt_tab', 'stopwords'):
    nltk.download(resource)

In [None]:
# Preprocessing text

# English stopwords held in a set so membership checks in preprocess() are cheap.
stop_words = {*stopwords.words('english')}
def preprocess(text):
    """Tokenise a review into lower-cased, alphabetic, non-stopword tokens.

    Args:
        text: Review text. Any value that is not a string (``None``, a
            ``NaN`` from a missing cell, a number) is treated as an empty
            review instead of raising ``AttributeError`` on ``.lower()``.

    Returns:
        list[str]: Tokens from ``word_tokenize`` that are purely alphabetic
        and not in ``stop_words``; an empty list when there is no usable text.
    """
    # Missing reviews reach this function as float NaN / None, which have no .lower().
    if not isinstance(text, str):
        return []
    tokens = word_tokenize(text.lower())
    return [word for word in tokens if word.isalpha() and word not in stop_words]

# Tokenise every complaint; rows with a missing review get an empty token list.
complaints_df['tokens'] = complaints_df['Clean_Review'].apply(
    lambda review: [] if pd.isnull(review) else preprocess(review)
)


In [None]:
# Creating dictionary and corpus
# The dictionary is built from the complaint token lists; each token list is
# then converted to its bag-of-words form with doc2bow.
dictionary = corpora.Dictionary(complaints_df['tokens'])
corpus = list(map(dictionary.doc2bow, complaints_df['tokens']))


In [None]:
# Train LDA model (start with 5 topics)
# random_state is pinned to 42; the model makes 10 passes over the corpus.
lda_model = models.LdaModel(
    corpus,
    num_topics=5,
    id2word=dictionary,
    passes=10,
    random_state=42,
)


In [None]:
# Displaying the topics found by the LDA model (ten words per topic).
# Note: the printed label is shifted by one (idx+1), so it does not line up
# directly with the 0-4 keys used in topic_intents / topic_responses.
topics = lda_model.print_topics(num_words=10)
for entry in topics:
    idx, topic = entry
    print(f"Topic {idx+1}: {topic}")

In [None]:
# getting the dominant topic for each document

def get_topic(doc):
    """Return the id of the most probable LDA topic for a tokenised document.

    Args:
        doc: List of tokens, converted to bag-of-words with the notebook's
            ``dictionary`` before being scored by ``lda_model``.

    Returns:
        The topic id of the (topic_id, probability) pair with the highest
        probability; on ties ``max`` keeps the first such pair.

    NOTE(review): an empty token list is still scored and yields a topic id;
    presumably that reflects only the model's prior. Confirm whether such
    rows should be excluded upstream.
    """
    topic_distribution = lda_model.get_document_topics(dictionary.doc2bow(doc))
    best_pair = max(topic_distribution, key=lambda pair: pair[1])
    return best_pair[0]

In [None]:
# Store the dominant topic id of every complaint in a new 'Topic' column.
complaints_df['Topic'] = [get_topic(tokens) for tokens in complaints_df['tokens']]

In [None]:
# Preview the complaints frame, which now carries the 'tokens' and 'Topic' columns.
complaints_df.head(n=5)

In [None]:
# Preview the positive reviews.
praise_df.head(n=5)

In [None]:
# Show the 'Review' column of the full dataset.
df_new['Review']

In [None]:
# Defining responses for each topic

# Short intent label per topic id; list position becomes the integer key (0-4).
topic_intents = dict(enumerate([
    "Router or WiFi Issue",
    "Staff and Service Feedback",
    "Phone or SIM Setup",
    "Broadband or Internet Problem",
    "Contract or Plan Concern",
]))

# Canned customer reply per topic id, keyed the same way as topic_intents.
topic_responses = dict(enumerate([
    "It seems you're having trouble with your router or WiFi. Let's work together to get that sorted quickly.",
    "Thanks for your feedback on our staff and service. We’ll review your concerns with the relevant team.",
    "Appreciate you sharing your experience—let’s make sure your new phone or SIM setup is working as expected.",
    "We're sorry about the broadband or internet issues you've faced. We’re investigating and working to improve reliability.",
    "It looks like there’s frustration with your contract or service plan. We’ll help clarify and resolve that for you.",
]))



In [None]:
# Mapping topics to responses
# Each complaint's topic id is looked up in topic_responses.
topic_ids = complaints_df['Topic']
complaints_df['Response'] = topic_ids.map(topic_responses)


In [None]:
# Show the response assigned to each complaint.
complaints_df['Response']

In [None]:
def generate_response(text, complaints_df):
    """Return a canned customer-service reply for a single review.

    The review's sentiment is looked up by exact match on ``Clean_Review``
    in ``complaints_df``. Reviews that are not found, or whose label is
    missing, are treated as "neutral". Positive reviews get a thank-you
    message; all others are assigned their dominant LDA topic and answered
    with the matching entry of ``topic_responses``.

    Args:
        text: The cleaned review text. Non-string values (e.g. ``NaN`` from
            a missing review) and reviews that yield no usable tokens
            receive the generic fallback reply.
        complaints_df: DataFrame with ``Clean_Review`` and
            ``Sentiment Label`` columns, used for the sentiment lookup.

    Returns:
        str: The response message.
    """
    fallback = "Thank you for reaching out."

    # A missing review arrives as NaN/None and cannot be tokenised.
    if not isinstance(text, str):
        return fallback

    # Look up sentiment from the dataset (first exact match wins).
    labels = complaints_df.loc[complaints_df['Clean_Review'] == text, 'Sentiment Label']
    label = None if labels.empty else labels.iloc[0]
    # The label itself may be NaN; only real strings can be lower-cased.
    sentiment = label.lower() if isinstance(label, str) else "neutral"

    if sentiment == "positive":
        return "Thanks for your feedback! We're glad you're happy with our service 😊"

    tokens = preprocess(text)
    # With no usable tokens there is nothing to base a topic on, so do not
    # send a topic-specific reply.
    if not tokens:
        return fallback

    # Reuse the notebook's dominant-topic helper instead of duplicating it.
    return topic_responses.get(get_topic(tokens), fallback)


In [None]:
# Pick one complaint for the demo. Missing reviews are excluded so the sample
# is always real text, and the seed is fixed so a rerun shows the same example.
sample_review = complaints_df['Clean_Review'].dropna().sample(1, random_state=42).to_list()

In [None]:
# Display the sampled review (a one-element list).
sample_review


In [None]:
# Generate and print the reply for the sampled review.
review_text = sample_review[0]
response = generate_response(review_text, complaints_df)
print("Generated Response:", response)

In [None]:
# Save the gensim dictionary to "lda_dictionary.dict" (relative to the working directory).
dictionary.save("lda_dictionary.dict")


In [None]:
# NOTE(review): `pickle` is imported here but not referenced by any cell shown;
# the model is saved through its own save() method below. Confirm whether the
# import is still needed, and if so move it to the imports cell at the top.
import pickle
# Save the trained LDA model to "lda_model.gensim" (relative to the working directory).
lda_model.save("lda_model.gensim")
