In [15]:
%%capture
import pandas as pd, os, nltk, tensorflow as tf
from nltk.corpus import stopwords
import numpy as np
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from nltk.corpus import words
nltk.download('stopwords')
nltk.download('words')
nltk.download('punkt_tab')

!pip install gradio
!pip install sentence_transformers
!pip install vaderSentiment
!pip install langdetect



In [30]:
# Setup logging

import logging

logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(message)s')

### **Prepare Dataset**

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [19]:
base_folder = '/content/drive/MyDrive/'
googlie_review = os.path.join(base_folder, 'NPS_reviews_on_Tab_2023.xlsm')
nps_review = os.path.join(base_folder, 'Google_reviews_data_2023.xlsm')

In [20]:
_googlie_review = pd.read_excel(googlie_review)
_nps_review = pd.read_excel(nps_review)

In [21]:
_googlie_review.head()

Unnamed: 0,Date,Rating,Store Id,Remarks,Name,Contact,Sentiment: Positive(0)/Negative(1)/Neutral(2)
0,2023-12-31,6.25,4001,fine,Simran Naz,3046542000.0,2
1,2023-12-31,5.0,4001,noo,munaza,3025506000.0,1
2,2023-12-31,6.0,4001,well done happy new year do sale offer,Fahad sheikh,3218488000.0,0
3,2023-12-31,6.25,4035,good,Ayesha khald,3217313000.0,0
4,2023-12-30,5.75,1128,excellent,Rawish,3349101000.0,0


In [22]:
_nps_review.head()

Unnamed: 0,Date,Store Code,City,Rating,Comments,Review Reply,Reply Date,Customer Name,Sentiment: Positive(0)/Negative(1)/Neutral(2)
0,2023-12-30 00:00:00,1041.0,Wapda Town Lahore,5.0,Very cooperative staff,Thank you for your feedback.,2024-07-19,Ahmad hassan,0.0
1,2023-12-29 00:00:00,1145.0,Stylo MM Alam,4.0,Nice shoes for ladies and kids,Thank you for your feedback.,2024-07-19,Saqib Saqlain (LIFE),0.0
2,2023-12-29 00:00:00,1201.0,Rawalpindi,5.0,Nice,Thank you for your rating.,2024-07-19,MS Corporation,0.0
3,2023-12-28 00:00:00,1235.0,Lahore,3.0,The prices ar little bit high for a mediocre p...,Thank you for your feedback.,2024-07-19,Abdul Basit,2.0
4,2023-12-28 00:00:00,1022.0,Sahiwal,4.0,All type of shoes are available,Thank you for your feedback.,2024-07-19,SHAFQAT ABBAS,0.0


In [23]:

new_google = _googlie_review[['Rating', 'Remarks']]
new_nps = _nps_review[['Rating', 'Comments']]

new_google.rename(columns={'Remarks': 'reviews'}, inplace=True)
new_nps.rename(columns={'Comments': 'reviews'}, inplace=True)

new_google.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_google.rename(columns={'Remarks': 'reviews'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_nps.rename(columns={'Comments': 'reviews'}, inplace=True)


Unnamed: 0,Rating,reviews
0,6.25,fine
1,5.0,noo
2,6.0,well done happy new year do sale offer
3,6.25,good
4,5.75,excellent


In [24]:
new_nps.head()

Unnamed: 0,Rating,reviews
0,5.0,Very cooperative staff
1,4.0,Nice shoes for ladies and kids
2,5.0,Nice
3,3.0,The prices ar little bit high for a mediocre p...
4,4.0,All type of shoes are available


In [31]:
# Preprocessing: Remove null values, extra spaces, and new lines

def preprocess_reviews(reviews):
    # Remove null or empty reviews
    reviews = [review for review in reviews if isinstance(review, str) and review.strip()]

    # Strip extra spaces and new lines
    reviews = [review.strip().replace("\n", " ").replace("\r", " ") for review in reviews]

    return reviews

# Apply preprocessing to  Google reviews
if 'reviews' in new_google:
    google_reviews = preprocess_reviews(new_google['reviews'])
else:
    raise KeyError("The key 'reviews' does not exist in the input data.")

# Apply preprocessing to  NPS reviews
if 'reviews' in new_nps:
    nps_reviews = preprocess_reviews(new_nps['reviews'])
else:
    raise KeyError("The key 'reviews' does not exist in the input data.")

logging.info("Preprocessing completed. Null and extra spaces removed. Incomplete words kept for now for clarity, will be re-analyzed later.")

In [32]:
# GOOGLE DATASET CLASSIFICATION


from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from langdetect import detect

# Predefined complaint types
complaint_types = [
     "Fitting","Quality", "Staff Behaviour", "Network", "Bad", "Good", "Ambiance", "Article","Packing","Price","Variety","No stock","Environment", "Cleanliness", "Crowded", "Service"
]


# Initialize SentenceTransformer (for complaint type embeddings)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 1: Create embeddings for the complaint types
complaint_embeddings = model.encode(complaint_types)

# Step 2: Perform Sentiment Analysis to identify negative and neutral reviews
analyzer = SentimentIntensityAnalyzer()
negative_reviews = []
negative_review_indices = []
pos_reviews = []
pos_review_indices = []
review_sentiments = []  # List to store sentiment of each review

# Filter reviews based on sentiment scores
for i, review in enumerate(google_reviews):
    score = analyzer.polarity_scores(review)
    if score['compound'] < -0.05:  # Negative reviews (threshold can be adjusted)
        if len(review.split()) > 1 and detect(review) == 'en':
            negative_reviews.append(review)
            negative_review_indices.append(i)
        review_sentiments.append(1)  # Negative sentiment
    elif score['compound'] > 0.05:  # Positive reviews
        if len(review.split()) > 1 and detect(review) == 'en':
            pos_reviews.append(review)
            pos_review_indices.append(i)
        review_sentiments.append(0)  # Positive sentiment
    else:  # Neutral reviews
        review_sentiments.append(2)  # Neutral sentiment

# Step 3: Create embeddings for the reviews
negative_review_embeddings = model.encode(negative_reviews)
pos_review_embeddings = model.encode(pos_reviews)

# Step 4: Compare each review embedding to complaint type embeddings and map to the most similar complaint
def classify_reviews(review_embeddings, reviews):
    mapped_complaints = []
    for review_embedding in review_embeddings:
        # Compute cosine similarity between the review embedding and all complaint embeddings
        similarities = cosine_similarity([review_embedding], complaint_embeddings)

        # Get the index of the highest similarity score
        best_match_index = np.argmax(similarities)

        # Map to the corresponding complaint type
        mapped_complaints.append(complaint_types[best_match_index])

    return mapped_complaints

# Classify negative and positive reviews
negative_mapped_complaints = classify_reviews(negative_review_embeddings, negative_reviews)
neutral_mapped_complaints = classify_reviews(pos_review_embeddings, pos_reviews)

# Output the mapped complaints for each negative review
for i, (review, complaint) in enumerate(zip(negative_reviews, negative_mapped_complaints)):
    print(f"Review: {google_reviews[negative_review_indices[i]]}")
    print(f"Sentiment: Negative")
    print(f"Mapped Classification: {complaint}")
    print()

# Output the mapped complaints for each positive review
for i, (review, complaint) in enumerate(zip(pos_reviews, neutral_mapped_complaints)):
    print(f"Review: {google_reviews[pos_review_indices[i]]}")
    print(f"Sentiment: Positive")
    print(f"Mapped Classification: {complaint}")
    print()

# Print the list of sentiments for all reviews
# print("Sentiments for all reviews:", review_sentiments)

logging.info('Used all_mini_L6_v2 for embeddings')
logging.info('positives and negatives both analyzed')
logging.info('putting incorrect words into neutral and leaving them out')


Review: quality is not good
Sentiment: Negative
Mapped Classification: Quality

Review: whyyyy tf is the 41 size smaller now???????¿?? FIX DAT SHIT please make sure the sizes are good, we pay shit ton of money to buy god shoes and they are small af now. extremely dissatisfied .
Sentiment: Negative
Mapped Classification: Fitting

Review: I recently bought 2 shoes and found worst quality ever. I am a regular customer but this time I'm really disappointed and emporium mall branch outlet is not helping at all. one of my shoe broke right after wearing and other one is really uncomfortable. please work on your quality and comfort. Thanks
Sentiment: Negative
Mapped Classification: Quality

Review: no net working to check size in another branch no one want to help or call
Sentiment: Negative
Mapped Classification: Network

Review: bad not come
Sentiment: Negative
Mapped Classification: Bad

Review: very bad quality
Sentiment: Negative
Mapped Classification: Quality

Review: yours shoes packing

In [27]:

import gradio as gr

def classify_new_review(review):
    # Perform sentiment analysis on the input review
    score = analyzer.polarity_scores(review)

    # Check if the review has both positive and negative sentiments
    if score['neg'] > score['pos']:
        sentiment = "Negative"
    elif score['compound'] < -0.05:
        sentiment = "Negative"
    elif -0.05 <= score['compound'] <= 0.05:
        if len(review.split()) >= 1 and detect(review) == 'en':
            sentiment = "Neutral"
        else:
            return "Review is not suitable for classification (too short or not in English)."
    else:
        sentiment = "Positive"

    # Encode the review
    review_embedding = model.encode([review])

    # Compute cosine similarity with complaint type embeddings
    similarities = cosine_similarity(review_embedding, complaint_embeddings)

    # Get the index of the highest similarity score
    best_match_index = np.argmax(similarities)

    # Map to the corresponding complaint type
    complaint_type = complaint_types[best_match_index]

    # Map complaint types to departments
    department_mapping = {
        "Fitting": "Sales Dept",
        "Quality": "Manufacturing Dept",
        "Staff Behaviour": "HR Dept",
        "Network": "IT Dept",
        "Bad": "General Feedback",
        "": "General Feedback",
        "Good": "General Feedback",
        "Ambiance": "Operations Dept",
        "Article": "Sales Dept",
        "Packing": "Logistics Dept",
        "Price": "Sales Dept",
        "Variety": "Merchandising Dept",
        "No stock": "Inventory Dept",
        "Environment": "Facility Management",
        "Cleanliness": "Facility Management",
        "Crowded": "Operations Dept",
        "Service": "Customer Support"
    }

    # Determine the department
    if sentiment == "Negative":
        department = department_mapping.get(complaint_type)
        if department == "General Feedback":
          department = "None (General Feedback)"
    else:
        department = department_mapping.get(complaint_type)
        if department == "General Feedback":
          department = "None (General Feedback)"
        else:
          department = "None (Positive Feedback)"

    return f"Sentiment: {sentiment}\nMapped Classification: {complaint_type}\nDepartment: {department}"

# Gradio interface
interface = gr.Interface(
    fn=classify_new_review,
    inputs=gr.Textbox(label="Enter a Customer Review"),
    outputs=gr.Textbox(label="Classification Result"),
    title="Customer Review Classification",
    description="Enter a customer review to classify it into predefined complaint categories."
)

# Launch the interface
interface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7f862d2fcf6fc9ae93.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [28]:
# NPS DATA CLASSIFICATION

from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Predefined complaint types
complaint_types = [
     "Fitting","Quality", "Staff Behaviour", "OTP Issue", "Bad", "Good", "Best", "Ambiance", "Article","Packing","Price","Variety","No stock","Environment", "Cleanliness", "Crowded"

]

# Initialize SentenceTransformer (for complaint type embeddings)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 1: Create embeddings for the complaint types
complaint_embeddings = model.encode(complaint_types)

# Step 2: Perform Sentiment Analysis to identify negative reviews
analyzer = SentimentIntensityAnalyzer()
negative_reviews = []
negative_review_indices = []
pos_reviews = []
pos_review_indices = []

# Filter only negative reviews based on the sentiment score
for i, review in enumerate(nps_reviews):
    if isinstance(review, str):  # Check if the review is a string
        score = analyzer.polarity_scores(review)
        if score['compound'] < -0.05:  # Negative reviews (threshold can be adjusted)
            negative_reviews.append(review)
            negative_review_indices.append(i)  # Save the index of negative reviews
        elif score['compound'] > 0.06:  # Positive reviews
            pos_reviews.append(review)
            pos_review_indices.append(i)  # Save the index of positive reviews

#_________________________________________________________________
# Step 3: Create embeddings for the negative reviews only
if negative_reviews:  # Check if we have any negative reviews
    negative_review_embeddings = model.encode(negative_reviews)

    # Step 4: Compare each negative review embedding to complaint type embeddings and map to the most similar complaint
    mapped_complaints = []
    for review_embedding in negative_review_embeddings:
        # Compute cosine similarity between the review embedding and all complaint embeddings
        similarities = cosine_similarity([review_embedding], complaint_embeddings)

        # Get the index of the highest similarity score
        best_match_index = np.argmax(similarities)

        # Default to the highest similarity match
        mapped_complaints_for_review = [complaint_types[best_match_index]]

        # Add the mapped complaints for this review
        mapped_complaints.append(mapped_complaints_for_review)

    # Output the mapped complaints for each negative review
    for i, (review, complaints) in enumerate(zip(negative_reviews, mapped_complaints)):
        print(f"Review: {nps_reviews[negative_review_indices[i]]}")  # Display the original review
        print(f"Mapped Issue: {', '.join(complaints)}")
        print(f"Sentiment: Negative")

        print()


#______________________________________________________________
# Step 3: Create embeddings for the positive reviews only
if pos_reviews:  # Check if we have any negative reviews
    pos_review_embeddings = model.encode(negative_reviews)

    # Step 4: Compare each negative review embedding to complaint type embeddings and map to the most similar complaint
    mapped_complaints = []
    for review_embedding in pos_review_embeddings:
        # Compute cosine similarity between the review embedding and all complaint embeddings
        similarities = cosine_similarity([review_embedding], complaint_embeddings)

        # Get the index of the highest similarity score
        best_match_index = np.argmax(similarities)

        # Default to the highest similarity match
        mapped_complaints_for_review = [complaint_types[best_match_index]]

        # Add the mapped complaints for this review
        mapped_complaints.append(mapped_complaints_for_review)

    # Output the mapped complaints for each negative review
    for i, (review, complaints) in enumerate(zip(pos_reviews, mapped_complaints)):
        print(f"Review: {nps_reviews[pos_review_indices[i]]}")  # Display the original review
        print(f"Mapped Classification: {', '.join(complaints)}")
        print(f"Sentiment: Positive")
        print()


Review: I recently had the unfortunate experience of dealing with Stylo Shoes, and I must say it was far from satisfactory. The customer service I encountered was nothing short of a nightmare. From unhelpful representatives to a lack of responsiveness, my interaction with Stylo Shoes has left me with a sour taste. When reaching out for assistance regarding a purchase issue, I encountered a disinterested and uncooperative customer service team. Instead of addressing my concerns promptly, they seemed indifferent to my problems, providing vague responses that left me feeling frustrated and undervalued as a customer. This experience has made me to never shop from Stylo Shoes again. In a market saturated with options, the importance of exceptional customer service cannot be overstated. Unfortunately, Stylo Shoes falls short in this aspect, and as a result, they have lost a once-loyal customer.
Mapped Issue: Staff Behaviour
Sentiment: Negative

Review: Most pathetic experience ever. They hav

In [29]:

import gradio as gr

def classify_new_review(review):
    # Perform sentiment analysis on the input review
    score = analyzer.polarity_scores(review)

    # Check if the review has both positive and negative sentiments
    if score['neg'] > score['pos']:
        sentiment = "Negative"
    elif score['compound'] < -0.05:
        sentiment = "Negative"
    elif -0.05 <= score['compound'] <= 0.05:
        if len(review.split()) >= 1 and detect(review) == 'en':
            sentiment = "Neutral"
        else:
            return "Review is not suitable for classification (too short or not in English)."
    else:
        sentiment = "Positive"

    # Encode the review
    review_embedding = model.encode([review])

    # Compute cosine similarity with complaint type embeddings
    similarities = cosine_similarity(review_embedding, complaint_embeddings)

    # Get the index of the highest similarity score
    best_match_index = np.argmax(similarities)

    # Map to the corresponding complaint type
    complaint_type = complaint_types[best_match_index]

    # Map complaint types to departments
    department_mapping = {
        "Fitting": "Sales Dept",
        "Quality": "Manufacturing Dept",
        "Staff Behaviour": "HR Dept",
        "Network": "IT Dept",
        "Bad": "General Feedback",
        "": "General Feedback",
        "Good": "General Feedback",
        "Ambiance": "Operations Dept",
        "Article": "Sales Dept",
        "Packing": "Logistics Dept",
        "Price": "Sales Dept",
        "Variety": "Merchandising Dept",
        "No stock": "Inventory Dept",
        "Environment": "Facility Management",
        "Cleanliness": "Facility Management",
        "Crowded": "Operations Dept",
        "Service": "Customer Support"
    }

    # Determine the department
    if sentiment == "Negative" or sentiment == "Neutral":
        department = department_mapping.get(complaint_type)
        if department == "General Feedback":
          department = "None (General Feedback)"
    elif sentiment == "Positive":
        department = department_mapping.get(complaint_type)
        if department == "General Feedback":
          department = "None (General Feedback)"
        else:
          department = "None (Positive Feedback)"


    return f"Sentiment: {sentiment}\nMapped Classification: {complaint_type}\nDepartment: {department}"

# Gradio interface
interface = gr.Interface(
    fn=classify_new_review,
    inputs=gr.Textbox(label="Enter a Customer Review"),
    outputs=gr.Textbox(label="Classification Result"),
    title="Customer Review Classification",
    description="Enter a customer review to classify it into predefined complaint categories."
)

# Launch the interface
interface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://674f4fac44b36abca6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [33]:
logging.info('gradio interface used')