In [1]:
# Import libs
import pandas as pd

In [2]:
# Load data
data = pd.read_csv('feedback_detail.csv')
data = pd.DataFrame(data)
data.head()

Unnamed: 0,GeneralFeedbackID,Title,Content,Upvote,Rating,CreatedDate,UserID,ProductID,FeedbackDetailID
0,19708302,C·ª±c k√¨ h√†i l√≤ng,ƒê·ª£t n√†y mua t·∫∑ng 2 c√¢y tr·∫Øng ƒëen nh∆∞ng qu√™n ch...,3,5,1704256818,5178168,184036446.0,1
1,19708307,C·ª±c k√¨ h√†i l√≤ng,ƒê·ª£t n√†y mua t·∫∑ng 2 c√¢y tr·∫Øng (giao t·ª´ Hanoi) ƒë...,2,5,1704256916,5178168,184059211.0,2
2,17356319,C·ª±c k√¨ h√†i l√≤ng,"5* cho m√°y c√≤n nguy√™n seal, h√†ng ch√≠nh h√£ng VN...",184,5,1660043992,945720,123345348.0,3
3,19694107,R·∫•t kh√¥ng h√†i l√≤ng,"Mua x√†i m·ªõi c√≥ 1 ng√†y m√† ƒë√£ l·ªói, c√≥ cu·ªôc g·ªçi t...",1,1,1703434010,15052027,197214029.0,4
4,18794713,C·ª±c k√¨ h√†i l√≤ng,Ch·∫•t l∆∞·ª£ng s·∫£n ph·∫©m c·ªßa Shop r·∫•t t·ªët ƒë·∫πp full ...,20,5,1676287655,28830179,271966786.0,5


In [3]:
# Drop columns
data = data.drop(columns=['ProductID'])

In [4]:
data.columns

Index(['GeneralFeedbackID', 'Title', 'Content', 'Upvote', 'Rating',
       'CreatedDate', 'UserID', 'FeedbackDetailID'],
      dtype='object')

In [5]:
# Check the datatype
data.dtypes

GeneralFeedbackID     int64
Title                object
Content              object
Upvote                int64
Rating                int64
CreatedDate           int64
UserID                int64
FeedbackDetailID      int64
dtype: object

In [6]:
def categorize_feedback(feedback):
    if isinstance(feedback, str):
        product_keywords = ['s·∫£n ph·∫©m', 'm√°y', 'h√†ng', 'ch·∫•t l∆∞·ª£ng']
        service_keywords = ['giao h√†ng', 'ƒë√≥ng g√≥i', 'shipper', 'shop']
        
        if any(keyword in feedback.lower() for keyword in product_keywords):
            return 'Product'
        elif any(keyword in feedback.lower() for keyword in service_keywords):
            return 'Service'
        else:
            return 'Unknown'
    else:
        return 'Unknown'

data['Category'] = data['Content'].apply(categorize_feedback)

In [7]:
category_counts = data['Category'].value_counts()
print("Feedback counts by category:")
print(category_counts)

Feedback counts by category:
Category
Unknown    6236
Product    2559
Service      79
Name: count, dtype: int64


In [9]:
from textblob import TextBlob

def analyze_sentiment(feedback):
    if isinstance(feedback, str):
        analysis = TextBlob(feedback)
        if analysis.sentiment.polarity > 0:
            return 'Positive'
        elif analysis.sentiment.polarity < 0:
            return 'Negative'
        else:
            return 'Neutral'
    else:
        return 'Unknown'

data['Sentiment'] = data['Content'].apply(analyze_sentiment)

In [10]:
print("Product feedback sentiment counts:")
print(data[data['Category'] == 'Product']['Sentiment'].value_counts())

print("\nService feedback sentiment counts:")
print(data[data['Category'] == 'Service']['Sentiment'].value_counts())

Product feedback sentiment counts:
Sentiment
Neutral     2123
Positive     347
Negative      89
Name: count, dtype: int64

Service feedback sentiment counts:
Sentiment
Neutral     69
Positive     8
Negative     2
Name: count, dtype: int64


In [19]:
import nltk
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

def generate_summary(feedback):
    parser = PlaintextParser.from_string(feedback, Tokenizer("english"))
    summarizer = LsaSummarizer()
    summary = summarizer(parser.document, 3)  # Generate 3 sentence summary
    return ' '.join(str(sentence) for sentence in summary)

product_feedback = ' '.join(data[data['Category'] == 'Product']['Content'])
product_summary = generate_summary(product_feedback)

service_feedback = ' '.join(data[data['Category'] == 'Service']['Content'])
service_summary = generate_summary(service_feedback)

print("Product feedback summary:")
print(product_summary)

print("\nService feedback summary:")
print(service_summary)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\kietd\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
  warn(message % (words_count, sentences_count))


Product feedback summary:
s·∫£n ph·∫©m gi·ªëng qu·∫£ng c√°o, giao h√†ng nhanh ƒë√∫ng h·∫πn, m√°y ho·∫°t ƒë·ªông t·ªët, kh√¥ng g·∫∑p v·∫•n ƒë·ªÅ g√¨, nh∆∞ng l√∫c b√≥c seal xong ph√°t hi·ªán c√≥ m·ªôt v·∫øt x∆∞·ªõc nh·∫π tr√™n tay th·ªè g·∫ßn loa tho·∫°i ch∆∞a bi·∫øt l√Ω do, n√™n cho 3 sao th√¥i nh√©. H√¨nh ch·ª•p trƒÉng ngo√†i c·ª≠a s·ªï sau 6 th√°ng d√πng m√°y Tiki giao h√†ng nhanh, ƒë√≥ng g√≥i c·∫©n th·∫≠n, m√¨nh d√πng ƒë∆∞·ª£c tu·∫ßn r·ªìi c≈©ng ch∆∞a th·∫•y v·∫•n ƒë·ªÅ g√¨ T·ªët, s·∫£n ph·∫©m ch√≠nh h√£ng, c√≥ k√≠ch ho·∫°t b·∫£o ngay ngay mua th√†nh c√¥ng ƒëi·ªán tho·∫°i ch√≠nh h√£ng, v√† c√≤n nguy√™n seal, mua v√¨ c√°i form m√°y bo tr√≤n m·ªÅm m·∫°i d·ªÖ b·ªè t√∫i ch·ª© ch·ª©c nƒÉng th√¨ c≈©ng k kh√°c g√¨ ƒë·ªùi 12 v√† 13 ƒê·ªô ph√¢n gi·∫£i c·ªßa ƒëi·ªán tho·∫°i n√†y r·∫•t t·ªá. -  M√†n hinh iPhone 15 Pro Max c√≥ k√≠ch th∆∞·ªõc l·ªõn, s·ª≠ d·ª•ng c√¥ng ngh·ªá  Super Retina XDR c√πng ƒë·ªô ph√¢n gi·∫£i cao, mang t·ªõi ch·∫•t l∆∞·ª£ng h√¨nh ·∫£nh hi·ªÉn th·ªã s·∫Øc n√©t, ch√¢n th·ª±c