In [5]:
import pandas as pd

# 读取Excel文件
file_path = "jingdong_comments.xlsx"
data = pd.read_excel(file_path, usecols=["text", "location"])

# 检查前几行数据
print(data.head())


                                                text location
0  你说得对，但是980Pro是8月9号装上去，9月11号与9月21号能连掉两次的高端ssd；一...       安徽
1  买两个盘，用三星魔术师测试写入读取持续高温到85到88度，高温，我就退货，但是京东售后收到货...       安徽
2                                     言过其实，完全没有大厂的品质       安徽
3                                             少了一百个g       安徽
4                                             这速度正常吗       安徽


In [6]:
import spacy

# 加载spaCy中文模型
nlp = spacy.load("zh_core_web_sm")


In [23]:
# feature keyword
feature_categories = {
    "capacity_issues": ["容量", "内存", "存储不足", "空间不足", "内存缩水"],
    "price_warranty": ["价格", "保修", "保修期", "保修问题", "贵"],
    "quality_issues": ["质量", "传输速度", "掉盘", "过热", "坏道"],
    "cost_effective": ["性价比", "不划算"],
    "logistics_problems": ["物流", "包装", "配送", "送货慢"],
    "after_sales": ["售后", "退货", "退换货", "客服", "服务问题"],
    "missing_parts": ["零件", "螺丝", "配件缺失", "没发"],
    "not_national_product": ["不是国行", "非国行", "水货"]
}

def classify_features(text):
    matched_features = []
    for category, keywords in feature_categories.items():
        for keyword in keywords:
            if keyword in text:
                matched_features.append(category)
                break  # 每个类别只需匹配一次
    return matched_features

data["classified_features"] = data["text"].apply(classify_features)

print(data[["text", "classified_features"]].head())

                                                text classified_features
0  你说得对，但是980Pro是8月9号装上去，9月11号与9月21号能连掉两次的高端ssd；一...       [after_sales]
1  买两个盘，用三星魔术师测试写入读取持续高温到85到88度，高温，我就退货，但是京东售后收到货...       [after_sales]
2                                     言过其实，完全没有大厂的品质                  []
3                                             少了一百个g                  []
4                                             这速度正常吗                  []


In [33]:
def sentiment_analysis(text):
    positive_words = ["好", "优秀", "快", "满意", "清晰",""]
    negative_words = ["差", "慢", "不满意", "模糊", "贵","韭菜","少","抠门","垃圾"]
    score = 0
    for word in positive_words:
        if word in text:
            score += 1
    for word in negative_words:
        if word in text:
            score -= 1
    return "positive" if score > 0 else "negative" if score < 0 else "neutral"

# 结合特性分类的情感分析
def feature_sentiment_analysis(row):
    sentiments = {}
    for feature in row["classified_features"]:
        sentiments[feature] = sentiment_analysis(row["text"])
    return sentiments

# 添加情感分析结果
data["feature_sentiments"] = data.apply(feature_sentiment_analysis, axis=1)

# 查看结果
print(data[["text", "classified_features", "feature_sentiments"]].head())

                                                text classified_features  \
0  你说得对，但是980Pro是8月9号装上去，9月11号与9月21号能连掉两次的高端ssd；一...       [after_sales]   
1  买两个盘，用三星魔术师测试写入读取持续高温到85到88度，高温，我就退货，但是京东售后收到货...       [after_sales]   
2                                     言过其实，完全没有大厂的品质                  []   
3                                             少了一百个g                  []   
4                                             这速度正常吗                  []   

            feature_sentiments  
0  {'after_sales': 'positive'}  
1  {'after_sales': 'positive'}  
2                           {}  
3                           {}  
4                           {}  


In [35]:
output_path = "jingdong_comments_feature_analysis.xlsx"
data.to_excel(output_path, index=False)
print(f"分析结果已保存到 {output_path}")

分析结果已保存到 jingdong_comments_feature_analysis.xlsx
