In [4]:
# -*- coding: utf-8 -*-
# 在Google Colab上运行的情感分析代码

# 安装必要的库
!pip install jieba openpyxl
!pip install chardet

# 导入库
import pandas as pd
import jieba
import chardet

# Detect a file's text encoding
def detect_encoding(file_path):
    """Guess the text encoding of the file at ``file_path``.

    The whole file is read as raw bytes and handed to ``chardet.detect``;
    the ``'encoding'`` entry of its result is returned unchanged.

    NOTE(review): chardet can presumably report no encoding (``None``) for
    input it cannot classify -- confirm callers cope with that.
    """
    with open(file_path, 'rb') as raw_file:
        raw_bytes = raw_file.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']

# Detect each lexicon's encoding up front so the files can be decoded below.
negative_encoding = detect_encoding('/content/tsinghua_negative_gb_1.txt')
positive_encoding = detect_encoding('/content/tsinghua_positive_gb_1.txt')

print(f"负面词典编码：{negative_encoding}")
print(f"正面词典编码：{positive_encoding}")

# Load the sentiment lexicons: one entry per line, surrounding whitespace
# removed, each file decoded with the encoding detected above.
with open('/content/tsinghua_negative_gb_1.txt', 'r', encoding=negative_encoding) as lexicon_file:
    negative_words = set(map(str.strip, lexicon_file))

with open('/content/tsinghua_positive_gb_1.txt', 'r', encoding=positive_encoding) as lexicon_file:
    positive_words = set(map(str.strip, lexicon_file))

# Lexicon-based sentiment classification
def sentiment_analysis(text):
    """Classify ``text`` as positive, negative or neutral by lexicon hits.

    The text is segmented with jieba and every token is looked up in the
    module-level ``positive_words`` and ``negative_words`` sets.

    Args:
        text: The text to classify. Missing values (``None`` or a float
            NaN, which is what pandas yields for empty Excel cells) are
            treated as neutral; any other non-string value is converted
            with ``str`` before segmentation.

    Returns:
        "正面" when positive hits outnumber negative hits, "负面" when
        negative hits outnumber positive hits, otherwise "中性".
    """
    if not isinstance(text, str):
        # jieba expects a string; without this guard a single empty cell
        # would abort the whole DataFrame.apply run.
        # `text != text` is true only for NaN.
        if text is None or (isinstance(text, float) and text != text):
            return "中性"
        text = str(text)

    # Blank text contains no lexicon words, so skip segmentation entirely.
    if not text.strip():
        return "中性"

    # Word segmentation
    words = jieba.lcut(text)
    pos_count = sum(1 for word in words if word in positive_words)
    neg_count = sum(1 for word in words if word in negative_words)

    if pos_count > neg_count:
        return "正面"
    if pos_count < neg_count:
        return "负面"
    return "中性"

# Load the cleaned answers from the Excel workbook
file_path = '/content/cleaned_data.xlsx'  # path of the file once uploaded
df = pd.read_excel(file_path)

# Column that holds the text to classify; stop early if the workbook lacks it
text_column = '回答文本'
if text_column not in df.columns:
    raise ValueError(f"请确保Excel文件包含列 '{text_column}'")

# Label every row with its sentiment and write the annotated table back out
df['sentiment'] = df[text_column].map(sentiment_analysis)
output_file = 'sentiment_results.xlsx'
df.to_excel(output_file, index=False)
print(f"分析完成，结果已保存为 {output_file}")

# Count how many rows fall into each sentiment label
sentiment_counts = df['sentiment'].value_counts()

# Show the tally: heading first, then the counts on the following lines
print("情感统计结果：", sentiment_counts, sep="\n")

# Persist the tally as its own workbook, with the labels kept as the index
# column and the counts written under a 'Count' header
stat_output_file = 'sentiment_statistics.xlsx'
sentiment_counts.to_excel(stat_output_file, header=['Count'])
print(f"情感统计结果已保存为 {stat_output_file}")


负面词典编码：UTF-16
正面词典编码：UTF-16
分析完成，结果已保存为 sentiment_results.xlsx
情感统计结果：
sentiment
正面    129
中性     39
负面     31
Name: count, dtype: int64
情感统计结果已保存为 sentiment_statistics.xlsx
