In [None]:
#标题 Bert 情感分析

In [None]:
import os
import pandas as pd
import torch
import warnings
from transformers import pipeline
from tqdm.auto import tqdm
from transformers.utils import logging

# 1. Environment configuration
# NOTE(review): these variables are assigned after `transformers` has already
# been imported above; any setting a library reads at import time would not
# see them — confirm, and move this block above the imports if so.
os.environ["HF_HUB_DISABLE_XET_WARNING"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HUB_DISABLE_XET"] = "1"
# `logging` here is transformers.utils.logging (see the imports), not the stdlib module.
logging.set_verbosity_error()
# Suppresses every Python warning for the rest of the process.
warnings.filterwarnings("ignore")

# 2. Model initialisation
def init_analyzer():
    """Build the text-classification pipeline used for sentiment scoring.

    Returns:
        The object returned by ``pipeline(...)`` for the
        ``finiteautomata/bertweet-base-sentiment-analysis`` model.

    Raises:
        Exception: Whatever pipeline construction raised; it is re-raised
            unchanged after a failure message has been printed.
    """
    model_name = "finiteautomata/bertweet-base-sentiment-analysis"
    try:
        # Device index 0 when torch reports CUDA as available, -1 otherwise.
        target_device = 0 if torch.cuda.is_available() else -1
        classifier = pipeline(
            "text-classification",
            model=model_name,
            device=target_device,
        )
        print("✅ 模型加载成功")
        return classifier
    except Exception as err:
        print(f"❌ 模型加载失败: {err}")
        raise

# Load the model once, when this cell/module runs; calculate_sentiment() reads
# this module-level name on every call.
emotion_analyzer = init_analyzer()

# 3. Sentiment scoring
def calculate_sentiment(text):
    """Map a text to a sentiment value on a 0-100 scale, where 50 is neutral.

    The module-level ``emotion_analyzer`` pipeline returns the top label and
    its confidence. Per the model card the labels are ``POS``, ``NEG`` and
    ``NEU``; they are mapped as follows:

    * ``POS``  -> ``score * 100``        (more confident => closer to 100)
    * ``NEG``  -> ``(1 - score) * 100``  (more confident => closer to 0)
    * anything else (``NEU``) -> ``50.0``

    Args:
        text: The text to score. Non-string or blank input is treated as
            neutral without calling the model.

    Returns:
        float: Sentiment value rounded to two decimals. ``50.0`` is also
        returned when the analysis raises, after printing the error.
    """
    if not isinstance(text, str) or not text.strip():
        return 50.0  # default neutral value

    try:
        # The character slice only bounds the input size; truncation=True asks
        # the tokenizer to also cap the token count at the model's maximum.
        result = emotion_analyzer(text[:512], truncation=True)[0]
        label = result['label']
        score = result['score']

        if label == "POS":
            sentiment_value = score * 100
        elif label == "NEG":
            sentiment_value = (1 - score) * 100
        else:
            # A neutral prediction must not be pushed through the negative
            # formula: a confident NEU would otherwise land near 0.
            sentiment_value = 50.0

        return round(sentiment_value, 2)

    except Exception as e:
        # Best-effort by design: one bad row must not abort a long batch run.
        print(f"分析出错: {str(e)}")
        return 50.0  # default neutral value

# 4. Batch processing
def process_data(input_path, output_path):
    """Score every title in an Excel sheet and write the results to Excel.

    Reads ``input_path``, computes a sentiment value for each ``title`` via
    ``calculate_sentiment``, writes the ``id``, ``title`` and ``sentiment``
    columns to ``output_path`` and prints summary statistics.

    Errors are reported with a printed message rather than raised. A file at
    ``output_path`` is removed only when the failure happened while that file
    was being written, so results from an earlier run are not deleted when,
    for example, the input file cannot be read.

    Args:
        input_path: Path of the Excel file to read; it must contain the
            columns ``id`` and ``title``.
        output_path: Path of the Excel file to write.

    Returns:
        None
    """
    write_in_progress = False
    try:
        df = pd.read_excel(input_path)
        if 'id' not in df.columns or 'title' not in df.columns:
            raise ValueError("输入文件需要包含'id'和'title'列")

        sentiments = []
        for title in tqdm(df['title'], total=len(df), desc="处理进度"):
            if isinstance(title, str):
                text = title
            elif pd.isna(title):
                # Empty cells arrive as NaN; str(NaN) would be the literal
                # "nan" and get scored as if it were real text.
                text = ""
            else:
                text = str(title)
            sentiments.append(calculate_sentiment(text))

        # Building from the source columns also handles an empty sheet, where
        # a frame made from an empty list of records would have no columns.
        result_df = df[['id', 'title']].copy()
        result_df['sentiment'] = sentiments

        # Save the results
        write_in_progress = True
        result_df.to_excel(output_path, index=False)
        write_in_progress = False
        print(f"✅ 结果已保存到: {output_path}")

        # Print summary statistics
        print("\n情感值统计:")
        print(result_df['sentiment'].describe())

    except Exception as e:
        print(f"❌ 处理失败: {str(e)}")
        # Only clean up a file this call may have left half-written.
        if write_in_progress and os.path.exists(output_path):
            os.remove(output_path)

# 5. Script entry point
if __name__ == "__main__":
    # Placeholder paths: replace with the real input and output Excel files.
    INPUT = r"XXXXXXXXXXXXXXXXXXXX"
    OUTPUT = r"XXXXXXXXXXXXXXXXXXXXXXXXXXX"

    # Three-line banner: a rule, the centred title, and a second rule.
    rule = "=" * 50
    print(rule, " 情感计算开始 ".center(50, "="), rule, sep="\n")

    process_data(INPUT, OUTPUT)

  from .autonotebook import tqdm as notebook_tqdm


✅ 模型加载成功


处理进度: 100%|██████████| 10173/10173 [12:42<00:00, 13.34it/s]


✅ 结果已保存到: D:\AAA\一\大论文\子研究二\情感值计算.xlsx

情感值统计:
count    10173.000000
mean         5.266420
std          3.131227
min          2.080000
25%          4.390000
50%          5.060000
75%          5.750000
max         99.230000
Name: sentiment, dtype: float64
