In [1]:
import os
from bert_serving.client import BertClient
import pandas as pd
import numpy as np
import re

# Create the BertClient instance. check_length=True keeps the client-side
# length check ENABLED (it is not disabled here): per the bert-serving-client
# documentation the client then warns when a text has more tokens than the
# server's max_seq_len, in which case the embedding may be truncated.
bc = BertClient(check_length=True)

# Directory that contains the text files to read
directory_path = 'C:\\Users\\22749\\Desktop\\UoG\\Fintech\\Dissertation\\Data\\Cleaned_text'

# Result records; one dict is appended per processed file
results = []

# Month names in calendar order; a month's position in this list is used as
# its index when sorting filenames chronologically
months_order = ["January", "February", "March", "April", "May", "June", 
                "July", "August", "September", "October", "November", "December"]

# Sort-key function used to order the input files chronologically
def sort_key(filename):
    """Return a chronological sort key for a "<Month>_<YYYY>.txt" filename.

    Args:
        filename: Bare file name (no directory part), e.g. "March_2021.txt".

    Returns:
        ``(year, month_index)`` where ``month_index`` is the position of the
        month name in the module-level ``months_order`` list (0 = January).
        Any name that does not have exactly the form
        "<Month>_<YYYY>.txt", or whose month word is not listed in
        ``months_order``, yields ``(9999, 0)`` so that it sorts last.
    """
    # fullmatch (not match) so that trailing junk such as "x.txt.bak" is
    # treated as an invalid name instead of being accepted.
    match = re.fullmatch(r"([a-zA-Z]+)_(\d{4})\.txt", filename)
    if match:
        month, year = match.groups()
        # Guard the lookup: list.index() raises ValueError for an unknown
        # word, which would abort the whole sort instead of falling back.
        if month in months_order:
            return (int(year), months_order.index(month))
    return (9999, 0)  # fallback so that invalid file names sort last

# Collect the regular ".txt" files in the input directory. os.listdir() also
# returns sub-directories and unrelated files (e.g. ".ipynb_checkpoints"), and
# sort_key() only moves such names to the end of the order rather than
# excluding them, so without this filter they would be opened and encoded too.
text_filenames = [
    name for name in os.listdir(directory_path)
    if name.lower().endswith('.txt')
    and os.path.isfile(os.path.join(directory_path, name))
]

# Process the files in chronological order (year, then month)
for filename in sorted(text_filenames, key=sort_key):
    file_path = os.path.join(directory_path, filename)

    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    # BertClient.encode() rejects empty / whitespace-only strings with a
    # ValueError, so skip such files instead of aborting the whole run.
    if not text.strip():
        print(f'Skipping empty file: {filename}')
        continue

    # Encode the whole document as ONE sequence.
    # NOTE(review): text beyond the server's max_seq_len is truncated by
    # bert-as-service, so only the beginning of a long file is represented;
    # consider splitting the text into segments — confirm intended behaviour.
    # NOTE(review): per the bert-as-service documentation encode() returns
    # sentence embeddings; reading components 0/1/2 as positive / neutral /
    # negative assumes the server exposes FinBERT class scores — TODO confirm.
    encoded_values = bc.encode([text])

    # Column-wise mean of the first three components over all returned
    # vectors (a single vector here, because one text is sent per file).
    component_means = encoded_values[:, :3].mean(axis=0)
    positive_avg, neutral_avg, negative_avg = (float(v) for v in component_means)

    # Serialise the first returned vector as a comma-separated string
    encoded_values_str = ', '.join(map(str, encoded_values[0]))

    # Record the result for this file
    results.append({
        'Filename': filename,
        'Model': 'finbert',
        'Sentiment': encoded_values_str,
        'Overall Positive Sentiment': positive_avg,
        'Overall Neutral Sentiment': neutral_avg,
        'Overall Negative Sentiment': negative_avg
    })

# Build a DataFrame from the collected records and save it as an Excel file
df = pd.DataFrame(results)
output_path = 'C:\\Users\\22749\\Desktop\\UoG\\Fintech\\Dissertation\\Data\\sentiment_results.xlsx'
df.to_excel(output_path, index=False)

print(f'Sentiment analysis results saved to {output_path}')


here is what you can do:
- or, start a new server with a larger "max_seq_len"
  '- or, start a new server with a larger "max_seq_len"' % self.length_limit)


Sentiment analysis results saved to C:\Users\22749\Desktop\UoG\Fintech\Dissertation\Data\sentiment_results.xlsx
