# 文本向量化

In [None]:
from FlagEmbedding import BGEM3FlagModel

# Instantiate the embedding model from the 'BAAI/bge-m3' identifier.
model = BGEM3FlagModel('BAAI/bge-m3',  
                       use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation

# Two groups of example sentences: short questions, and longer descriptive
# statements to compare them against.
sentences_1 = ["What is BGE M3?", "Defination of BM25"]
sentences_2 = ["BGE M3 is an embedding model supporting dense retrieval, lexical matching and multi-vector interaction.", 
               "BM25 is a bag-of-words retrieval function that ranks a set of documents based on the query terms appearing in each document"]

# encode() returns a mapping; only its 'dense_vecs' entry is kept here.
embeddings_1 = model.encode(sentences_1, 
                            batch_size=12, 
                            max_length=8192, # If you don't need such a long length, you can set a smaller value to speed up the encoding process.
                            )['dense_vecs']
# The second group is encoded with the default batch_size / max_length.
embeddings_2 = model.encode(sentences_2)['dense_vecs']
# Matrix product of the first group with the transposed second group.
# NOTE(review): assumes dense_vecs are 2-D (n_sentences, dim) arrays, so that
# similarity[i][j] scores sentences_1[i] against sentences_2[j] -- confirm.
similarity = embeddings_1 @ embeddings_2.T
print(similarity)

# 评估脚本

In [14]:
import yaml
import pandas as pd

from tqdm import tqdm
from modules.agent import CommentAnalysisAgent



def read_csv(testset_path, version='default'):
    """Load the tab-separated test set and prepare its result columns.

    Args:
        testset_path: Path of the tab-separated file to read.
        version: Name of the column that will hold this run's responses;
            it is (re)initialised to empty strings.

    Returns:
        The loaded DataFrame without any column whose name starts with
        ``Unnamed``, with the ``version`` column copied into ``v1_4o_mini``
        and the ``version``-named result column reset to ``''``.
    """
    raw = pd.read_csv(testset_path, sep='\t')

    # Keep every column except those whose name starts with "Unnamed".
    unnamed_mask = raw.columns.str.contains('^Unnamed')
    df = raw.loc[:, ~unnamed_mask]

    # Preserve the contents of the 'version' column under 'v1_4o_mini'.
    df['v1_4o_mini'] = df['version']
    # Start the requested result column from a clean slate.
    df[version] = ''

    print(df.columns)
    return df


def evaluate(robot, testset, testset_path, version):
    """Run the agent on every comment and persist its responses.

    Args:
        robot: Object exposing ``comment_analyze(comment)``; whatever it
            returns is stored in the ``version`` column.
        testset: DataFrame with a ``'内容'`` column holding the comment text.
        testset_path: Path the updated table is written to (tab-separated,
            without the index).
        version: Name of the column that receives the responses.

    Returns:
        The same ``testset`` object, updated in place.
    """
    # Make sure the target column exists so it can be addressed by position.
    if version not in testset.columns:
        testset[version] = ''
    version_pos = testset.columns.get_loc(version)

    comments = testset['内容'].tolist()
    for pos, comment in enumerate(tqdm(comments)):
        # Rows without text (e.g. NaN) are skipped and left untouched.
        if not isinstance(comment, str):
            continue

        # Read and write by position: the previous mix of ``iloc`` (position)
        # and ``loc`` (label) targeted the wrong row, or appended new rows,
        # whenever the index was not the default 0..n-1 range.
        testset.iat[pos, version_pos] = robot.comment_analyze(comment)

    testset.to_csv(testset_path, index=False, sep='\t')

    return testset


def _numbered_columns(columns, prefix, limit=100):
    """Return the consecutive ``prefix1``, ``prefix2``, ... names found in *columns*."""
    names = []
    for number in range(1, limit):
        name = prefix + str(number)
        # Stop at the first gap: numbering is expected to be contiguous.
        if name not in columns:
            break
        names.append(name)
    return names


def _parse_response(response):
    """Parse a stored response into a dict; return None when it is not one."""
    import ast

    try:
        # SECURITY: responses come from a model / a CSV file, so they are
        # parsed as Python literals only instead of being passed to eval(),
        # which would execute arbitrary code embedded in the text.
        parsed = ast.literal_eval(response)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return None
    return parsed if isinstance(parsed, dict) else None


def metric_calculate(testset, version):
    """Compute extraction precision and recall for one result column.

    For every row whose ``'内容'`` value is a string, the non-null values of
    the ``好评点N`` / ``差评点N`` columns are the expected points. The row's
    ``version`` cell is parsed as a dict literal; each entry counts as one
    extraction, and is correct when its key starts with ``好评点`` (resp.
    ``差评点``) and its value is among the expected good (resp. bad) points.
    Entries with any other key count towards the total only. Responses that
    cannot be parsed into a dict contribute no extractions.

    Args:
        testset: DataFrame holding the comments, labels and responses.
        version: Name of the column containing the responses to score.

    Returns:
        ``(precision, recall)``. Each is ``0.0`` when its denominator is
        zero (no extractions, or no expected points).
    """
    total_extraction = 0
    correct_extraction = 0
    wrong_extraction = 0
    expected_extraction = 0

    # The label columns are the same for every row, so look them up once.
    good_columns = _numbered_columns(testset.columns, '好评点')
    bad_columns = _numbered_columns(testset.columns, '差评点')

    for _, row in testset.iterrows():
        # Rows without comment text are excluded from both metrics.
        if not isinstance(row['内容'], str):
            continue

        good_points = [row[name] for name in good_columns if pd.notna(row[name])]
        bad_points = [row[name] for name in bad_columns if pd.notna(row[name])]
        expected_extraction += len(good_points) + len(bad_points)

        output = _parse_response(row[version])
        if output is None:
            continue

        for key, value in output.items():
            total_extraction += 1
            if not isinstance(key, str):
                continue
            if key.startswith('好评点'):
                expected_points = good_points
            elif key.startswith('差评点'):
                expected_points = bad_points
            else:
                continue

            if value in expected_points:
                correct_extraction += 1
            else:
                wrong_extraction += 1

    print('total_extraction:', total_extraction)
    print('correct_extraction:', correct_extraction)
    print('wrong_extraction:', wrong_extraction)
    # Guard against empty denominators instead of raising ZeroDivisionError.
    precision = correct_extraction / total_extraction if total_extraction else 0.0
    recall = correct_extraction / expected_extraction if expected_extraction else 0.0
    print('precision:', precision)
    print('recall:', recall)

    return precision, recall
        

# Load the API credentials from a local YAML file.
with open("openai_keys.yaml", "r", encoding="utf-8") as file:
    data = yaml.safe_load(file)

# The key is read from the 'key' field of the 'tom' entry.
my_key = data['tom']['key']
robot = CommentAnalysisAgent(openai_key=my_key, model="gpt-4o-2024-08-06")  # gpt-4o-2024-08-06  gpt-4o-mini-2024-07-18

# Name of the column that stores this run's responses.
version = 'v1_4o'
testset_path = './docs/Comment_Analysis_Testset.csv'
# read_csv() resets the `version` column to empty strings before the run.
testset = read_csv(testset_path, version)

# Calls the agent once per comment and writes the table back to testset_path.
testset = evaluate(robot, testset, testset_path, version)

print('version: ', version)
precision, recall = metric_calculate(testset, version)

Index(['标题', '内容', '星级', '赞同数', '评论链接', '评论时间', '好评点1', '好评点2', '好评点3', '好评点4',
       '好评点5', '好评点6', '差评点1', '差评点2', '差评点3', '差评点4', '差评点5', '差评点6', '差评点7',
       'version', 'v1', 'v1_4o_mini', 'v1_4o'],
      dtype='object')


100%|██████████| 102/102 [02:58<00:00,  1.75s/it]

version:  v1_4o
total_extraction: 270
correct_extraction: 128
wrong_extraction: 142
precision: 0.4740740740740741
recall: 0.5





In [10]:
# Recompute the metrics for the responses already stored in `testset`.
print('version: ', version)
precision, recall = metric_calculate(testset, version)

total_extraction: 309
correct_extraction: 148
wrong_extraction: 161
precision: 0.47896440129449835
recall: 0.578125
