In [32]:
from sentence_transformers import SentenceTransformer, util
import numpy as np
import csv


In [33]:

# Load the two models whose similarity scores are compared below.
# The first is the original model.
original_model = SentenceTransformer(
    'paraphrase-multilingual-MiniLM-L12-v2'
)
# The second is the fine-tuned model; replace the argument with the path
# to your own fine-tuned model.
finetuned_model = SentenceTransformer(
    'finetuned_paraphrase-multilingual-MiniLM-L12-v2'
)


In [34]:

# Target sentences that every dataset line is compared against.
target_sentences = ["查询服务器信息", "服务器现在开着吗？"]

# Embed the target sentences once per model, keyed by a model label, so the
# embeddings can be reused for every dataset line.
target_embeddings = {
    label: model.encode(target_sentences, convert_to_tensor=True)
    for label, model in (
        ('original', original_model),
        ('finetuned', finetuned_model),
    )
}

# Read the dataset, one entry per line of the file (line endings are kept
# here; later code is expected to strip them).
with open('./dataset/short.txt', encoding='utf-8') as file:
    lines = list(file)


In [None]:

# Compute similarities and save them to CSV.
#
# Every dataset line that is at least 2 characters long after stripping is
# scored (cosine similarity) against both target sentences with both models,
# and one CSV row per sentence is written to res.csv.
#
# Each model encodes the whole list of sentences in a single batched call and
# one cos-sim call yields the full (n_sentences x n_targets) score matrix,
# instead of re-encoding every sentence once per target. Batched encoding can
# change scores at float-rounding level compared with one-by-one encoding.

# Strip surrounding whitespace and drop entries shorter than 2 characters.
sentences = [
    stripped
    for stripped in (line.strip() for line in lines)
    if len(stripped) >= 2
]

if sentences:
    # Row k of each matrix holds the scores of sentences[k] against
    # target_sentences[0] and target_sentences[1], in that order.
    original_sims = util.pytorch_cos_sim(
        original_model.encode(sentences, convert_to_tensor=True),
        target_embeddings['original'],
    ).cpu().tolist()
    finetuned_sims = util.pytorch_cos_sim(
        finetuned_model.encode(sentences, convert_to_tensor=True),
        target_embeddings['finetuned'],
    ).cpu().tolist()
else:
    # Nothing to score: skip encoding an empty batch and emit a header-only CSV.
    original_sims, finetuned_sims = [], []

# The output file is opened only after all scores exist, so a failure while
# encoding does not leave a truncated res.csv behind.
with open('res.csv', mode='w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['句子', '原始模型_查询服务器信息', '原始模型_服务器现在开着吗', 'finetuned模型_查询服务器信息', 'finetuned模型_服务器现在开着吗']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

    for sentence, original_sim, finetuned_sim in zip(sentences, original_sims, finetuned_sims):
        # Write one CSV row per sentence.
        writer.writerow({
            '句子': sentence,
            '原始模型_查询服务器信息': original_sim[0],
            '原始模型_服务器现在开着吗': original_sim[1],
            'finetuned模型_查询服务器信息': finetuned_sim[0],
            'finetuned模型_服务器现在开着吗': finetuned_sim[1]
        })

print("相似度计算完成，结果已保存至res.csv")

相似度计算完成，结果已保存至res.csv
