In [1]:
# 本笔记使用 distiluse-base-multilingual-cased-v2 模型：它将句子和段落映射到 512 维密集向量空间，可用于聚类或语义搜索等任务

In [2]:
# AutoDL官方学术资源加速
import subprocess
import os

# Source the AutoDL network_turbo script in a bash subshell and capture the
# proxy-related variables it exports (one NAME=value pair per stdout line).
result = subprocess.run(
    'bash -c "source /etc/network_turbo && env | grep proxy"',
    shell=True,
    capture_output=True,
    text=True,
)
output = result.stdout

# Mirror every NAME=value line into this process's environment; a line
# without '=' yields an empty separator and is skipped.
for line in output.splitlines():
    var, separator, value = line.partition('=')
    if separator:
        os.environ[var] = value

In [3]:
from sentence_transformers import SentenceTransformer
import numpy as np
from typing import List, Dict
import torch

In [4]:
class ChineseTextEvaluator:
    """Rate a generated text against a reference via sentence-embedding similarity.

    Both texts are encoded with a SentenceTransformer model, compared by cosine
    similarity, and the score is bucketed into one of four quality levels using
    the class-level thresholds below.
    """

    # Model loaded when the constructor is called without ``model_name``.
    DEFAULT_MODEL_NAME = 'distiluse-base-multilingual-cased-v2'

    # Inclusive lower bounds of the 'excellent', 'good' and 'fair' levels;
    # any score below FAIR_THRESHOLD is 'poor'. Override on a subclass or an
    # instance to retune the buckets.
    EXCELLENT_THRESHOLD = 0.9
    GOOD_THRESHOLD = 0.7
    FAIR_THRESHOLD = 0.5

    def __init__(self, cache_dir="/root/autodl-tmp/sentence_transformers",
                 model_name=DEFAULT_MODEL_NAME):
        """Load the embedding model, caching its files under ``cache_dir``.

        Args:
            cache_dir: Directory passed to SentenceTransformer as
                ``cache_folder``; created if it does not exist.
            model_name: Name of the SentenceTransformer model to load.
        """
        # Create the cache directory if it does not exist yet.
        os.makedirs(cache_dir, exist_ok=True)

        # Load the semantic model, pointing it at the cache location.
        self.model = SentenceTransformer(
            model_name,
            cache_folder=cache_dir
        )

    def semantic_similarity(self, text1: str, text2: str) -> float:
        """Return the cosine similarity between the embeddings of two texts.

        Args:
            text1: First text.
            text2: Second text.

        Returns:
            The cosine similarity as a Python float. When the product of the
            two embedding norms is zero or not finite, 0.0 is returned instead
            of NaN so that callers always get a comparable number.
        """
        # Encode both texts in a single call; row 0 is text1, row 1 is text2.
        embeddings = self.model.encode([text1, text2])
        first, second = embeddings[0], embeddings[1]

        denominator = np.linalg.norm(first) * np.linalg.norm(second)
        # A zero-norm (or non-finite) embedding would make the division yield
        # NaN, which matches none of the quality buckets downstream.
        if denominator == 0 or not np.isfinite(denominator):
            return 0.0
        return float(np.dot(first, second) / denominator)

    def evaluate_response(self, hypothesis: str, reference: str) -> Dict:
        """Score a generated answer against a reference answer.

        Args:
            hypothesis: The generated text to evaluate.
            reference: The reference text to compare against.

        Returns:
            A dict with ``'semantic_similarity'`` (the float score) and
            ``'evaluation'``, a dict mapping each of ``'excellent'``,
            ``'good'``, ``'fair'`` and ``'poor'`` to a bool telling whether
            the score falls in that level.
        """
        semantic_score = self.semantic_similarity(hypothesis, reference)

        return {
            'semantic_similarity': semantic_score,
            'evaluation': {
                'excellent': semantic_score >= self.EXCELLENT_THRESHOLD,
                'good': self.GOOD_THRESHOLD <= semantic_score < self.EXCELLENT_THRESHOLD,
                'fair': self.FAIR_THRESHOLD <= semantic_score < self.GOOD_THRESHOLD,
                'poor': semantic_score < self.FAIR_THRESHOLD
            }
        }

In [5]:
# Usage example: build the evaluator with its default arguments.
# Constructing it loads the SentenceTransformer model (see ChineseTextEvaluator.__init__).
evaluator = ChineseTextEvaluator()

modules.json:   0%|          | 0.00/341 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/2.69k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/610 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/539M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/531 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.58M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.58M [00:00<?, ?B/s]

2_Dense/config.json:   0%|          | 0.00/114 [00:00<?, ?B/s]

In [6]:
# Try the evaluator on three sentence pairs: synonymous wording, similar
# meaning, and different meaning.
examples = [
    dict(hypothesis=hyp, reference=ref, description=desc)
    for hyp, ref, desc in (
        ('今天天气很好', '今天天气不错', '同义表达'),
        ('这个问题很难', '这道题目特别困难', '类似含义'),
        ('我喜欢吃苹果', '我讨厌吃香蕉', '不同含义'),
    )
]

for example in examples:
    result = evaluator.evaluate_response(example['hypothesis'], example['reference'])
    print(f"\n测试类型：{example['description']}")
    print(f"句子1：{example['hypothesis']}")
    print(f"句子2：{example['reference']}")
    print(f"语义相似度：{result['semantic_similarity']:.4f}")
    print("评估结果：", end=" ")
    # Report the first level whose flag is set; print nothing if none is.
    verdict = next(
        (name for name, matched in result['evaluation'].items() if matched),
        None,
    )
    if verdict is not None:
        print(verdict)


测试类型：同义表达
句子1：今天天气很好
句子2：今天天气不错
语义相似度：0.9875
评估结果： excellent

测试类型：类似含义
句子1：这个问题很难
句子2：这道题目特别困难
语义相似度：0.8548
评估结果： good

测试类型：不同含义
句子1：我喜欢吃苹果
句子2：我讨厌吃香蕉
语义相似度：0.6470
评估结果： fair
