In [17]:
import os

from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM, GPT2Tokenizer, GPT2Model, GPT2LMHeadModel, AutoConfig

# Load the LLaMA tokenizer from a local checkpoint directory.
# The access token is read from the HF_TOKEN environment variable instead of
# being hardcoded in the notebook; it resolves to None when the variable is unset.
tokenizer = LlamaTokenizer.from_pretrained('/data/LLM_models/llama-7b', token=os.environ.get("HF_TOKEN"))

# Sample text
text = "Hello, my name is Kimi and I am a large language model."

# Encode the text as PyTorch tensors, truncating to at most 512 tokens
inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)

# Inspect input_ids: .size() prints the whole torch.Size (e.g. [1, 16] for
# this text), not a single length value.
print("Length of input_ids:", inputs["input_ids"].size())

Length of input_ids: torch.Size([1, 16])


In [None]:
import math
from collections import Counter

def ngram_precision(candidate, reference, n):
    """Return the clipped n-gram precision of ``candidate`` against ``reference``.

    Args:
        candidate: Sliceable sequence of tokens being scored.
        reference: Sliceable sequence of tokens to compare against.
        n: Size of the n-grams to compare.

    Returns:
        The sum over candidate n-grams of min(candidate count, reference count),
        divided by the total number of candidate n-grams. Returns 0 when the
        candidate yields no n-grams at all (for example when it is shorter
        than ``n``).
    """
    def _count_ngrams(tokens):
        # Zipping n progressively shifted views of the sequence yields every
        # run of n consecutive tokens as a tuple.
        shifted_views = [tokens[offset:] for offset in range(n)]
        return Counter(zip(*shifted_views))

    cand_counts = _count_ngrams(candidate)
    ref_counts = _count_ngrams(reference)

    if not cand_counts:
        return 0

    # Clip each candidate n-gram count by how often it occurs in the reference.
    matched = sum(min(count, ref_counts[gram]) for gram, count in cand_counts.items())
    total = sum(cand_counts.values())
    return matched / total

def brevity_penalty(candidate, reference):
    """Return the BLEU brevity penalty for ``candidate`` against ``reference``.

    The penalty is 1 when the candidate is at least as long as the reference
    and exp(1 - r / c) otherwise, where c and r are the candidate and
    reference lengths. The result therefore always lies in [0, 1].

    Args:
        candidate: Sized sequence of candidate tokens.
        reference: Sized sequence of reference tokens.

    Returns:
        A float in [0, 1]; 0.0 for an empty candidate.
    """
    cand_len = len(candidate)
    ref_len = len(reference)

    # An empty candidate carries no information; this also avoids dividing by zero.
    if cand_len == 0:
        return 0.0
    if cand_len >= ref_len:
        return 1.0
    # The exponent uses r / c (not c / r) so that shorter candidates are
    # penalised: r / c > 1 here, hence the result is strictly below 1.
    return math.exp(1 - ref_len / cand_len)

def sentence_bleu(candidate, reference, max_n=4):
    """Compute a sentence-level BLEU-style score against a single reference.

    The score is the brevity penalty multiplied by the geometric mean of the
    n-gram precisions for n = 1..max_n.

    NOTE: n-gram orders whose precision is 0 are dropped before averaging, so
    the mean is taken over the remaining orders only. This differs from
    textbook BLEU, where a single zero precision makes the whole score zero.

    Args:
        candidate: Sequence of candidate tokens.
        reference: Sequence of reference tokens.
        max_n: Highest n-gram order to include (default 4).

    Returns:
        The score as a float, or 0 when no n-gram order has a positive precision.
    """
    positive_precisions = []
    for order in range(1, max_n + 1):
        precision = ngram_precision(candidate, reference, order)
        # Skip zero precisions: log(0) is undefined, and this implementation
        # chooses to ignore those orders instead of zeroing the score.
        if precision > 0:
            positive_precisions.append(precision)

    if len(positive_precisions) == 0:
        return 0  # no matching n-grams at any order

    # Geometric mean computed in log space.
    log_total = math.fsum(math.log(value) for value in positive_precisions)
    geometric_mean = math.exp(log_total / len(positive_precisions))

    penalty = brevity_penalty(candidate, reference)
    return penalty * geometric_mean

# Example usage: score one candidate sentence against a single reference.
candidate = "the cat is on the mat"
references = "the cat s"

# Both sentences are whitespace-tokenised before scoring.
score = sentence_bleu(
    candidate.split(),
    references.split(),
)
print(f"BLEU score: {score:.4f}")

Counter({('the',): 2, ('cat',): 1, ('is',): 1, ('on',): 1, ('mat',): 1})
Counter({('the',): 1, ('cat',): 1, ('s',): 1})
2
0.3333333333333333
Counter({('the', 'cat'): 1, ('cat', 'is'): 1, ('is', 'on'): 1, ('on', 'the'): 1, ('the', 'mat'): 1})
Counter({('the', 'cat'): 1, ('cat', 's'): 1})
1
0.2
Counter({('the', 'cat', 'is'): 1, ('cat', 'is', 'on'): 1, ('is', 'on', 'the'): 1, ('on', 'the', 'mat'): 1})
Counter({('the', 'cat', 's'): 1})
0
0.0
Counter({('the', 'cat', 'is', 'on'): 1, ('cat', 'is', 'on', 'the'): 1, ('is', 'on', 'the', 'mat'): 1})
Counter()
0
0.0
[0.3333333333333333, 0.2, 0.0, 0.0]
BLEU score: 0.2582
