In [15]:
import docx 
from rouge import Rouge
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import bert_score

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Open the ground-truth document for 6055.HK.
doc = docx.Document(r'4_ground_truth\6055.HK.docx')

# Read the contents, one paragraph per line. Joining on '' would fuse the last
# word of each paragraph onto the first word of the next one, corrupting any
# token-based processing of the text; a newline keeps paragraph boundaries and
# matches how extract_text_from_docx assembles the same document.
ground_truth = '\n'.join([para.text for para in doc.paragraphs])


14594

In [18]:
def extract_text_from_docx(docx_path):
    """Return the paragraph text of a .docx file as one newline-separated string.

    Args:
        docx_path: Path (or anything ``docx.Document`` accepts) of the file to read.

    Returns:
        The ``.text`` of every entry in ``Document.paragraphs``, in order,
        joined with ``'\\n'``.
    """
    # NOTE(review): only `.paragraphs` is read here; text stored elsewhere in
    # the document (e.g. tables) is presumably not included -- confirm.
    paragraphs = docx.Document(docx_path).paragraphs
    return '\n'.join(paragraph.text for paragraph in paragraphs)

def compute_rouge(generated, ground_truth):
    """Score ``generated`` against ``ground_truth`` with the ``rouge`` package.

    Args:
        generated: Hypothesis text (the text being evaluated).
        ground_truth: Reference text to compare against.

    Returns:
        Whatever ``Rouge.get_scores`` returns when called with ``avg=True``.
    """
    # The hypothesis is passed first and the reference second.
    return Rouge().get_scores(generated, ground_truth, avg=True)

def compute_cosine_similarity(doc1, doc2):
    """Return the cosine similarity of the TF-IDF vectors of two documents.

    A fresh ``TfidfVectorizer`` is fitted on just these two documents, so the
    vocabulary and weights come from the pair itself.

    Args:
        doc1: First document text.
        doc2: Second document text.

    Returns:
        The single entry of the 1x1 matrix produced by ``cosine_similarity``.
    """
    tfidf = TfidfVectorizer().fit_transform([doc1, doc2])
    # Slices (not plain indexing) keep each row two-dimensional, which is the
    # form passed on to cosine_similarity.
    first_row = tfidf[0:1]
    second_row = tfidf[1:2]
    pairwise = cosine_similarity(first_row, second_row)
    return pairwise[0][0]

def compute_bleu(generated, ground_truth):
    """Compute a smoothed sentence-level BLEU score for ``generated``.

    Both texts are tokenised with ``str.split()`` (whitespace), and the whole
    ``ground_truth`` text is used as the single reference.

    Args:
        generated: Candidate text.
        ground_truth: Reference text.

    Returns:
        The value returned by ``sentence_bleu`` with
        ``SmoothingFunction().method4`` as the smoothing function.
    """
    reference_tokens = ground_truth.split()
    candidate_tokens = generated.split()
    return sentence_bleu(
        [reference_tokens],  # sentence_bleu takes a list of references
        candidate_tokens,
        smoothing_function=SmoothingFunction().method4,
    )

def compute_bertscore(generated, ground_truth):
    """Return the BERTScore F1 of ``generated`` against ``ground_truth``.

    The two texts are scored as a single candidate/reference pair with
    ``lang="en"``. ``verbose=True`` is passed, so ``bert_score`` prints its
    progress output.

    Args:
        generated: Candidate text.
        ground_truth: Reference text.

    Returns:
        The mean of the returned F1 values, converted with ``.item()``.
    """
    # NOTE(review): very long inputs may be truncated to the underlying
    # model's maximum sequence length -- confirm against the bert_score docs.
    # Precision and recall are returned too, but only F1 is used.
    _precision, _recall, f1_scores = bert_score.score(
        [generated], [ground_truth], lang="en", verbose=True
    )
    return f1_scores.mean().item()

In [19]:
# Inputs for this evaluation run: the generated report (plain text) and the
# ground-truth report (.docx) for 6055.HK.
generated_path = r'2_report_log\6055.HK_20241123_2059.txt'
ground_truth_path = r'4_ground_truth\6055.HK.docx'

# Read inside a context manager so the file handle is closed deterministically
# instead of being left open until garbage collection.
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm it matches the encoding the report was written with.
with open(generated_path, "r") as report_file:
    generated_report = report_file.read()
ground_truth_report = extract_text_from_docx(ground_truth_path)

In [20]:
# ROUGE of the generated report (hypothesis) against the ground-truth report
# (reference). compute_rouge requests averaged scores (avg=True); the result
# is printed as returned.
rouge_score = compute_rouge(generated_report, ground_truth_report)
print(f"rouge score: {rouge_score}")

rouge score: {'rouge-1': {'r': 0.1565217391304348, 'p': 0.2819843342036554, 'f': 0.20130474843819632}, 'rouge-2': {'r': 0.03360116873630387, 'p': 0.06774668630338733, 'f': 0.044921870567555866}, 'rouge-l': {'r': 0.1463768115942029, 'p': 0.26370757180156656, 'f': 0.18825721814928673}}


In [21]:
# Cosine similarity between the TF-IDF vectors of the two reports; the
# vectorizer inside compute_cosine_similarity is fitted on this pair only.
similarity = compute_cosine_similarity(generated_report, ground_truth_report)
print(f"Cosine Similarity: {similarity}")

Cosine Similarity: 0.7128947745817923


In [22]:
# Smoothed sentence-level BLEU of the generated report, using the whole
# ground-truth report as the single reference (whitespace tokenisation, see
# compute_bleu).
bleu_score = compute_bleu(generated_report, ground_truth_report)
print(f"BLEU Score: {bleu_score}")

BLEU Score: 0.00815540344271176


In [23]:
# BERTScore F1 of the generated report against the ground-truth report.
# compute_bertscore calls bert_score with verbose=True, so progress output is
# printed in addition to the final score.
bertscore = compute_bertscore(generated_report, ground_truth_report)
print(f"BERTScore: {bertscore}")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:01<00:00,  1.15s/it]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 61.38it/s]

done in 1.17 seconds, 0.85 sentences/sec
BERTScore: 0.7085480093955994



