In [24]:
import re

# Input data: two score reports held as multi-line strings. Each report has one
# "<Metric> Scores:" header per metric, followed by "  Score <k>: <value>" lines
# for k in 1, 0, -1; the three values of every section sum to 100.
# NOTE(review): the layout matches what analyze_dialogue_scores() prints, so
# these were presumably pasted from its output -- confirm which runs they are.
data1 = """
Understandability Scores:
  Score 1: 63.1
  Score 0: 25.5
  Score -1: 11.4

Naturalness Scores:
  Score 1: 60.4
  Score 0: 36.2
  Score -1: 3.4

Recommendation Scores:
  Score 1: 73.2
  Score 0: 17.4
  Score -1: 9.4
"""

# Second report, same layout as data1.
data2 = """
Understandability Scores:
  Score 1: 8.1
  Score 0: 22.8
  Score -1: 69.1

Naturalness Scores:
  Score 1: 10.1
  Score 0: 55.7
  Score -1: 34.2

Recommendation Scores:
  Score 1: 14.8
  Score 0: 23.5
  Score -1: 61.7
"""


# Extract the scores of each metric section separately
def extract_block(data, section_name):
    """Locate the "<section_name> Scores:" section in ``data`` and parse it.

    Args:
        data: Report text made of "<Name> Scores:" headers, each followed by
            lines of the form "  Score <k>: <value>".
        section_name: Section title without the trailing " Scores:" part,
            e.g. "Naturalness". It is matched literally.

    Returns:
        Whatever ``extract_scores`` returns for the matched text (the header
        line together with the score lines that follow it).

    Raises:
        ValueError: If ``data`` has no such header followed by at least one
            score line.
    """
    # Escape the title so regex metacharacters in it (".", "(", "+", ...) are
    # treated as plain text instead of altering the pattern.
    # "[10-]+" is a loose character class covering the labels 1, 0 and -1.
    pattern = rf"{re.escape(section_name)} Scores:\n(?:\s*Score [10-]+: [0-9.]+\n?)+"
    match = re.search(pattern, data)
    if match is None:
        raise ValueError(f"Section '{section_name}' not found.")
    # NOTE(review): extract_scores is not defined in the visible cells; it must
    # already exist in the kernel namespace for this function to work.
    return extract_scores(match.group())

# Parse the three metric sections of the first report, in the same order as
# the individual calls they replace.
understandability1, naturalness1, recommendation1 = (
    extract_block(data1, title)
    for title in ("Understandability", "Naturalness", "Recommendation")
)

# Same three sections for the second report.
understandability2, naturalness2, recommendation2 = (
    extract_block(data2, title)
    for title in ("Understandability", "Naturalness", "Recommendation")
)


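# Compute data3 with the requested rule: average data1 with data2, pairing data1's +1 with data2's -1 (and vice versa)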
def compute_data3(scores1, scores2):
    """Average two score mappings, mirroring the sign of the second one.

    Args:
        scores1: Mapping (or other indexable) with entries for 1, 0 and -1.
        scores2: Same shape as ``scores1``; its 1 and -1 entries are swapped
            before averaging.
            NOTE(review): presumably because the second report compares the
            same pair in the opposite order -- confirm.

    Returns:
        dict with keys 1, 0, -1 (in that order) where each value is
        ``(scores1[k] + scores2[-k]) / 2``.
    """
    # (key in scores1 / result, key read from scores2)
    mirrored_keys = ((1, -1), (0, 0), (-1, 1))
    return {
        own: (scores1[own] + scores2[other]) / 2
        for own, other in mirrored_keys
    }

# Combine the two reports for each of the three metrics.
understandability3, naturalness3, recommendation3 = (
    compute_data3(first, second)
    for first, second in (
        (understandability1, understandability2),
        (naturalness1, naturalness2),
        (recommendation1, recommendation2),
    )
)

# Print one row per metric as "&<+1>& <0>& <-1>", rounded to one decimal
# (the "&" separators look like LaTeX table cells -- TODO confirm).
for _row in (understandability3, naturalness3, recommendation3):
    print(f"&{_row[1]:.1f}& {_row[0]:.1f}& {_row[-1]:.1f}")

&66.1& 24.1& 9.8
&47.3& 46.0& 6.8
&67.5& 20.4& 12.1


In [16]:
def analyze_dialogue_scores(
    log_path='/data/liyuanzi/HUAWEI/GUsim_V5/src/evaluator/result/eval_common_pairwiseB4ominiVSB3.5_U38B2.log',
):
    """Print the score distribution found in an evaluation log.

    The log is scanned for lines of the form
    ``Dialogue <n> Scores: {'naturalness': '<s>', 'recommendation': '<s>',
    'understandability': '<s>'}``. For every metric, the share of dialogues
    that received -1, 0 and 1 is printed as a percentage with one decimal.

    Args:
        log_path: Path of the log file to read. Defaults to the log this
            notebook was originally written for.

    Returns:
        None. The summary is written to stdout only.

    Raises:
        OSError: If ``log_path`` cannot be opened.
        KeyError: If a matched score is not one of '-1', '0' or '1'.
    """
    # Imported locally so this cell runs on its own in a fresh kernel.
    import re

    metrics = ['naturalness', 'recommendation', 'understandability']
    scores = {metric: {'-1': 0, '0': 0, '1': 0} for metric in metrics}

    # One capture group per metric, in the same order as `metrics`.
    pattern = r"Dialogue \d+ Scores: \{'naturalness': '(-?\d+)', 'recommendation': '(-?\d+)', 'understandability': '(-?\d+)'\}"

    # Load the whole log into memory.
    with open(log_path, 'r') as f:
        content = f.read()

    # Each match is a tuple of three score strings.
    matches = re.findall(pattern, content)
    total_dialogues = len(matches)
    for match in matches:
        for metric, value in zip(metrics, match):
            scores[metric][value] += 1

    print(f"Total dialogues analyzed: {total_dialogues}")
    if total_dialogues == 0:
        # Without this guard the percentage computation below divides by zero.
        print("No dialogue score lines matched; nothing to summarize.")
        return

    print("\nScore distribution:")
    print("-" * 50)
    # Header row kept so the printed output stays identical to earlier runs,
    # even though the rows below are printed as per-metric sections.
    print(f"{'Metric':<20} | {'-1':<10} | {'0':<10} | {'1':<10}")
    print("-" * 50)

    for metric in metrics:
        print(f"{metric.capitalize()} Scores:")
        for score, count in scores[metric].items():
            print(f"  Score {score}: {count / total_dialogues * 100:.1f}")
        print()

    print("-" * 50)

# Run the analysis: reads the evaluation log and prints the score distribution.
analyze_dialogue_scores()


Total dialogues analyzed: 142

Score distribution:
--------------------------------------------------
Metric               | -1         | 0          | 1         
--------------------------------------------------
Naturalness Scores:
  Score -1: 54.9
  Score 0: 40.1
  Score 1: 4.9

Recommendation Scores:
  Score -1: 76.1
  Score 0: 11.3
  Score 1: 12.7

Understandability Scores:
  Score -1: 78.2
  Score 0: 11.3
  Score 1: 10.6

--------------------------------------------------
