In [4]:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import csv
import random

def renumber_sentences(input_file, output_file, encoding=None):
    """Randomly permute the sentenceID values of a CSV file and write the result.

    The input is read with csv.DictReader. Every distinct ``sentenceID`` is
    mapped to another ID drawn from the same set (a random permutation of the
    distinct IDs), so no ID is lost or duplicated and rows that shared an ID
    in the input still share one in the output. Row order, ``gold_label`` and
    ``sentence`` are written unchanged; any other input columns are not
    written.

    Args:
        input_file: Path of the CSV to read. Its header must contain the
            columns ``sentenceID``, ``gold_label`` and ``sentence``.
        output_file: Path of the CSV to write (overwritten if it exists).
        encoding: Text encoding passed to ``open`` for both files. ``None``
            (the default) keeps the platform default encoding.

    Raises:
        KeyError: If the input has data rows but its header lacks one of the
            three required columns.
        OSError: If either file cannot be opened.

    Note:
        Shuffling uses the module-level ``random`` generator; call
        ``random.seed(...)`` beforehand for a reproducible permutation.
    """
    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(input_file, 'r', newline='', encoding=encoding) as f:
        data = list(csv.DictReader(f))  # keep every row in memory

    # Distinct IDs in first-seen order. Building the mapping from the distinct
    # IDs (rather than from one entry per row) guarantees a one-to-one mapping
    # even when several rows share the same sentenceID.
    unique_ids = list(dict.fromkeys(row['sentenceID'] for row in data))

    # Shuffle a copy: the set of IDs is preserved, only the assignment changes.
    shuffled_ids = unique_ids.copy()
    random.shuffle(shuffled_ids)

    # original ID -> new ID
    id_mapping = dict(zip(unique_ids, shuffled_ids))

    with open(output_file, 'w', newline='', encoding=encoding) as f:
        fieldnames = ['sentenceID', 'gold_label', 'sentence']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for row in data:
            # gold_label and sentence stay as they are; only sentenceID changes.
            writer.writerow({
                'sentenceID': id_mapping[row['sentenceID']],
                'gold_label': row['gold_label'],
                'sentence': row['sentence']
            })

    print(f"文件已处理完成，输出到 {output_file}")

# The input and output file paths are specified right here.
input_csv, output_csv = (
    "subtask1_train_augmentation.csv",  # replace with your input file path
    "subtask1_train.csv",  # replace with the output file path you want
)

# Optionally seed the random generator so the result is reproducible.
# random.seed(42)

# Run the renumbering.
renumber_sentences(input_file=input_csv, output_file=output_csv)

文件已处理完成，输出到 subtask1_train.csv


In [None]:
import json

def count_confusion(lines):
    """Tally binary confusion-matrix counts from JSONL prediction records.

    Each non-blank line is parsed as a JSON object. A record is counted only
    when its "label_counter" value is a one-element list whose single item is
    a two-element (true_label, prediction) pair; both values are converted
    with int(). Records without such a value are skipped.

    Label 1 is the positive class. For a positive true label, any prediction
    other than 1 counts as a false negative. For a non-positive true label,
    prediction 1 is a false positive, prediction 0 is a true negative, and any
    other prediction value is ignored.

    Args:
        lines: Iterable of strings, one JSON document per line (for example
            an open text file).

    Returns:
        Tuple ``(true_positives, false_positives, false_negatives,
        true_negatives)``.
    """
    tp = fp = fn = tn = 0
    for line in lines:
        if not line.strip():
            # Blank lines (e.g. a trailing newline) are not JSON; skip them.
            continue
        record = json.loads(line)
        label_counter = record.get("label_counter", [])

        # Only accept the shape [[true_label, prediction]].
        if not (label_counter and len(label_counter) == 1 and len(label_counter[0]) == 2):
            continue
        true_label, prediction = label_counter[0]
        true_label = int(true_label)
        prediction = int(prediction)  # values may arrive as strings

        if true_label == 1:
            if prediction == 1:
                tp += 1
            else:
                fn += 1
        elif prediction == 1:
            fp += 1  # predicted positive, actually negative
        elif prediction == 0:
            tn += 1  # predicted negative, actually negative
    return tp, fp, fn, tn


def precision_recall_f1(tp, fp, fn):
    """Return ``(precision, recall, f1)`` for the given counts.

    Each metric is 0 when its denominator would be zero.
    """
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return precision, recall, f1


# In a notebook kernel, or when run as a script, __name__ is "__main__", so
# this section executes as before and its variables remain module-level. It is
# skipped only when this code is imported as a module.
if __name__ == "__main__":
    # Read output.jsonl and accumulate the confusion-matrix counters.
    with open('output.jsonl', 'r') as file:
        true_positives, false_positives, false_negatives, true_negatives = count_confusion(file)

    # Compute recall, precision and F1 score.
    precision, recall, f1_score = precision_recall_f1(
        true_positives, false_positives, false_negatives
    )

    # Print the results in the order TP, FN, FP, TN, followed by the metrics.
    print(true_positives)
    print(false_negatives)
    print(false_positives)
    print(true_negatives)
    print(f"Recall: {recall:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"F1 Score: {f1_score:.4f}")

{}


KeyError: '0'