### Get accuracy, precision, recall, and F1 score

In [3]:
import json
import re

def load_jsonl(file_path):
    """Load a JSON Lines file into a list of parsed records.

    Blank or whitespace-only lines (for example a trailing empty line at the
    end of the file) are skipped instead of being handed to ``json.loads``,
    which raises on empty input.

    Args:
        file_path: Path of the UTF-8 encoded ``.jsonl`` file to read.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # `line.strip()` is falsy for empty / whitespace-only lines.
        return [json.loads(line) for line in f if line.strip()]

def clean_prediction(prediction):
    """Reduce a raw ``predict`` string to a single sentiment character.

    The text is stripped of surrounding whitespace and lower-cased, then
    searched for the first occurrence of '正' (positive) or '负' (negative).

    Args:
        prediction: Raw prediction text produced by the model.

    Returns:
        '正' or '负' (whichever appears first), or 'unknown' when neither
        character is present.
    """
    normalized = prediction.strip().lower()
    found = re.search(r"(正|负)", normalized)
    if found is None:
        # Neither sentiment character appears anywhere in the text.
        return "unknown"
    return found.group(1)

def calculate_metrics(data):
    """Compute accuracy, precision, recall and F1 for binary sentiment data.

    '正面' is treated as the positive class and '负面' as the negative class.
    Each entry's ``predict`` text is reduced with ``clean_prediction`` before
    it is compared with the label.

    Entries whose label is neither '正面' nor '负面' are ignored. Entries with
    a valid label but a prediction that cannot be parsed are counted as
    errors rather than dropped, so unparseable model outputs cannot inflate
    accuracy or recall:

    * positive label, prediction not '正'  -> false negative;
    * negative label, prediction 'unknown' -> counted in the accuracy
      denominator only (it is neither a correct rejection nor a predicted
      positive).

    Args:
        data: Iterable of dicts, each with a 'label' string and an optional
            'predict' string (a missing 'predict' is treated as empty text).

    Returns:
        Tuple ``(accuracy, precision, recall, f1_score)``. Any metric whose
        denominator is zero is returned as 0.
    """
    true_pos = false_pos = true_neg = false_neg = 0
    unparsed_neg = 0  # negative-labelled entries with an unparseable prediction

    for entry in data:
        label = entry['label'].strip().lower()  # drop surrounding whitespace/newlines
        predict = clean_prediction(entry.get('predict', ''))

        if label == '正面':
            if predict == '正':
                true_pos += 1
            else:
                # Predicted negative OR produced nothing usable: either way
                # the positive example was missed.
                false_neg += 1
        elif label == '负面':
            if predict == '正':
                false_pos += 1
            elif predict == '负':
                true_neg += 1
            else:
                unparsed_neg += 1

    # Every entry with a recognised label is part of the accuracy denominator.
    total = true_pos + false_pos + true_neg + false_neg + unparsed_neg
    predicted_pos = true_pos + false_pos
    actual_pos = true_pos + false_neg

    accuracy = (true_pos + true_neg) / total if total > 0 else 0
    precision = true_pos / predicted_pos if predicted_pos > 0 else 0
    recall = true_pos / actual_pos if actual_pos > 0 else 0
    if (precision + recall) > 0:
        f1_score = 2 * precision * recall / (precision + recall)
    else:
        f1_score = 0

    return accuracy, precision, recall, f1_score

def main(file_path):
    """Load predictions from ``file_path`` and print the evaluation metrics.

    Each metric is printed on its own line, both as a fraction with four
    decimals and as a percentage with two decimals.

    Args:
        file_path: Path of the JSON Lines predictions file to evaluate.
    """
    records = load_jsonl(file_path)
    metrics = calculate_metrics(records)
    accuracy, precision, recall, f1_score = metrics

    report = [
        f"Accuracy: {accuracy:.4f} ({accuracy * 100:.2f}%)",
        f"Precision: {precision:.4f} ({precision * 100:.2f}%)",
        f"Recall: {recall:.4f} ({recall * 100:.2f}%)",
        f"F1-Score: {f1_score:.4f} ({f1_score * 100:.2f}%)",
    ]
    for line in report:
        print(line)
    


# Zero-shot
# file_path = 'Text Analysis/zero-shot/Llama-3-Cantonese-8B-Instruct_Openrice_test_alpaca_no_emojis_generated_predictions.jsonl' 
# file_path = 'Text Analysis/zero-shot/Llama-3-Cantonese-8B-Instruct_Openrice_test_alpaca_with_emojis_generated_predictions.jsonl' 
# file_path = 'Text Analysis/zero-shot/Llama-3-Cantonese-8B-Instruct_Openrice_test_alpaca_with_emojis_texts_generated_predictions.jsonl' 
# file_path = 'Text Analysis/zero-shot/Llama-3.1-Cantonese-8B-Instruct_Openrice_test_alpaca_no_emojis_generated_predictions.jsonl' 
# file_path = 'Text Analysis/zero-shot/Llama-3.1-Cantonese-8B-Instruct_Openrice_test_alpaca_with_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/zero-shot/Llama-3.1-Cantonese-8B-Instruct_Openrice_test_alpaca_with_emojis_texts_generated_predictions.jsonl'
# file_path = 'Text Analysis/zero-shot/Qwen-7B-Chat-Cantonese_Openrice_test_alpaca_no_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/zero-shot/Qwen-7B-Chat-Cantonese_Openrice_test_alpaca_with_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/zero-shot/Qwen-7B-Chat-Cantonese_Openrice_test_alpaca_with_emojis_texts_generated_predictions.jsonl'
# file_path = 'Text Analysis/zero-shot/Qwen2-Cantonese-7B-Instruct_Openrice_test_alpaca_no_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/zero-shot/Qwen2-Cantonese-7B-Instruct_Openrice_test_alpaca_with_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/zero-shot/Qwen2-Cantonese-7B-Instruct_Openrice_test_alpaca_with_emojis_texts_generated_predictions.jsonl'

# Fine-tuning
# file_path = 'Text Analysis/finetuning/Llama-3-Cantonese-8B-Instruct_Openrice_test_alpaca_no_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/finetuning/Llama-3-Cantonese-8B-Instruct_Openrice_test_alpaca_with_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/finetuning/Llama-3-Cantonese-8B-Instruct_Openrice_test_alpaca_with_emojis_texts_generated_predictions.jsonl'

# file_path = 'Text Analysis/finetuning/Llama-3.1-Cantonese-8B-Instruct_Openrice_test_alpaca_no_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/finetuning/Llama-3.1-Cantonese-8B-Instruct_Openrice_test_alpaca_with_emojis_texts_generated_predictions.jsonl'
# file_path = 'Text Analysis/finetuning/Llama-3.1-Cantonese-8B-Instruct_Openrice_test_alpaca_with_emojis_generated_predictions.jsonl'

# file_path = 'Text Analysis/finetuning/Qwen-7B-Chat-Cantonese_Openrice_test_alpaca_no_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/finetuning/Qwen-7B-Chat-Cantonese_Openrice_test_alpaca_with_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/finetuning/Qwen-7B-Chat-Cantonese_Openrice_test_alpaca_with_emojis_texts_generated_predictions.jsonl'

# file_path = 'Text Analysis/finetuning/Qwen2-Cantonese-7B-Instruct_Openrice_test_alpaca_no_emojis_generated_predictions.jsonl'
# file_path = 'Text Analysis/finetuning/Qwen2-Cantonese-7B-Instruct_Openrice_test_alpaca_with_emojis_generated_predictions.jsonl'
# Active run: the predictions file to evaluate. To score a different run,
# comment this line out and uncomment exactly one of the alternative paths
# listed above.
file_path = 'Text Analysis/finetuning/Qwen2-Cantonese-7B-Instruct_Openrice_test_alpaca_with_emojis_texts_generated_predictions.jsonl'


# Load the selected file and print accuracy, precision, recall and F1.
main(file_path)

Accuracy: 0.9006 (90.06%)
Precision: 0.9882 (98.82%)
Recall: 0.8996 (89.96%)
F1-Score: 0.9418 (94.18%)
