-
Notifications
You must be signed in to change notification settings - Fork 0
/
overall_scores.py
86 lines (71 loc) · 2.96 KB
/
overall_scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import json
import os
from tabulate import tabulate
from huggingface_hub import hf_hub_download
# Hugging Face dataset repository that hosts the scored translation files
dataset_repo = "cstr/Capybara-de-snippets"
# Scored JSONL files to evaluate, one per translation system.
# Each file is listed exactly once: a repeated name would only be downloaded
# and scored a second time without changing the final ranking.
dataset_files = [
    "Capybara_de_GPT4_scored.jsonl",
    "Capybara_de_Claude-3-Opus_scored.jsonl",
    "Capybara_de_GPT3.5_scored.jsonl",
    "Capybara_de_deepl_scored.jsonl",
    "Capybara_de_mixtral_scored.jsonl",
    "Capybara_de_occiglot_scored.jsonl",
    "Capybara_de_wmt19_scored.jsonl",
    "Capybara_de_discolm_scored.jsonl",
    "Capybara_de_TowerInstruct2_scored.jsonl",
    "Capybara_de_t5madlad_scored.jsonl",
    "Capybara_de_wmt21_scored.jsonl",
    "Capybara_de_nbbl_scored.jsonl",
]
# Fetch every scored JSONL file from the Hugging Face dataset repository.
# file_paths maps each file name to the path returned by hf_hub_download.
file_paths = {}
for file_name in dataset_files:
    file_path = hf_hub_download(
        repo_id=dataset_repo,
        filename=file_name,
        repo_type="dataset",
    )
    file_paths[file_name] = file_path
    print(f"Downloaded {file_name} to {file_path}")
# Helpers that reduce one scored JSONL file to its average turn scores


def _scalar_score(score):
    """Return *score* as a single number.

    A list-valued score contributes its first element; any other value is
    returned unchanged.
    """
    return score[0] if isinstance(score, list) else score


def _average_turn_scores(path):
    """Average the input and output scores over every turn in a JSONL file.

    Each non-blank line of *path* is parsed as a JSON object whose
    ``"conversation"`` entry is iterated as a sequence of turns carrying
    ``"input_score"`` and ``"output_score"``.

    Returns:
        ``(avg_input_score, avg_output_score)``, or ``None`` when the file
        contains no turns (so the caller never divides by zero).
    """
    input_score_sum = 0
    output_score_sum = 0
    total_turns = 0
    with open(path, "r", encoding="utf-8") as file:
        # Stream the file line by line instead of materialising every record
        for line in file:
            if not line.strip():
                # json.loads raises on blank lines; they carry no data
                continue
            conv = json.loads(line)
            for turn in conv["conversation"]:
                input_score_sum += _scalar_score(turn["input_score"])
                output_score_sum += _scalar_score(turn["output_score"])
                total_turns += 1
    if total_turns == 0:
        return None
    return input_score_sum / total_turns, output_score_sum / total_turns


# Overall score per file: the mean of its average input and output scores
overall_scores = {}
for file_name in dataset_files:
    averages = _average_turn_scores(file_paths[file_name])
    if averages is None:
        # Nothing to average; leave the file out of the ranking
        print(f"Skipping {file_name}: no scored turns found")
        continue
    avg_input_score, avg_output_score = averages
    overall_scores[file_name] = (avg_input_score + avg_output_score) / 2
# Rank the (file name, overall score) pairs from highest to lowest score
score_list = sorted(overall_scores.items(), key=lambda item: item[1], reverse=True)
# One [file name, score] row per file, in ranked order
table_data = [list(entry) for entry in score_list]
# Render the ranking as a grid-formatted table
headers = ["File", "Overall Score"]
print(tabulate(table_data, headers, tablefmt="grid"))