In [2]:
import json

# JSON Lines file of per-query evaluation scores (loaded into evals_data).
evals_path = "./evaluation_metrics/with_crossover_strategy/full_evals.jsonl"
# JSON Lines file of evaluation queries; records are read for their 'id' and 'topic' fields.
evaluation_data_path = "./evaluation_data/query.jsonl"

def read_jsonl(file_path: str) -> list:
    """Read a JSON Lines file and return its records in file order.

    Blank (whitespace-only) lines are ignored. A line that is not valid JSON
    is reported on stdout together with its 1-based line number and skipped,
    so the returned list may be shorter than the number of lines in the file.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Returns:
        A list with one decoded JSON value per successfully parsed line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    records = []
    # Decode explicitly as UTF-8: the records may contain non-ASCII text, and
    # the default encoding of open() depends on the platform locale.
    with open(file_path, "r", encoding="utf-8") as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()  # drop surrounding whitespace and the newline
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as e:
                # Best-effort load: report the bad line and keep going.
                print(f"Error parsing line {line_number}: {e}")
    return records


In [3]:
# Load the evaluation queries (one record per non-empty line of the query file).
eval_data = read_jsonl(file_path=evaluation_data_path)

In [None]:
# Lookup table from query id to its topic. Despite the variable name, the
# keys are ids and the values are topics.
id_by_topic = dict((query['id'], query['topic']) for query in eval_data)
id_by_topic

In [5]:
# Tally how many queries in eval_data belong to each topic.
count_by_topic = {}
for query in eval_data:
    query_topic = query['topic']
    count_by_topic[query_topic] = count_by_topic.get(query_topic, 0) + 1

In [6]:
# Load the per-query score records, then show the first one to check its fields.
evals_data = read_jsonl(file_path=evals_path)
evals_data[0]

{'id': 1,
 'prompt': '收集整理目前中国9阶层实际收入和财务状况，特别研究得出中国的中产有哪些特点，实际中产人数，财力等等',
 'comprehensiveness': 0.4263322884012539,
 'insight': 0.4805194805194805,
 'instruction_following': 0.43452380952380953,
 'readability': 0.45739910313901344,
 'overall_score': 0.4525368493437378}

In [7]:
# Per-topic running sums of each metric, keyed by topic name. These hold
# sums, not means, once this cell has run.
compr_by_topic = {}
insight_by_topic = {}
instr_foll_by_topic = {}
read_by_topic = {}
overall_by_topic = {}
items_by_topic = {}  # NOTE(review): never populated in this cell — confirm whether it is still needed

# Pair each score field of an evaluation record with the dict that sums it,
# so the accumulation below is written once instead of once per metric.
score_sums_by_metric = (
    ('comprehensiveness', compr_by_topic),
    ('insight', insight_by_topic),
    ('instruction_following', instr_foll_by_topic),
    ('readability', read_by_topic),
    ('overall_score', overall_by_topic),
)

# The loop variable is deliberately not called `eval`: that name would shadow
# the builtin and stay bound at module level for the rest of the notebook.
for record in evals_data:
    # Raises KeyError if a record's id is absent from the query file.
    topic = id_by_topic[record['id']]
    for metric, sums in score_sums_by_metric:
        sums[topic] = sums.get(topic, 0) + record[metric]

In [8]:
def average_score_by_topic(score_by_topic, count_by_topic):
    """Return per-topic means: each summed score divided by that topic's count.

    Args:
        score_by_topic: Mapping of topic -> summed score.
        count_by_topic: Mapping of topic -> number of items behind the sum.
            It must contain every topic present in ``score_by_topic``;
            extra topics are ignored.

    Returns:
        A new dict of topic -> ``score / count``, in the iteration order of
        ``score_by_topic``. Neither input mapping is modified, so calling the
        function again on the same sums yields the same result.

    Raises:
        KeyError: If a topic in ``score_by_topic`` has no entry in
            ``count_by_topic``.
        ZeroDivisionError: If a topic's count is zero.
    """
    return {
        topic: total / count_by_topic[topic]
        for topic, total in score_by_topic.items()
    }

# Use the number of evaluation records actually summed per topic as the
# denominator. count_by_topic is tallied from the query file, so it only
# matches when every query has exactly one record in evals_data; a missing,
# skipped or duplicated record would otherwise skew the mean.
evaluated_count_by_topic = {}
for record in evals_data:
    record_topic = id_by_topic[record['id']]
    evaluated_count_by_topic[record_topic] = evaluated_count_by_topic.get(record_topic, 0) + 1

# Each *_by_topic name is rebound from sums to averages here, so running this
# cell a second time without re-running the accumulation cell divides again.
compr_by_topic = average_score_by_topic(compr_by_topic, evaluated_count_by_topic)
insight_by_topic = average_score_by_topic(insight_by_topic, evaluated_count_by_topic)
instr_foll_by_topic = average_score_by_topic(instr_foll_by_topic, evaluated_count_by_topic)
read_by_topic = average_score_by_topic(read_by_topic, evaluated_count_by_topic)
overall_by_topic = average_score_by_topic(overall_by_topic, evaluated_count_by_topic)

In [10]:
overall_by_topic

{'Finance & Business': 0.45704806031795614,
 'Science & Technology': 0.4639509540587398,
 'Software Development': 0.47400448888107194,
 'Education & Jobs': 0.4486201842580779,
 'Health': 0.4595513795236078,
 'Literature': 0.43857246793840304,
 'History': 0.4609689304073729,
 'Hardware': 0.47603263896876985,
 'Industrial': 0.4637030252433332,
 'Art & Design': 0.4750036790096665,
 'Games': 0.5036190959758875,
 'Crime & Law': 0.4759683158423955,
 'Entertainment': 0.43205054124397974,
 'Sports & Fitness': 0.45627504950923536,
 'Software': 0.5078903746926131,
 'Transportation': 0.46020136920200216,
 'Religion': 0.4595312567408717,
 'Home & Hobbies': 0.4594243980678213,
 'Travel': 0.4243729027429348,
 'Food & Dining': 0.46092805185209496,
 'Fashion & Beauty': 0.4576062004401472,
 'Social Life': 0.4674229470454804}

In [None]:
import pandas as pd

# One row per topic, in the insertion order of overall_by_topic.
df = pd.DataFrame(list(overall_by_topic.items()), columns=['Category', 'Average Overall Score'])

# Writing the CSV is opt-in: set SAVE_CSV to True to (re)generate the file.
# The status message below reports what actually happened.
SAVE_CSV = False
csv_filename = 'overall_score_by_topics.csv'

print(df.head())
if SAVE_CSV:
    df.to_csv(csv_filename, index=False)
    print(f"\nSaved to {csv_filename}")
else:
    print(f"\nSkipped saving {csv_filename} (SAVE_CSV is False)")

               Category  Average Overall Score
0    Finance & Business               0.457048
1  Science & Technology               0.463951
2  Software Development               0.474004
3      Education & Jobs               0.448620
4                Health               0.459551

Saved to overall_score_by_topics.csv
