In [14]:
import json

# JSONL file of per-query score records (id, prompt and metric scores).
# NOTE(review): the directory name suggests these come from the crossover-strategy run — confirm.
evals_path = "./evaluation_metrics/with_crossover_strategy/full_evals.jsonl"
# JSONL file of the evaluation queries; later cells read each record's 'id' and 'topic'.
evaluation_data_path = "./evaluation_data/query.jsonl"

def read_jsonl(file_path: str) -> list:
    """Read a JSON Lines file and return its parsed records in file order.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A list with one parsed JSON value per non-blank, well-formed line.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.

    Lines that are blank after stripping are skipped. Lines that are not
    valid JSON are reported on stdout and skipped, so one bad record does
    not abort the whole load.
    """
    existing_data = []
    # Decode as UTF-8 explicitly: the records hold non-ASCII text, and the
    # platform default encoding is not UTF-8 everywhere.
    with open(file_path, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()  # drop surrounding whitespace and the newline
            if not line:
                continue
            try:
                existing_data.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Error parsing line: {e}")
    return existing_data


In [19]:
# Load the query records; later cells read each record's 'id' and 'topic'.
eval_data = read_jsonl(evaluation_data_path)

In [None]:
# Lookup table from query id to topic label, built from the query records.
# If an id occurs more than once, the last record's topic wins.
id_by_topic = dict(
    (record['id'], record['topic'])
    for record in eval_data
)
id_by_topic

In [22]:
# Tally how many query records belong to each topic.
count_by_topic = {}
for data in eval_data:
    topic = data['topic']
    # A topic seen for the first time starts from zero.
    count_by_topic[topic] = count_by_topic.get(topic, 0) + 1

In [17]:
# Load the score records and show the first one to check its fields.
evals_data = read_jsonl(evals_path)
evals_data[0]

{'id': 1,
 'prompt': '收集整理目前中国9阶层实际收入和财务状况，特别研究得出中国的中产有哪些特点，实际中产人数，财力等等',
 'comprehensiveness': 0.4263322884012539,
 'insight': 0.4805194805194805,
 'instruction_following': 0.43452380952380953,
 'readability': 0.45739910313901344,
 'overall_score': 0.4525368493437378}

In [38]:
# Running per-topic sums of each metric; a later cell divides them by the
# per-topic counts to obtain averages.
compr_by_topic = {}
insight_by_topic = {}
instr_foll_by_topic = {}
read_by_topic = {}
overall_by_topic = {}
items_by_topic = {}  # declared here but not filled by this cell

# Maps each score field of a score record to the dict that accumulates it.
sums_by_metric = {
    'comprehensiveness': compr_by_topic,
    'insight': insight_by_topic,
    'instruction_following': instr_foll_by_topic,
    'readability': read_by_topic,
    'overall_score': overall_by_topic,
}

# The loop variable is deliberately not named `eval`: at notebook top level
# that would rebind the built-in eval() for the rest of the kernel session.
for record in evals_data:
    # Raises KeyError if a score record has no matching query id.
    topic = id_by_topic[record['id']]
    for metric, topic_sums in sums_by_metric.items():
        # A topic seen for the first time starts from zero.
        topic_sums[topic] = topic_sums.get(topic, 0) + record[metric]

In [39]:
def average_score_by_topic(score_by_topic, count_by_topic):
    """Divide each topic's summed score by that topic's count.

    Args:
        score_by_topic: Mapping of topic -> summed score.
        count_by_topic: Mapping of topic -> number of items; it must contain
            every topic present in ``score_by_topic``.

    Returns:
        A new dict of topic -> mean score, in the iteration order of
        ``score_by_topic``. Neither argument is modified, so calling this
        again with the same sums gives the same averages instead of
        dividing a second time.

    Raises:
        KeyError: If a topic in ``score_by_topic`` has no count.
        ZeroDivisionError: If a topic's count is zero.
    """
    return {
        topic: total / count_by_topic[topic]
        for topic, total in score_by_topic.items()
    }

# Replace each per-topic sum with its per-topic mean, keeping the same names.
# The denominators come from count_by_topic, which is tallied from the query
# records rather than from the score records.
# NOTE(review): the means are only right if every query has exactly one score
# record — confirm. The names are rebound to the averages, so re-running this
# cell without rebuilding the sums first divides a second time.
(
    compr_by_topic,
    insight_by_topic,
    instr_foll_by_topic,
    read_by_topic,
    overall_by_topic,
) = [
    average_score_by_topic(topic_sums, count_by_topic)
    for topic_sums in (
        compr_by_topic,
        insight_by_topic,
        instr_foll_by_topic,
        read_by_topic,
        overall_by_topic,
    )
]

In [45]:
# Show the per-topic comprehensiveness averages.
compr_by_topic

{'Finance & Business': 0.4136751891316087,
 'Science & Technology': 0.4281786827070988,
 'Software Development': 0.4579773967728741,
 'Education & Jobs': 0.4226308812399672,
 'Health': 0.44273860001288123,
 'Literature': 0.40329886570566476,
 'History': 0.4396204320110796,
 'Hardware': 0.43922318495660984,
 'Industrial': 0.43614712358526464,
 'Art & Design': 0.44440094949463654,
 'Games': 0.4780193035157988,
 'Crime & Law': 0.4662426035502959,
 'Entertainment': 0.41291021757857005,
 'Sports & Fitness': 0.4228450231408997,
 'Software': 0.4808801860293778,
 'Transportation': 0.4476920117963842,
 'Religion': 0.43712121212121213,
 'Home & Hobbies': 0.43801092944730763,
 'Travel': 0.39440525881118726,
 'Food & Dining': 0.44974766070516425,
 'Fashion & Beauty': 0.4416149887848001,
 'Social Life': 0.45316456861012955}

In [None]:
import pandas as pd

# Tabulate the per-topic readability averages, one row per topic.
df = pd.DataFrame(list(read_by_topic.items()), columns=['Category', 'Average Readability Score'])

# The CSV write is off by default; set save_csv = True to export.
# The status message below follows the flag, so it never reports a save
# that did not happen.
csv_filename = 'readability_score_by_topics.csv'
save_csv = False

print(df.head())
if save_csv:
    df.to_csv(csv_filename, index=False)
    print(f"\nSaved to {csv_filename}")
else:
    print(f"\nCSV export skipped (set save_csv = True to write {csv_filename})")

               Category  Average Readability Score
0    Finance & Business                   0.490952
1  Science & Technology                   0.478679
2  Software Development                   0.494004
3      Education & Jobs                   0.477333
4                Health                   0.482616

Saved to readability_score_by_topics.csv
