In [45]:
import json
import csv

# Load the raw per-domain task counts.
# JSON text is UTF-8, so read it explicitly as UTF-8 instead of relying on the
# platform default encoding (the output below is written as UTF-8 as well).
with open('rawExDomains.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Task names that are folded into a single aggregated 'glue' entry.
glue_tasks = {'cola', 'mnli', 'mrpc', 'qnli', 'sst2', 'wnli'}

# Every task name that survives into the processed output (across all domains).
all_tasks = set()
# domain -> {'count': adjusted total, <task>: count, ...}
processed_data = {}

for domain, values in data.items():
    mmlu_sum = 0
    glue_sum = 0
    arithmetic_sum = 0
    gpqa_sum = 0
    rte_sum = 0
    excluded_sum = 0
    other_tasks = {}

    for task, count in values.items():
        # 'count' is the domain total, not a task. If it were allowed through it
        # would land in other_tasks and, via the ** merge below, overwrite the
        # adjusted count with the original one.
        if task == 'count':
            continue

        # Only integer counts are aggregated; bool is rejected explicitly
        # because it is a subclass of int.
        if isinstance(count, bool) or not isinstance(count, int):
            continue

        # Drop tasks whose name contains 'n_shot'; remember how much was dropped.
        if 'n_shot' in task:
            excluded_sum += count
            continue

        # Of the gpqa tasks keep only the zeroshot variants; drop the rest.
        if 'gpqa' in task:
            if 'zeroshot' in task:
                gpqa_sum += count
            else:
                excluded_sum += count
            continue

        # Drop 'rte' tasks; their counts are subtracted from the domain total.
        # NOTE(review): this is a substring match, so any task name that merely
        # contains 'rte' is dropped too - confirm that is intended.
        if 'rte' in task:
            rte_sum += count
            continue

        if task.startswith('mmlu_'):
            mmlu_sum += count
        elif task in glue_tasks:
            glue_sum += count
        elif 'arithmetic' in task:
            arithmetic_sum += count
        else:
            other_tasks[task] = count
            all_tasks.add(task)

    # Emit an aggregated entry only when it actually received something.
    if mmlu_sum > 0:
        other_tasks['mmlu'] = mmlu_sum
        all_tasks.add('mmlu')
    if glue_sum > 0:
        other_tasks['glue'] = glue_sum
        all_tasks.add('glue')
    if arithmetic_sum > 0:
        other_tasks['arithmetic'] = arithmetic_sum
        all_tasks.add('arithmetic')
    if gpqa_sum > 0:
        other_tasks['gpqa'] = gpqa_sum
        all_tasks.add('gpqa')

    # Reduce the domain total by everything that was dropped, never below zero.
    original_count = values.get('count', 0)
    adjusted_count = max(0, original_count - excluded_sum - rte_sum)

    # other_tasks no longer contains a 'count' key, so the adjusted value is kept.
    processed_data[domain] = {
        'count': adjusted_count,
        **other_tasks
    }

# Order domains by their adjusted count, largest first.
sorted_data = dict(sorted(processed_data.items(), key=lambda item: item[1]['count'], reverse=True))

# Persist the result as UTF-8 JSON.
with open('processed_data_KO.json', 'w', encoding='utf-8') as json_file:
    json.dump(sorted_data, json_file, ensure_ascii=False, indent=2)



In [46]:
import json

# Load the aggregated per-domain counts.
# The cell that creates this file writes it as UTF-8 with ensure_ascii=False,
# so it must be read back as UTF-8 rather than with the platform default encoding.
with open('processed_data_KO.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Task names to strip from every domain entry.
tasks_to_remove = ["llmzszl"]

def remove_tasks(data, tasks_to_remove):
    """Strip the given tasks from every domain entry and shrink its total.

    For each domain entry in ``data`` and each name in ``tasks_to_remove`` that
    the entry contains, the task's value is subtracted from the entry's
    ``"count"`` and the task key is deleted.

    Args:
        data: Mapping of domain -> mapping of task name -> count; each entry
            that contains a removed task must also have a ``"count"`` key.
        tasks_to_remove: Iterable of task names to delete.

    Returns:
        The same ``data`` object, modified in place.
    """
    for entry in data.values():
        for name in tasks_to_remove:
            if name not in entry:
                continue
            removed = entry[name]
            entry["count"] -= removed
            del entry[name]
    return data

# Strip the unwanted tasks (this also lowers each affected domain's count).
data = remove_tasks(data, tasks_to_remove)

# Overwrite the processed file with the updated counts.
with open('processed_data_KO.json', mode='w') as out_file:
    json.dump(data, fp=out_file, indent=4)

print("Zadania zostały usunięte i count zaktualizowane.")

Zadania zostały usunięte i count zaktualizowane.


In [47]:
import json
# Read back the per-domain counts from the processed file.
with open('processed_data_KO.json', mode='r') as src:
    data = json.load(src)

# Within each domain, order the entries by value, largest first. Each dict is
# emptied and refilled so it stays the same object inside `data`.
for entries in data.values():
    ranked = sorted(entries.items(), key=lambda pair: pair[1], reverse=True)
    entries.clear()
    entries.update(ranked)

# Order the domains themselves by their 'count', largest first.
sorted_data = dict(sorted(data.items(), key=lambda pair: pair[1]["count"], reverse=True))

# Overwrite the processed file with the fully sorted structure.
with open('processed_data_KO.json', mode='w') as dst:
    json.dump(sorted_data, dst, indent=4)

In [48]:
import json
import csv

# A domain is exported only when its 'count' is strictly greater than this.
Threshold = 600

with open('processed_data_KO.json', mode='r') as src:
    data = json.load(src)

# Keep only the domains whose count exceeds the threshold.
above_threshold = {}
for domain, values in data.items():
    if values['count'] > Threshold:
        above_threshold[domain] = values
data = above_threshold

# One (domain, count) row per remaining domain, in the order they appear in the file.
csv_data = [(name, entry['count']) for name, entry in data.items()]

# Semicolon-separated output; the file name is fixed and does not include the threshold.
with open('ExDomains.csv', mode='w', newline='', encoding='utf-8') as out:
    writer = csv.writer(out, delimiter=';')
    writer.writerow(['Domain', 'Count'])
    writer.writerows(csv_data)

In [44]:
import json
import csv

# A domain is exported only when its value for Task is strictly greater than this.
Threshold = 95
Task = "llmzszl"  # task whose per-domain values are exported

with open('processed_data_KO.json', 'r') as file:
    data = json.load(file)

# This cell depends on notebook execution order: another cell in this notebook
# deletes "llmzszl" from processed_data_KO.json. If that cell has already run,
# no domain has the task any more and the export would silently be header-only,
# so say so explicitly instead of producing an empty file without notice.
if not any(Task in values for values in data.values()):
    print(
        f"Warning: no domain in processed_data_KO.json has a '{Task}' entry; "
        "ExDomains-LLMZSZL.csv will contain only the header row."
    )

# Keep only the domains that have the task and whose value exceeds Threshold.
filtered_data = {
    domain: values for domain, values in data.items()
    if Task in values and values[Task] > Threshold
}

# One (domain, task value) row per remaining domain.
csv_data = [(domain, values[Task]) for domain, values in filtered_data.items()]

# Write the semicolon-separated CSV; the second header column is the task name.
with open('ExDomains-LLMZSZL.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file, delimiter=';')
    writer.writerow(['Domain', Task])
    writer.writerows(csv_data)
