In [1]:
from BERTScoreCalculator import BERTCalculator
from MeteorCalculator import MeteorCalculator
from NISTCalculator import NISTCalculator
from RougeCalculator import RougeCalculator # pip install datasets==2.21.0
from sacreBLEUCalculator import sacreBLEUCalculator
import json
import pandas as pd # type: ignore
import os
import warnings
# Base directory under which declare() looks for the 'results' and 'json' folders.
# NOTE(review): this name shadows the builtin dir(); declare() reads it as a
# module-level global, so any rename must be applied there as well.
dir = os.getcwd()

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def read_json_file(file_path, key_mapping=None):
    """Load a JSON list of records and keep only the reference/prediction pair.

    Each record's keys are first renamed according to ``key_mapping``; every
    key other than ``'reference'`` and ``'prediction'`` is then dropped.

    Args:
        file_path: Path to a JSON file containing a list of dicts.
        key_mapping: Optional ``{source_key: target_key}`` dict applied to each
            record before filtering. Defaults to
            ``{'News_Body': 'reference', 'Prediction': 'prediction'}``. Pass
            e.g. ``{'summary': 'reference', 'Prediction': 'prediction'}`` to
            score against a different reference field without editing this
            function.

    Returns:
        A list of dicts, one per input record, containing whichever of
        ``'reference'`` and ``'prediction'`` were present after renaming.
    """
    if key_mapping is None:
        key_mapping = {'News_Body': 'reference', 'Prediction': 'prediction'}

    with open(file_path, 'r') as f:
        data = json.load(f)

    keys_to_keep = ['reference', 'prediction']
    subset_data = []
    for record in data:
        # Rename first so that mapped source keys survive the filter below.
        renamed = {key_mapping.get(key, key): value for key, value in record.items()}
        subset_data.append({key: renamed[key] for key in keys_to_keep if key in renamed})

    return subset_data

def data_preprocess(input_json_path, json_file_path):
    """Write the reference/prediction subset of a results file to disk.

    Args:
        input_json_path: Destination path for the reduced JSON (this is the
            file later fed to the score calculators).
        json_file_path: Path of the raw results JSON to read.

    Returns:
        ``input_json_path``, unchanged, for convenient chaining.
    """
    reduced_records = read_json_file(json_file_path)

    with open(input_json_path, 'w') as out_handle:
        json.dump(reduced_records, out_handle, indent=4)

    return input_json_path

def json_to_list(file_path):
    """Parse the JSON file at ``file_path`` and return its contents as-is."""
    with open(file_path, 'r') as source:
        return json.load(source)

def corpus_level_extraction(data):
    """Split a calculator result into its trailing entry and the rest.

    The last element of ``data`` is treated as the corpus-level entry and all
    preceding elements as the per-sample entries.

    Args:
        data: A non-empty sequence (supports negative indexing and slicing).

    Returns:
        A 2-tuple ``(corpus_level, per_sample)`` where ``corpus_level`` is
        ``data[-1]`` and ``per_sample`` is ``data[:-1]``.

    Raises:
        IndexError: If ``data`` is empty.
    """
    corpus_level = data[-1]
    per_sample = data[:-1]
    # Both halves must be returned: the caller unpacks exactly two values.
    return corpus_level, per_sample

def ScoreCalculator(name, input_json_path):
    """Run one metric calculator over a prepared reference/prediction file.

    The calculator class is looked up by name as ``f"{name}Calculator"`` in
    this module's global namespace (e.g. ``"BERT"`` -> ``BERTCalculator``).

    Args:
        name: Metric prefix; ``name + "Calculator"`` must be a callable
            available at module level.
        input_json_path: Path passed to the calculator's
            ``compute_from_file`` method.

    Returns:
        ``(corpus_level, data)`` as produced by ``corpus_level_extraction``
        on the calculator's result.

    Raises:
        NameError: If no global named ``name + "Calculator"`` exists.
    """
    score_name = name + "Calculator"

    # Plain namespace lookup instead of eval(): only an existing global name
    # can be resolved, never an arbitrary expression.
    try:
        calculator_cls = globals()[score_name]
    except KeyError:
        raise NameError(f"name '{score_name}' is not defined") from None

    calculator = calculator_cls()
    result = calculator.compute_from_file(input_json_path)

    # Separate the corpus-level entry from the per-sample scores.
    corpus_level, data = corpus_level_extraction(result)

    return corpus_level, data

def dump_to_json(merged_list, file_path):
    """Serialize ``merged_list`` to ``file_path`` as 4-space-indented JSON.

    Prints a confirmation message once the file has been written.
    """
    with open(file_path, 'w') as out_handle:
        json.dump(merged_list, fp=out_handle, indent=4)

    print("JSON file has been created successfully.")

def declare(idataset, isize, imethod, irank):
    """Build the file paths for one experiment configuration.

    Each argument is an integer index into the corresponding option list
    defined below. The run name is ``{dataset}_{rank}_{method}_{size}``,
    except for the ``'ia3'`` method, whose name omits the rank component.

    Args:
        idataset: Index into the dataset options.
        isize: Index into the dataset-size options.
        imethod: Index into the method options.
        irank: Index into the rank options.

    Returns:
        A 3-tuple ``(json_file_path, input_json_path, file_path)``:
        ``results/results_{name}.json``, ``json/{name}.json`` and
        ``json/output_{name}.json``, each under the module-level ``dir``.
    """
    file_type = 'json'  # not referenced below; the extension is spelled out in the f-strings

    size_options = ['8', '16', '32', '64', '128', '256', '512', '1024', '2048', '4096', '8192', 'full']
    method_options = ['adalora', 'ia3', 'lora', 'qlora']
    dataset_options = ['scinews', 'elife']
    rank_options = ['rank4', 'rank32', 'rank512']

    results_dir = os.path.join(dir, 'results')
    json_dir = os.path.join(dir, 'json')

    size = size_options[isize]
    method = method_options[imethod]
    dataset = dataset_options[idataset]
    rank = rank_options[irank]

    # ia3 runs have no rank component in their file names.
    name = f'{dataset}_{method}_{size}' if method == 'ia3' else f'{dataset}_{rank}_{method}_{size}'

    return (
        os.path.join(results_dir, f'results_{name}.json'),
        os.path.join(json_dir, f'{name}.json'),
        os.path.join(json_dir, f'output_{name}.json'),
    )

def to_json(file_path, data, corpus_level_BERT, data_BERT, corpus_level_Meteor, data_Meteor, corpus_level_Rouge, data_Rouge, corpus_level_scareBLEU, data_scareBLEU):
    """Merge per-sample records with their metric scores and write them out.

    For every position ``i`` the record ``data[i]`` is combined with the
    dicts at the same position in each ``data_*`` list (later sources win on
    key clashes). A final ``{'corpus_level': {...}}`` entry, merged from the
    ``'corpus_level'`` dict of each ``corpus_level_*`` argument, is appended,
    and the whole list is written via ``dump_to_json``.

    Args:
        file_path: Destination path for the merged JSON.
        data: List of per-sample record dicts.
        corpus_level_BERT, corpus_level_Meteor, corpus_level_Rouge,
        corpus_level_scareBLEU: Dicts each holding a ``'corpus_level'`` dict.
        data_BERT, data_Meteor, data_Rouge, data_scareBLEU: Per-sample score
            dicts, indexable in parallel with ``data``.
    """
    merged_list = [
        {
            **record,
            **data_BERT[position],
            **data_Meteor[position],
            **data_Rouge[position],
            **data_scareBLEU[position],
        }
        for position, record in enumerate(data)
    ]

    # Fold the four corpus-level dicts together in BERT, Meteor, Rouge, BLEU order.
    corpus_summary = {}
    for metric_result in (corpus_level_BERT, corpus_level_Meteor, corpus_level_Rouge, corpus_level_scareBLEU):
        corpus_summary = {**corpus_summary, **metric_result['corpus_level']}
    merged_list.append({'corpus_level': corpus_summary})

    dump_to_json(merged_list, file_path)

def scores_to_dict(json_file_path, input_json_path, file_path):
    """Score one results file with every metric and save the merged output.

    Steps: reduce the raw results to reference/prediction pairs on disk, load
    the raw records, run the BERT, Meteor, Rouge and sacreBLEU calculators in
    that order, then hand everything to ``to_json``.

    Args:
        json_file_path: Path of the raw results JSON.
        input_json_path: Path where the reduced reference/prediction JSON is
            written and from which the calculators read.
        file_path: Destination path for the merged, scored JSON.
    """
    input_json_path = data_preprocess(input_json_path, json_file_path)
    data = json_to_list(json_file_path)

    metric_names = ("BERT", "Meteor", "Rouge", "sacreBLEU")
    metric_outputs = [ScoreCalculator(metric, input_json_path) for metric in metric_names]
    (
        (bert_corpus, bert_rows),
        (meteor_corpus, meteor_rows),
        (rouge_corpus, rouge_rows),
        (bleu_corpus, bleu_rows),
    ) = metric_outputs

    to_json(
        file_path,
        data,
        bert_corpus,
        bert_rows,
        meteor_corpus,
        meteor_rows,
        rouge_corpus,
        rouge_rows,
        bleu_corpus,
        bleu_rows,
    )


In [None]:

# Legend of option lists, kept for reference.
# NOTE(review): the index labels in the `dataset_size` legend below do not match the
# `dataset_size` list inside declare() (there index 5 is '256', not '16'). declare()
# is what actually resolves the indices chosen in this cell.
# run_name = ['lr4_b1','lr4_b2','lr4_b3','lr3_b1','lr3_b2','lr3_b2','lr2_b1','lr3_b2','lr2_b3']
# dataset_size = ['0:0shot', '1:1shot', '2:2shot', '3:4shot', '4:8', '5:16', '6:32', '7:64', '8:128', '9:256', 
# '10:512', '11:1024', '12:2048', '13:4096', '14:8192', '15:full']
# learningrate = ['lr2', 'lr3', 'lr4', 'lr5']
# num_beam = ['beam1', 'beam2', 'beam3']
# epochs = ['ep3', 'ep5', 'ep7']
# methods = ['adalora', 'ia3', 'lora', 'qlora']
# datasets = ['scinews','elife']
# ranks = ['rank4', 'rank32', 'rank512']
imethod = 2  # methods[2] == 'lora' in declare()
isize = 5  # dataset_size[5] == '256' in declare()
idataset = 0  # datasets[0] == 'scinews' in declare()
irank = 0  # ranks[0] == 'rank4' in declare()
# Resolve the three file paths for this configuration, then score and save.
json_file_path, input_json_path, file_path = declare(idataset, isize, imethod, irank)
scores_to_dict(json_file_path, input_json_path, file_path)

not possible


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\anush\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\anush\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\anush\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


{'rouge1': np.float64(0.004813477737665463), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.0024067388688327313), 'rougeLsum': np.float64(0.004813477737665463)}
{'rouge1': np.float64(0.004264392324093817), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.004264392324093817), 'rougeLsum': np.float64(0.004264392324093817)}
{'rouge1': np.float64(0.007874015748031494), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.003937007874015747), 'rougeLsum': np.float64(0.007874015748031494)}
{'rouge1': np.float64(0.0033222591362126247), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.0033222591362126247), 'rougeLsum': np.float64(0.0033222591362126247)}
{'rouge1': np.float64(0.0061162079510703364), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.0061162079510703364), 'rougeLsum': np.float64(0.0061162079510703364)}
{'rouge1': np.float64(0.005319148936170213), 'rouge2': np.float64(0.0), 'rougeL': np.float64(0.005319148936170213), 'rougeLsum': np.float64(0.005319148936170213)}
{'rouge1': np.f