In [None]:
import json

def read_jsonl_file(filepath):
    """
    Reads a JSONL file and returns its records as a list.

    Each non-blank line is parsed as one JSON document. Blank or
    whitespace-only lines (for example trailing empty lines at the end of
    the file) are skipped instead of being handed to the JSON parser,
    which would reject them.

    Args:
        filepath (str): Path to the JSONL file.

    Returns:
        data (list): The parsed JSON value of every non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # An empty string is not valid JSON; ignore padding lines.
                continue
            data.append(json.loads(stripped))
    return data

In [None]:
def extract_numerical_data(data):
    """
    Collects the numerical fields of every entry into per-field lists.

    For each entry the fields "precision", "recall", "f1", "onto_conf",
    "rel_halluc", "sub_halluc" and "obj_halluc" are looked up with
    ``entry.get`` and converted with ``float``. A field that is absent
    from an entry contributes 0.0.

    Args:
        data (list): A list of dictionaries containing data from the JSONL file.

    Returns:
        numerical_data (dict): Maps each field name to the list of its float
            values, one value per entry, in input order.
    """
    field_names = (
        "precision",
        "recall",
        "f1",
        "onto_conf",
        "rel_halluc",
        "sub_halluc",
        "obj_halluc",
    )
    numerical_data = {name: [] for name in field_names}

    # Walk the entries once; inside each entry the fields are handled in
    # the fixed order given by field_names.
    for entry in data:
        for name in field_names:
            numerical_data[name].append(float(entry.get(name, 0.0)))

    return numerical_data

In [53]:
def calculate_averages(numerical_data):
    """
    Computes the arithmetic mean of every field and stores it under an "avg_" key.

    The result always contains the seven keys "avg_precision", "avg_recall",
    "avg_f1", "avg_onto_conf", "avg_rel_halluc", "avg_sub_halluc" and
    "avg_obj_halluc", each starting at 0.0. A field whose list is empty keeps
    that 0.0 (or, for a field outside those seven, is not added at all).

    Args:
        numerical_data (dict): A dictionary containing lists of numerical values for each field.

    Returns:
        averages (dict): Maps "avg_<field>" to the mean of that field's values.
    """
    default_fields = (
        "precision",
        "recall",
        "f1",
        "onto_conf",
        "rel_halluc",
        "sub_halluc",
        "obj_halluc",
    )
    averages = {f"avg_{name}": 0.0 for name in default_fields}

    for field, samples in numerical_data.items():
        if not samples:
            # Nothing to average; avoids dividing by zero.
            continue
        averages[f"avg_{field}"] = sum(samples) / len(samples)

    return averages


In [54]:
def save_to_jsonl(data, output_filepath):
    """
    Writes every record of ``data`` as one JSON line to ``output_filepath``.

    The file is opened in write mode with UTF-8 encoding, so an existing
    file at that path is overwritten. Each record is serialized with
    ``json.dump`` and terminated by a newline character.

    Args:
        data (list): The records (dictionaries) to serialize, one per output line.
        output_filepath (str): The path to the output JSONL file.
    """
    with open(output_filepath, 'w', encoding='utf-8') as sink:
        for row in data:
            # Stream the record straight into the file, then end the line.
            json.dump(row, sink)
            sink.write("\n")

In [55]:
def process_multiple_files(files, output_filepath):
    """
    Computes the per-file average statistics and writes them to one JSONL file.

    For every (filepath, ontology) pair the file is read, its numerical
    fields are extracted and averaged, and the resulting record is tagged
    with "onto" (the ontology name) and "type" ("all_test_cases"). All
    records are then saved together, one per line, in input order.

    Args:
        files (list): A list of tuples where each tuple contains:
                      - the filepath to the file
                      - the ontology name (e.g., "1_movie", "2_music")
        output_filepath (str): The path to the output JSONL file.
    """
    summaries = []

    for source_path, onto_name in files:
        # Read -> extract numerical fields -> average them.
        entries = read_jsonl_file(source_path)
        per_field_values = extract_numerical_data(entries)
        summary = calculate_averages(per_field_values)

        # Tag the record so each output line identifies its ontology.
        summary["onto"] = onto_name
        summary["type"] = "all_test_cases"
        summaries.append(summary)

    # Write all summaries in one go after every input has been processed.
    save_to_jsonl(summaries, output_filepath)

In [10]:
# Shared directory prefix of every input statistics file listed below.
_STATS_DIR = "../data/dbpedia_webnig/alpha_vicuna/vicuna/improvised_evaluation_statistics"

# Ontology names; each one also appears inside its input file name.
_ONTOLOGIES = (
    "1_university",
    "2_musicalwork",
    "3_airport",
    "4_building",
    "5_athlete",
    "6_politician",
    "7_company",
    "8_celestialbody",
    "9_astronaut",
    "10_comicscharacter",
    "11_meanoftransportation",
    "12_monument",
    "13_food",
    "14_writtenwork",
    "15_sportsteam",
    "16_city",
    "17_artist",
    "18_scientist",
    "19_film",
)

# (filepath, ontology name) pairs in the shape process_multiple_files expects.
files = [
    (f"{_STATS_DIR}/ont_{onto}_llm_stats.jsonl", onto)
    for onto in _ONTOLOGIES
]

# Destination of the combined averages (absolute path, kept unchanged).
OUTPUT_FILEPATH = (
    '/upb/users/b/balram/profiles/unix/cs/Text2KG/withont/data/dbpedia_webnig/alpha_vicuna/vicuna/overall_avg_statistics/'
    'vicuna_overall_averages_without_missing_GT_improvised_evaluation.jsonl'
)
