In [11]:
import csv
import json
import math
import re # Type convert

def convert_value(value):
    """Convert a raw CSV cell string into a JSON-friendly Python value.

    Rules, applied in order after stripping surrounding whitespace:

    * empty string, or ``null`` in any letter case -> ``None``
    * ``true`` / ``false`` in any letter case -> ``True`` / ``False``
    * text accepted by ``int()`` -> ``int``
    * text accepted by ``float()`` that is a finite number -> ``float``
    * anything else -> the stripped string

    Args:
        value: Cell text (a ``str``).

    Returns:
        ``None``, ``bool``, ``int``, ``float`` or ``str``.
    """
    text = value.strip()
    if not text:
        return None

    lowered = text.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    if lowered == 'null':
        return None

    try:
        return int(text)
    except ValueError:
        pass

    try:
        number = float(text)
    except ValueError:
        return text

    # float() also accepts "nan", "inf" and "infinity", and turns overflowing
    # literals such as "1e999" into inf. json.dumps would write those as
    # NaN/Infinity, which is not valid JSON, so such cells stay plain text.
    if not math.isfinite(number):
        return text
    return number


# Convert CSV to jsonl file
def csv_to_jsonl(input_csv_file, output_jsonl_file, delimiter=','):
    """Convert one CSV file into a JSON Lines file.

    The first CSV row is the header. Every following row becomes one JSON
    object per output line, keyed by the stripped header names, with each cell
    passed through ``convert_value``. Rows whose column count differs from the
    header are reported and skipped.

    Exceptions are caught and printed rather than propagated to the caller.

    Args:
        input_csv_file: Path of the CSV file to read.
        output_jsonl_file: Path of the JSONL file to write; it is opened in
            ``'w'`` mode, so existing content is replaced.
        delimiter: Field separator used in the CSV file.

    Returns:
        None.
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM, which
        # would otherwise end up glued to the first header name.
        with open(input_csv_file, mode='r', newline='', encoding='utf-8-sig') as infile, \
             open(output_jsonl_file, mode='w', encoding='utf-8') as outfile:

            reader = csv.reader(infile, delimiter=delimiter)

            first_row = next(reader, None)
            if first_row is None:
                # Without this guard the StopIteration from next() would be
                # reported as an empty "Error: " line.
                print(f"Error: '{input_csv_file}' is empty, no header row found")
                return
            header = [name.strip() for name in first_row]

            # Numbering starts at 2 because the header counts as row 1.
            for row_number, row in enumerate(reader, start=2):
                if len(row) != len(header):
                    print(f"Warning: Row {row_number} has column ({len(row)}) different from header ({len(header)}).Skipping.")
                    continue

                record = {name: convert_value(cell) for name, cell in zip(header, row)}
                outfile.write(json.dumps(record, ensure_ascii=False) + '\n')

        print(f"Convert. completed: '{input_csv_file}' -> '{output_jsonl_file}'")

    except FileNotFoundError as e:
        # The missing path may be the output location (e.g. its directory does
        # not exist), so report the path the OS complained about.
        missing_path = e.filename if e.filename is not None else input_csv_file
        print(f"Error: File not found '{missing_path}'")
    except Exception as e:
        print(f"Error: {e}")



In [None]:
import csv
import json
import re 
import os




def convert_all_csv_to_jsonl(input_folder, output_folder, delimiter=','):
    """Convert every ``.csv`` file directly inside a folder to ``.jsonl``.

    Each ``<name>.csv`` in ``input_folder`` is passed to ``csv_to_jsonl`` and
    written to ``<name>.jsonl`` in ``output_folder``. Entries that do not end
    in ``.csv`` are ignored; sub-folders are not traversed.

    Args:
        input_folder: Directory to scan for CSV files.
        output_folder: Directory for the JSONL files; created if missing.
        delimiter: Field separator forwarded to ``csv_to_jsonl``.

    Returns:
        None.
    """
    csv_suffix = '.csv'

    os.makedirs(output_folder, exist_ok=True)

    # Sorted so files are processed (and logged) in a stable order.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith(csv_suffix):
            continue

        # Swap only the trailing extension; str.replace would also rewrite a
        # ".csv" that appears earlier in the name.
        output_name = filename[:-len(csv_suffix)] + '.jsonl'

        input_file = os.path.join(input_folder, filename)
        output_file = os.path.join(output_folder, output_name)
        csv_to_jsonl(input_file, output_file, delimiter)





In [None]:
# Folder-level conversions: (input folder, output folder, CSV delimiter).
# scores_manual is the one folder read with ';' as the separator.
folder_jobs = [
    ('csvFiles/scores_prometheus', 'outputs_folder/scores_prometheus', ','),
    ('csvFiles/scores_manual', 'outputs_folder/scores_manual', ';'),
    ('csvFiles/scores_gemini', 'outputs_folder/scores_gemini', ','),
    ('csvFiles/translations', 'outputs_folder/translations', ','),
]
for source_dir, target_dir, separator in folder_jobs:
    convert_all_csv_to_jsonl(source_dir, target_dir, delimiter=separator)

# Stand-alone files converted one by one: (input CSV, output JSONL).
file_jobs = [
    ('csvFiles/report.csv', 'outputs_folder/report.jsonl'),
    ('csvFiles/final_scores_metrics_gemini.csv', 'outputs_folder/final_scores_metrics_gemini.jsonl'),
    ('csvFiles/final_scores_metrics_prometheus.csv', 'outputs_folder/final_scores_metrics_prometheus.jsonl'),
]
for source_csv, target_jsonl in file_jobs:
    csv_to_jsonl(source_csv, target_jsonl, delimiter=',')


In [None]:
# Rename every CSV in the Gemini scores folder from the judge naming scheme to
# scored_<model>_<prompt>.csv, for example:
#   judge_models-gemini-1.5-flash-latest_cerbero_base.csv -> scored_cerbero_base.csv
# NOTE(review): this cell comes after the cell that converts csvFiles/scores_gemini,
# so on a fresh top-to-bottom run the JSONL outputs are named after the
# pre-rename CSV files -- confirm that this order is intended.

llm_eval_path = "csvFiles/scores_gemini/"

for filename in sorted(os.listdir(llm_eval_path)):
    if not filename.endswith('.csv'):
        continue

    new_filename = filename.replace('judge_models-', 'scored_').replace('_gemini-1.5-flash-latest', '')
    if new_filename == filename:
        # Nothing to strip (e.g. the cell was already run): skip instead of
        # "renaming" the file onto itself and logging a change that never happened.
        continue

    old_path = os.path.join(llm_eval_path, filename)
    new_path = os.path.join(llm_eval_path, new_filename)
    if os.path.exists(new_path):
        # os.rename can silently replace an existing file; keep both instead.
        print(f"Skipped {filename}: {new_filename} already exists")
        continue

    os.rename(old_path, new_path)
    print(f"Renamed {filename} to {new_filename}")