In [1]:
import json
import pandas as pd

def print_incorrect_fields(file_path):
    """Collect, per record id, the evaluation fields flagged as incorrect.

    Reads a JSON Lines file (one JSON object per line). For every record, the
    ``evaluation`` object is inspected for a fixed set of fields; a field is
    reported when its ``isCorrect`` value is present and falsy. Despite the
    function's name, nothing is printed on success: the result is returned.

    Blank lines are skipped. Lines that are not valid JSON, or that do not
    contain a JSON object, are reported on stdout and skipped. A missing,
    ``null`` or non-object ``evaluation`` / field entry is treated as
    "nothing flagged" instead of raising ``AttributeError``.

    Args:
        file_path: Path to a UTF-8 encoded JSON Lines file.

    Returns:
        dict mapping each record's ``"id"`` value to a dict of
        ``{field_name: field_value}`` for the fields flagged as incorrect.
        Records without any flagged field are omitted.

    Raises:
        OSError: If the file cannot be opened.
    """
    fields = ('russianSentence', 'finnishSentence', 'translationAccuracy',
              'russianWordUsage', 'finnishWordUsage')

    def get_incorrect_fields(evaluation):
        # Return the subset of checked fields whose isCorrect flag is falsy.
        if not isinstance(evaluation, dict):
            return {}
        incorrect_fields = {}
        for field in fields:
            details = evaluation.get(field)
            # A missing or malformed entry carries no verdict; treat it the
            # same way as a field that is absent altogether.
            if not isinstance(details, dict):
                continue
            if not details.get('isCorrect', True):
                incorrect_fields[field] = details
        return incorrect_fields

    evaluations = {}

    with open(file_path, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()
            if not line:
                # Blank separator/trailing lines are not decoding errors.
                continue
            try:
                json_object = json.loads(line)
            except json.JSONDecodeError as e:
                # The position inside `e` is relative to this single line, so
                # report the file line number as well.
                print(f"Error decoding JSON on line {line_number}: {e}")
                continue
            if not isinstance(json_object, dict):
                print(f"Skipping non-object JSON on line {line_number}")
                continue

            incorrect_fields = get_incorrect_fields(json_object.get('evaluation'))
            if incorrect_fields:
                evaluations[json_object.get("id")] = incorrect_fields

    return evaluations



In [15]:
# Folder prefix of the chapter workbooks. It is combined with the file name by
# plain string concatenation, hence the trailing slash.
EXCEL_FOLDER = '../ProfiRu/'

# File-name prefix of the chapter being processed.
CHAPTER_NAME = 'sm3_kap4'

# Row ids whose isDeleted flag is set to 1 before the workbook is saved.
IDS_2_MARK_DELETED = [
    5574, 5620, 5493, 5408,
    5681, 5437, 5557, 5764,
    5858, 5861, 5864, 5867,
    5871, 5873, 5891, 5922,
]

In [24]:
def prepare_file_with_eval_comments(profi=True, chapter_name=None, excel_folder=None,
                                    ids_to_delete=None):
    """Annotate a chapter workbook with auto-evaluation comments and save a copy.

    The function loads the chapter's Excel file, sets ``isDeleted`` to 1 for
    the given ids, adds a ``commentAutoEval`` column containing the
    JSON-serialised incorrect evaluation fields for each row ``id`` (empty
    string when the id has none), writes the result to a new workbook and
    prints the path of that workbook.

    Args:
        profi: When True the input is ``<chapter>_profi.xlsx``, otherwise
            ``<chapter>.xlsx``.
        chapter_name: File-name prefix of the chapter. Defaults to the
            module-level ``CHAPTER_NAME`` as it is at call time.
        excel_folder: Folder prefix of the workbooks. It is concatenated
            as-is, so it must end with a path separator. Defaults to the
            module-level ``EXCEL_FOLDER`` as it is at call time.
        ids_to_delete: Ids to flag as deleted. Defaults to the module-level
            ``IDS_2_MARK_DELETED``.

    Returns:
        None. The annotated workbook is written to disk.
    """
    # Resolve defaults at call time (not definition time) so the existing
    # pattern of reassigning the globals before each call keeps working.
    if chapter_name is None:
        chapter_name = CHAPTER_NAME
    if excel_folder is None:
        excel_folder = EXCEL_FOLDER
    if ids_to_delete is None:
        ids_to_delete = IDS_2_MARK_DELETED

    # Path to the Excel file
    if profi:
        excel_file_path = f"{excel_folder}{chapter_name}_profi.xlsx"
    else:
        excel_file_path = f"{excel_folder}{chapter_name}.xlsx"
    # Path to the JSON lines file, e.g. sm3_kap4_evaluations.jsonl
    # (resolved relative to the current working directory).
    json_lines_file_path = f"{chapter_name}_evaluations.jsonl"

    # Load the Excel file
    df = pd.read_excel(excel_file_path)

    # Update the isDeleted field for the specified ids
    df.loc[df['id'].isin(ids_to_delete), 'isDeleted'] = 1

    # Get the evaluations with incorrect fields
    evaluations = print_incorrect_fields(json_lines_file_path)

    # Add the commentAutoEval column to the dataframe
    df['commentAutoEval'] = df['id'].apply(
        lambda x: json.dumps(evaluations[x], ensure_ascii=False) if x in evaluations else ''
    )

    # Save the updated dataframe back to Excel.
    # NOTE(review): the output name contains "_profi" even when profi=False,
    # so both input variants of a chapter map to the same output file. Kept
    # unchanged for compatibility with existing outputs -- confirm intent.
    updated_excel_file_path = f"{excel_folder}{chapter_name}_profi_with_autoeval.xlsx"
    df.to_excel(updated_excel_file_path, index=False)

    print(f"Updated Excel file saved to: {updated_excel_file_path}")
    


In [17]:
# Chapter sm3_kap4: rebind the global that prepare_file_with_eval_comments
# reads, then run it with the default profi=True (input "<chapter>_profi.xlsx").
CHAPTER_NAME = 'sm3_kap4'
prepare_file_with_eval_comments()

Updated Excel file saved to: ../ProfiRu/sm3_kap4_profi_with_autoeval.xlsx


In [18]:
# Chapter sm3_kap5: rebind the global that prepare_file_with_eval_comments
# reads, then run it with the default profi=True (input "<chapter>_profi.xlsx").
CHAPTER_NAME = 'sm3_kap5'
prepare_file_with_eval_comments()

Updated Excel file saved to: ../ProfiRu/sm3_kap5_profi_with_autoeval.xlsx


In [19]:
# Chapter sm3_kap6: rebind the global that prepare_file_with_eval_comments
# reads, then run it with the default profi=True (input "<chapter>_profi.xlsx").
CHAPTER_NAME = 'sm3_kap6'
prepare_file_with_eval_comments()

Updated Excel file saved to: ../ProfiRu/sm3_kap6_profi_with_autoeval.xlsx


In [20]:
# Chapter sm3_kap7: rebind the global that prepare_file_with_eval_comments
# reads, then run it with the default profi=True (input "<chapter>_profi.xlsx").
CHAPTER_NAME = 'sm3_kap7'
prepare_file_with_eval_comments()

Updated Excel file saved to: ../ProfiRu/sm3_kap7_profi_with_autoeval.xlsx


In [21]:
# Chapter sm3_kap8: rebind the global that prepare_file_with_eval_comments
# reads, then run it with the default profi=True (input "<chapter>_profi.xlsx").
CHAPTER_NAME = 'sm3_kap8'
prepare_file_with_eval_comments()

Updated Excel file saved to: ../ProfiRu/sm3_kap8_profi_with_autoeval.xlsx


In [25]:
# my old chapter
# Rebinds both globals read by prepare_file_with_eval_comments and loads the
# workbook without the "_profi" suffix (profi=False). EXCEL_FOLDER stays
# rebound for every cell executed after this one.
# NOTE(review): the trailing "//" looks like a typo for "/" -- confirm.
EXCEL_FOLDER = '../EvalExcels//'
CHAPTER_NAME = 'sm2_new_puhekieli'
prepare_file_with_eval_comments(profi=False)

Updated Excel file saved to: ../EvalExcels//sm2_new_puhekieli_profi_with_autoeval.xlsx
