## Function Definitions for the Main Task

In [None]:
import pandas as pd
import json
import numpy as np
from nltk.corpus import words as nltk_words
from Levenshtein import distance as levenshtein
from gensim import models
import nltk
import re

def load_json(filepath):
    """
    Reads a UTF-8 encoded JSON file and returns its parsed content.

    Args:
        filepath (str): Location of the JSON file on disk.

    Returns:
        The deserialized JSON document. The rest of this file expects a
        list of dictionaries, but whatever the file contains is returned.
    """
    with open(filepath, mode="r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed


# Load the NLTK English word list
def load_english_words():
    """
    Fetches the NLTK 'words' corpus and returns its entries as a set.

    The corpus is requested through ``nltk.download`` on every call, then
    the word list is converted to a set for membership tests.

    Returns:
        set: The entries of the NLTK 'words' corpus.
    """
    nltk.download('words')
    vocabulary = nltk_words.words()
    return set(vocabulary)

# Word set built once when this cell/module is executed; note that this calls
# load_english_words(), which invokes nltk.download('words').
ENGLISH_WORDS = load_english_words()

def is_english_word(word):
    """
    Tells whether a word is present in the module-level ENGLISH_WORDS set.

    The lookup is an exact membership test; no case folding or other
    normalization is applied here.

    Args:
        word (str): Candidate word to look up.

    Returns:
        bool: True when the word is a member of ENGLISH_WORDS, else False.
    """
    found = word in ENGLISH_WORDS
    return found

# Word validation: anagram / exact-match check
def validate_word(correct_word, user_word):
    """
    Accepts the user's word when it is either a dictionary-valid anagram of
    the correct word or identical to it (both compared after clean_text).

    Args:
        correct_word (str): The reference word.
        user_word (str): The word supplied by the user.

    Returns:
        str: The cleaned user word when it is accepted, otherwise "".
    """
    target = clean_text(correct_word)
    candidate = clean_text(user_word)

    # Same letters (in any order) and the candidate itself is a dictionary word.
    is_valid_anagram = sorted(target) == sorted(candidate) and is_english_word(candidate)

    if is_valid_anagram or target == candidate:
        return candidate
    return ""

# Clean text
def clean_text(text, keep_pipe=False):
    """
    Lower-cases a string and strips every character that is not an ASCII
    letter, a digit, or a Korean (Hangul) character.

    Non-string inputs (e.g. None or numbers) are returned unchanged.

    Args:
        text (str): The input string to be cleaned.
        keep_pipe (bool): If True, the '|' character is retained as well
            (used for original answers with multiple references).
            Defaults to False, which removes it like any other symbol.

    Returns:
        str: The cleaned text, or the original object if it is not a string.
    """
    if not isinstance(text, str):
        return text

    # The character class lists what is KEPT; '|' is appended only on request.
    allowed = 'a-zA-Z0-9ㄱ-ㅎㅏ-ㅣ가-힣'
    if keep_pipe:
        allowed += '|'
    return re.sub(f'[^{allowed}]', '', text.lower())

## Define Vectorization & Similarity Functions
# Sentence embedding helper
def get_sentence_vector(sentence, model):
    """
    Builds a sentence embedding by averaging the vectors of its known tokens.

    The sentence is split on whitespace; only tokens present in
    ``model.wv.key_to_index`` contribute to the average.

    Args:
        sentence (str): Text to embed.
        model: Embedding model exposing ``wv`` (indexable by token, with a
            ``key_to_index`` mapping) and ``vector_size``.

    Returns:
        np.ndarray: Mean of the known tokens' vectors, or a zero vector of
        length ``model.vector_size`` when no token is known.
    """
    vocabulary = model.wv.key_to_index
    word_vectors = [model.wv[word] for word in sentence.split() if word in vocabulary]

    if not word_vectors:
        return np.zeros(model.vector_size)
    return np.mean(word_vectors, axis=0)

# Cosine similarity between two vectors
def cosine_similarity(vec1, vec2):
    """
    Returns the cosine of the angle between two vectors.

    Args:
        vec1 (np.ndarray): First vector.
        vec2 (np.ndarray): Second vector.

    Returns:
        float: The cosine similarity, or -1.0 when either vector does not
        have a strictly positive norm (so the ratio would be undefined).
    """
    magnitude_a = np.linalg.norm(vec1)
    magnitude_b = np.linalg.norm(vec2)

    if not (magnitude_a > 0 and magnitude_b > 0):
        return -1.0
    return np.dot(vec1, vec2) / (magnitude_a * magnitude_b)


## Evaluate Models and Compute Metrics
# Calculate model-specific performance metrics
def evaluate_model_performance(df, models, ft_model):
    """
    Scores each model's answers against the reference words and summarizes
    the results per model.

    For every model name, three per-row columns are added to ``df`` in place:
    ``<model>_accuracy`` (100 or 0), ``<model>_edit_distance`` and
    ``<model>_cosine_similarity``.

    Args:
        df (pd.DataFrame): Must contain an "original" column holding the
            reference words and one answer column per model, named either
            "<model>_answer" or simply "<model>".
        models (list): Model (prompt) names to evaluate.
        ft_model: Embedding model forwarded to get_sentence_vector.

    Returns:
        pd.DataFrame: One row per model (index is the upper-cased name) with
        the column means "Accuracy", "Edit Distance" and "Cosine Similarity".

    Raises:
        KeyError: If "original" or a model's answer column is missing.
    """
    overall_results = {}

    references = df["original"].tolist()
    # The reference embeddings do not depend on the model, so compute them once.
    reference_vectors = [get_sentence_vector(reference, ft_model) for reference in references]

    for model in models:
        # Accept both "<model>_answer" and bare "<model>" column names.
        answer_column = f"{model}_answer"
        if answer_column not in df.columns:
            if model not in df.columns:
                raise KeyError(
                    f"No answer column for model {model!r}: "
                    f"expected {answer_column!r} or {model!r}"
                )
            answer_column = model
        answers = df[answer_column].tolist()

        # Accuracy: the reference is the correct word and the model answer is
        # the user word, so the dictionary check in validate_word is applied
        # to the model's answer rather than to the reference.
        df[f"{model}_accuracy"] = [
            100 if validate_word(reference, answer) else 0
            for reference, answer in zip(references, answers)
        ]

        # Edit distance
        df[f"{model}_edit_distance"] = [
            levenshtein(answer, reference)
            for reference, answer in zip(references, answers)
        ]

        # Cosine Similarity
        df[f"{model}_cosine_similarity"] = [
            cosine_similarity(get_sentence_vector(answer, ft_model), reference_vector)
            for answer, reference_vector in zip(answers, reference_vectors)
        ]

        # Overall performance evaluation by model
        overall_results[model.upper()] = {
            "Accuracy": np.mean(df[f"{model}_accuracy"]),
            "Edit Distance": np.mean(df[f"{model}_edit_distance"]),
            "Cosine Similarity": np.mean(df[f"{model}_cosine_similarity"])
        }

    # Store overall evaluation metrics
    df_performance = pd.DataFrame.from_dict(overall_results, orient='index')

    return df_performance

## Load Evaluation Model (FastText, English)

In [None]:
# Load the FastText model from the Facebook-format binary 'cc.en.300.bin'
# (relative path, so the file must be in the current working directory).
ft_model = models.fasttext.load_facebook_model('cc.en.300.bin')

## Load Files (One per Prompt)

In [None]:
# Load the answer file of each prompt (fill in the paths before running)
zrs = load_json(" ")
cot = load_json(" ")
icl = load_json(" ")
data_original = load_json(" ")  # answer(original) data

# Truncate every dataset to the shortest one so that rows line up by position
min_length = min(len(data_original), len(zrs), len(cot), len(icl))

# Transform to a DataFrame: reference fields plus one answer column per prompt
df = pd.DataFrame({
    "original": [item["original"] for item in data_original[:min_length]],
    "transformed": [item["transformed"] for item in data_original[:min_length]],
    "zrs": [item["Answer"] for item in zrs[:min_length]],
    "cot": [item["Answer"] for item in cot[:min_length]],
    "icl": [item["Answer"] for item in icl[:min_length]],
})

# Apply 'clean_text' to every cell. Series.map per column is the element-wise
# equivalent of DataFrame.applymap, which is deprecated since pandas 2.1.
df = df.fillna('').apply(lambda column: column.map(clean_text))

print(df.info())
df.head()

## Display Final Performance Results

In [None]:
# Evaluate each prompt.
# The list is deliberately NOT called `models`: that name is the gensim
# `models` module imported at the top of the file, and rebinding it would
# break re-running the FastText loading cell.
# The third entry is "icl" (not "cot_icl") to match the dataset and DataFrame
# column of that name built earlier in this file.
model_names = ["zrs", "cot", "icl"]
df_performance = evaluate_model_performance(df, model_names, ft_model)

print(df_performance)