In [None]:
from src.utils.logger import Logger
from src.utils.data_utils import load_model, prepare_tokenizer, load_dataset, split_dataset, tokenize_dataset, \
    evaluate_perplexity, evaluate_accuracy, SplitEnum
from src.utils.env_loader import KOTLIN_DATASET_PATH, MODEL_NAME, PYTHON_DATASET_PATH, MODEL_PATH
from transformers import AutoModelForCausalLM

In [None]:
# Build the logger that the evaluation cells below use for progress and result messages.
logger = Logger().get_logger()

In [None]:
# Notebook-level aliases for the values imported from src.utils.env_loader.
# Model: identifier plus the path handed to load_model.
model_name, model_path = MODEL_NAME, MODEL_PATH
# Datasets: one path per evaluated language.
kotlin_dataset_path, python_dataset_path = KOTLIN_DATASET_PATH, PYTHON_DATASET_PATH

In [None]:
# Load the model under evaluation from `model_name` / `model_path`, and the tokenizer for `model_name`.
# NOTE(review): presumably `load_model` restores weights stored at `model_path` — confirm in src.utils.data_utils.
model = load_model(model_name, model_path)
# Alternative kept for reference: load straight from `model_name` via transformers, without `model_path`.
# model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = prepare_tokenizer(model_name)

In [None]:
# Kotlin evaluation: load the dataset, request its TEST split, tokenize it, then score the model.
logger.info("Starting evaluation on Kotlin dataset")
kotlin_dataset = load_dataset(kotlin_dataset_path)
kotlin_test_data = split_dataset(kotlin_dataset, SplitEnum.TEST)
tokenized_kotlin_test_dataset = tokenize_dataset(tokenizer, kotlin_test_data)
# Both metrics are computed from the same tokenized test data. The results cell formats
# them with float format specs, so both calls are expected to return numbers.
kotlin_perplexity = evaluate_perplexity(model, tokenizer, tokenized_kotlin_test_dataset)
kotlin_accuracy = evaluate_accuracy(model, tokenizer, tokenized_kotlin_test_dataset)

In [None]:
# Python evaluation: same steps as the Kotlin cell, applied to the Python dataset
# with the same `model` and `tokenizer`.
logger.info("Starting evaluation on Python dataset")
python_dataset = load_dataset(python_dataset_path)
python_test_data = split_dataset(python_dataset, SplitEnum.TEST)
tokenized_python_test_dataset = tokenize_dataset(tokenizer, python_test_data)
# Both metrics are computed from the same tokenized test data. The results cell formats
# them with float format specs, so both calls are expected to return numbers.
python_perplexity = evaluate_perplexity(model, tokenizer, tokenized_python_test_dataset)
python_accuracy = evaluate_accuracy(model, tokenizer, tokenized_python_test_dataset)

In [None]:
# Report both metrics for each language: accuracy to 4 decimal places, perplexity to 2.
# Requires the Kotlin and Python evaluation cells to have run first (they define the four values).
logger.info("Evaluation complete. Results:")
logger.info(f"Kotlin Accuracy: {kotlin_accuracy:.4f}")
logger.info(f"Kotlin Perplexity: {kotlin_perplexity:.2f}")
logger.info(f"Python Accuracy: {python_accuracy:.4f}")
logger.info(f"Python Perplexity: {python_perplexity:.2f}")