## Imports

In [5]:
import json
from openai import OpenAI
from tqdm import tqdm
from collections import Counter
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
import pandas as pd

## Define Functions

In [50]:
def ask_question(question, client, max_tokens=200, temperature=2, top_p=0.9, model='llama3'):
    """Send one user message to a chat-completions endpoint and return the reply text.

    Args:
        question: Text sent as the content of the single user-role message.
        client: Object exposing ``chat.completions.create(...)``; the notebook
            passes an ``openai.OpenAI`` client.
        max_tokens: Forwarded unchanged to the endpoint.
        temperature: Forwarded unchanged to the endpoint.
        top_p: Forwarded unchanged to the endpoint.
        model: Model name forwarded to the endpoint. Defaults to ``'llama3'``,
            the value that used to be hard-coded.

    Returns:
        The ``content`` of the first returned choice, or ``''`` when the
        endpoint reports no content (``None``), so callers can always treat
        the result as a string.

    NOTE(review): the default ``temperature=2`` looks very high for a
    translation task and presumably makes scores vary between runs; confirm
    this is intended before comparing results.
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                'role': 'user',
                'content': question,
            }
        ],
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=False
    )
    answer = chat_completion.choices[0].message.content
    # Normalise a missing reply to an empty string instead of leaking None.
    return answer if answer is not None else ''

def evaluate_model(client, test_dataset, prompt):
    """Translate every example and score the predictions with BLEU.

    Args:
        client: Chat client handed through to ``translate_llm``.
        test_dataset: Iterable of dicts with ``'text_ja'`` (source) and
            ``'text_en'`` (reference translation) keys.
        prompt: Prompt template handed through to ``translate_llm``.

    Returns:
        A ``(df, bleu)`` tuple: ``df`` has one row per example with the columns
        ``sentence``, ``ground_truth`` and ``predicted_translation``; ``bleu``
        is whatever ``calculate_bleu`` returns for predictions vs. references.
    """
    rows = []
    for example in tqdm(test_dataset, desc="Evaluating"):
        source_text = example['text_ja']
        reference_text = example['text_en']
        rows.append({
            'sentence': source_text,
            'ground_truth': reference_text,
            'predicted_translation': translate_llm(source_text, prompt, client),
        })

    results = pd.DataFrame(rows)
    gold = [row['ground_truth'] for row in rows]
    score = calculate_bleu(results['predicted_translation'].tolist(), gold)
    return results, score

def translate_llm(sentence, prompt, client):
    """Translate one sentence with the chat model and clean up its reply.

    The prompt template is filled with ``sentence`` and sent through
    ``ask_question``. If the reply contains ``"Translation:"``, only the text
    after the last occurrence is kept, and a leading ``"N. "`` numbering is
    removed from each line. Otherwise the whole reply is returned stripped.

    Args:
        sentence: Source text substituted for ``{text}`` in ``prompt``.
        prompt: Template string containing a ``{text}`` placeholder.
        client: Chat client handed through to ``ask_question``.

    Returns:
        The cleaned translation; multiple lines are joined with ``"\\n"``.
        An empty string is returned when the model gives no reply.
    """
    question = prompt.format(text=sentence)
    answer = ask_question(question, client)
    if answer is None:
        # A missing reply is treated as an empty translation instead of crashing.
        answer = ""

    # Keep only what follows the last "Translation:" marker, if there is one.
    _, marker, tail = answer.rpartition("Translation:")
    if not marker:
        return answer.strip()

    cleaned_lines = []
    for line in tail.strip().split("\n"):
        number, separator, rest = line.partition(". ")
        # Strip the numbering only when the ". " separator is really present;
        # a digits-only line such as "42" used to raise IndexError here.
        if separator and number.isdigit():
            line = rest.strip()
        cleaned_lines.append(line)

    return "\n".join(cleaned_lines)

def calculate_bleu(predictions, references):
    """Compute a corpus-level BLEU-4 score for predictions against references.

    Both sides are tokenised by whitespace (``str.split()``). Each prediction
    is scored against exactly one reference, using uniform 1- to 4-gram
    weights and ``SmoothingFunction().method4`` smoothing.

    Args:
        predictions: Sequence of predicted translation strings.
        references: Sequence of reference strings, aligned with ``predictions``.

    Returns:
        The value returned by ``corpus_bleu``.
    """
    # corpus_bleu takes a list of reference *lists* per hypothesis.
    list_of_references = []
    for ref in references:
        list_of_references.append([ref.split()])

    hypotheses = []
    for pred in predictions:
        hypotheses.append(pred.split())

    return corpus_bleu(
        list_of_references,
        hypotheses,
        weights=(0.25, 0.25, 0.25, 0.25),
        smoothing_function=SmoothingFunction().method4,
    )

## Load dataset

In [38]:
# Load the annotation JSON. The relative path uses forward slashes so the cell
# runs on Windows, Linux and macOS alike (a backslash path only resolves on Windows).
with open("./open-mantra-dataset/annotation.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Flatten books -> pages -> text entries into the testing set, keeping only
# entries that carry both the Japanese source and the English reference.
test_dataset = [
    {"text_ja": text["text_ja"], "text_en": text["text_en"]}
    for book in data
    for page in book["pages"]
    for text in page["text"]
    if "text_ja" in text and "text_en" in text
]

## Prompts

In [42]:
# Baseline translation prompt. The {text} placeholder is filled per sentence
# via prompt.format(text=...) in translate_llm. The numbered "Input:" /
# "Translation:" layout shown to the model is what translate_llm parses: it
# keeps the text after the last "Translation:" marker and strips a leading
# "N. " numbering from each line.
prompt_baseline = """You are a translator from Japanese to English. 
Some rules to remember:

Maintaining the contents' accuracy is important, but since texts are from manga, we want to prioritize naturalness and ease of communication.
Instead of translating word by word, try to translate the whole sentence or phrase at once.
Number of translated sentences should be the same as the number of input sentences.
Return translations without additional explanations, comments, notes or interactions. Simply use the format for translations.

Here's the format.

Input:
1. Sentence in Japanese

Translation:
1. Translation in English

Input:
1. {text}
"""

## Run 

In [51]:
# Client for the OpenAI-compatible endpoint served at localhost:11434.
client = OpenAI(base_url='http://localhost:11434/v1/', api_key='ollama')

# First five examples; note that `subset` is not used below, the evaluation
# runs on the full test_dataset.
subset = test_dataset[:5]

df, bleu4 = evaluate_model(client, test_dataset, prompt_baseline)
print(f"BLEU Score: {bleu4*100:.4f}")

Evaluating: 100%|██████████| 1592/1592 [10:35<00:00,  2.51it/s]

BLEU Score: 2.8821





In [52]:
# Show the per-sentence results: source, reference and predicted translation.
df

Unnamed: 0,sentence,ground_truth,predicted_translation
0,綴じ眼のシオラ,bound eye siora,Siola of Twined Eyes
1,朽鷹みつき,Mitsuki Kuchitaka,Sparrowhawk's Whisker
2,だからっ,I'm telling you!!,So then!
3,知らないって言ってるだろっ,I don't know what you're talking about!,So you're saying you don't know?
4,そんな借金なんて!,i don't owe you!,Don't even mention that debt!
...,...,...,...
1587,こりゃかなわん,you beat me,Don't expect me to get it!
1588,さて,well!\n,"Well, then."
1589,さて明日は墓掃除だ!,tomorrow we clean the graves!,"Ah, tomorrow's the graveyard cleanup, I guess!"
1590,こ、こら,hey!!!,Geez!
