In [2]:
import openai
from openai import AzureOpenAI, AsyncAzureOpenAI
from tqdm.notebook import tqdm
import os
from dotenv import load_dotenv, find_dotenv
import requests
import base64

GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_4_TURBO_PREVIEW = "gpt-4-turbo-preview"
GPT_4 = 'gpt-4'
GPT_4o = 'gpt-4o'

def get_openai_api_key():
    _ = load_dotenv(find_dotenv())

    return os.getenv("OPENAI_API_KEY")

OPENAI_API_KEY = get_openai_api_key()
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
    api_version="2024-07-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
)

In [3]:
import json


EVAL_FOLDER = '../../data/russian-english/cards/eval_results/ru_eng_'
def get_eval_results_from_file(file_name):
    data = []
    file_path = EVAL_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data


RU_ENG_OUTPUT_FOLDER = '../../data/russian-english/cards/test_cards/ru_eng_'
def get_ru_eng_cards_from_file(file_name):
    data = []
    file_path = OUTPUT_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data


RU_FINN_FOLDER_WITH_JSON = '../../data/russian-finnish/cards/curated_platform_cards/'
def get_ru_finn_cards_from_file(file_name):
    data = []
    file_path = RU_FINN_FOLDER_WITH_JSON + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    res = []
    for card in data:
        if 'isMarkedDeleted' not in card.keys():
            res.append(card)
    return res


ENG_FINN_OUTPUT_FOLDER = '../../data/english-finnish/cards/test_cards/eng_finn_'
def get_eng_finn_cards_from_file(file_name):
    data = []
    file_path = ENG_FINN_OUTPUT_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data


OUTPUT_FOLDER = '../../data/english-finnish/cards/test_cards/eng_finn_'
def write_cards_to_file(file_name, cards):
    file_path = OUTPUT_FOLDER + file_name
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(cards, f, ensure_ascii=False, indent=2)
        
        
EVAL_FOLDER = '../../data/english-finnish/cards/eval_results/eng_finn_'
def write_eval_results_to_file(file_name, results):
    file_path = EVAL_FOLDER + file_name
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

In [4]:
def generate_eng_finn_phrase_card(ru_finn_card, model=GPT_4o):
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()

    system_prompt = '''You are a multilingual assistant who is proficient in Finnish, Russian and English.'''

    user_prompt = f"""
    Translate the given Finnish word or phrase: '{finn_word}' into clear and natural English, reflecting its meaning and context as the primary focus. Use the Russian equivalent: '{ru_word}' as a supportive reference to help clarify or refine the exact context if needed. The goal is to create an English translation that is accurate, fluent, and authentic to native speakers, avoiding overly complex or literal phrasing.

    Please provide the English translation of the word or phrase and nothing else.
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )

    tr_word = response.choices[0].message.content.strip()

    card = {}
    card["wordFirstLang"] = tr_word
    card["sentenceFirstLang"] = ""
    card["wordSecondLang"] = finn_word
    card["sentenceSecondLang"] = ""
    card["id"] = card_id

    return card

In [5]:
def generate_eng_finn_full_card(ru_finn_card, model=GPT_4o):
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()

    system_prompt = '''You are a multilingual assistant who is proficient in Finnish, Russian and English.'''

    user_prompt = f"""
    Translate the given Finnish word or phrase along with its Russian translation into English, and then translate the provided Finnish sentence, incorporating the English translation of the word or phrase. Use synonyms or related terms where necessary to convey the intended meaning and maintain naturalness in English.

    Given word or phrase (Finnish): '{ru_word}'
    Given word or phrase (Russian): '{finn_word}'

    Given sentence (Finnish): '{ru_sentence}'
    Given sentence (Russian): '{finn_sentence}'

    ### Response structure:

    Respond in JSON format with the following structure:
    {{
        "englishWord": "Translated word in Englsih",
        "englishSentence": "Translated sentence in English"
    }}
    """

    response = client.chat.completions.create(
        model=model,
        response_format={ "type": "json_object" },
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )
    
    response_dict = json.loads(response.choices[0].message.content.strip())
    tr_word = response_dict['englishWord']
    tr_sentence = response_dict['englishSentence']

    card = {}
    card["wordFirstLang"] = tr_word.strip()
    card["sentenceFirstLang"] = tr_sentence.strip()
    card["wordSecondLang"] = finn_word
    card["sentenceSecondLang"] = finn_sentence
    card["id"] = card_id

    return card

In [6]:
def eval_phrase_card(ru_finn_card, eng_finn_card, model=GPT_4o):
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()
    eng_word, eng_sentence, _, _, tr_id = eng_finn_card.values()

    assert(card_id == tr_id)

    system_prompt = '''You are a multilingual assistant who is proficient in Finnish, Russian and English.'''

    user_prompt = f"""
    As an AI model, your task is to evaluate the correctness and naturalness of English translations for given Finnish and Russian words or phrases. Check if the English translation accurately conveys the meaning and context of the Finnish and Russian versions, and whether it sounds natural to a native speaker. Your evaluation does not need to suggest the best possible translation, only confirm that it is good enough and identify any issues if present.

    When suggesting corrections, provide only the final corrected English translation. If no correction is needed, set `suggestedFix` to `null`.

    Here are the words or phrases:  
    - Word or Phrase in Finnish: {finn_word}  
    - Word or Phrase in Russian: {ru_word}  
    - Word or Phrase in English: {eng_word}  

    Respond in JSON format using the following structure:  
    {{
      "translationAccuracy": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct",
        "suggestedFix": "Suggested correction if there is an issue or null if no correction is needed"
      }}
    }}
    """

    response = client.chat.completions.create(
      model=model,
      response_format={ "type": "json_object" },
      messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
      ]
    )

    res = json.loads(response.choices[0].message.content.strip())
    res['id'] = card_id

    return res

In [7]:
def eval_full_card(ru_finn_card, eng_finn_card, model=GPT_4o):
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()
    eng_word, eng_sentence, _, _, tr_id = eng_finn_card.values()

    assert(card_id == tr_id)

    system_prompt = '''You are a multilingual assistant who is proficient in Finnish, Russian and English.'''

    user_prompt = f"""
    Evaluate the correctness of an English word and sentence based on their translations from Finnish and Russian. You will receive a word in Finnish, Russian, and its translation in English, as well as a sentence in Finnish, Russian, and its translation in English. Your task is to assess the quality of the English sentence, the usage of the English word in the sentence, and the accuracy of the translations from Finnish and Russian to English. For each evaluation point, provide a detailed explanation of your judgment and suggest fixes where applicable, either to the English word, the English sentence, or both.

    Please ensure that the English sentence is grammatically correct and natural. Suggest a corrected version if necessary. Verify that the English sentence contains the English word in some form and suggest using synonyms or related terms if the word is missing. Prioritize naturalness and correctness. Ensure that the translations of both the word and sentence from Finnish and Russian to English are accurate and provide corrections if necessary.

    Respond in JSON format with the following structure:
    {{
        "englishSentenceCorrectness": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct.",
        "suggestedFix": "Suggested corrected sentence if there is an issue, or null if not applicable."
        }},
        "wordUsage": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct.",
        "suggestedFixSentence": "Suggested corrected sentence if the word usage is incorrect, or null if not applicable.",
        "suggestedFixWord": "Suggested corrected word if the word usage is incorrect, or null if not applicable."
        }},
        "wordTranslationAccuracy": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct.",
        "suggestedFix": "Suggested correction for translation issues, or null if not applicable."
        }},
        "sentenceTranslationAccuracy": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct.",
        "suggestedFix": "Suggested correction for translation issues, or null if not applicable."
        }}
    }}

    Here are the provided word and sentence in Finnish, Russian, and English:

    - Word in Finnish: {finn_word}
    - Word in Russian: {ru_word}
    - Word in English: {eng_word}
    - Sentence in Finnish: {finn_sentence}
    - Sentence in Russian: {ru_sentence}
    - Sentence in English: {eng_sentence}

    Please adhere to this structure to ensure clear, actionable feedback for each evaluation point.
    """

    response = client.chat.completions.create(
      model=model,
      response_format={ "type": "json_object" },
      messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
      ]
    )

    res = json.loads(response.choices[0].message.content.strip())
    res['id'] = card_id

    return res

In [8]:
BASE_SYSTEM_PROMPT = 'You are a multilingual assistant who is proficient in English, Russian and Finnish.'

In [9]:
def improve_eng_finn_phrase_card(ru_finn_card, eng_finn_card, eval_result, system_prompt=BASE_SYSTEM_PROMPT, model=GPT_4o):
    ru_word, _, finn_word, _, card_id = ru_finn_card.values()
    eng_word, _, _, _, tr_id = eng_finn_card.values()
    _, explanation, suggested_fix = eval_result['translationAccuracy'].values()

    assert(card_id == tr_id)

    user_prompt = f"""
    Translate the given Finnish word or phrase: '{finn_word}' into clear and natural English, prioritizing its meaning and how it would sound most authentic and fluent to native speakers. Use the Russian equivalent: '{ru_word}' as a supportive reference if it helps refine the context or meaning. While accuracy is important, favor translations that fit naturally into everyday English, even if they are not the most direct equivalents.

    Consider the following:
    Existing English translation: '{eng_word}'
    Issues identified: '{explanation}'
    Suggested improvement: '{suggested_fix}'
    Instructions:
    Based on the information provided, craft an English translation that balances accuracy, naturalness, and context. You may adapt the word or phrase slightly to ensure it resonates well with native speakers and fits its intended use.

    Respond with only the final English translation without any additional explanations.
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )
    
    tr_word = response.choices[0].message.content.strip()

    card = {}
    card["wordFirstLang"] = tr_word
    card["sentenceFirstLang"] = ""
    card["wordSecondLang"] = finn_word
    card["sentenceSecondLang"] = ""
    card["id"] = card_id

    return card

In [10]:
def improve_eng_finn_full_card(ru_finn_card, eng_finn_card, eval_result, system_prompt=BASE_SYSTEM_PROMPT, model=GPT_4o):
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()
    eng_word, eng_sentence, _, _, tr_id = eng_finn_card.values()

    assert(card_id == tr_id)

    user_prompt = f"""
    Translate the given Finnish word or phrase into English and use it within the provided Finnish sentence to create a natural and accurate English translation. Use the Russian word or sentence as additional context if needed. Focus on accurately conveying the meaning of the Finnish source while incorporating all feedback and suggestions from the evaluation results.

    When choosing the English word or phrase, prioritize naturalness and fluency in the sentence over strict accuracy. While the translation should reflect the original meaning, it is acceptable to use a word or phrase that is not the most direct translation but still conveys the intended sense in a way that sounds natural and idiomatic in English.

    ### Details to guide your translation:

    Finnish word or phrase: '{finn_word}'
    Russian word or phrase: '{ru_word}'
    Finnish sentence: '{finn_sentence}'
    Russian sentence: '{ru_sentence}'
    Existing English translation of the word or phrase: '{eng_word}'
    Existing English translation of the sentence: '{eng_sentence}'

    ### Evaluation Results Summary:

    **English Sentence Evaluation**:

    Correctness: {eval_result['englishSentenceCorrectness']['isCorrect']}
    Explanation: {eval_result['englishSentenceCorrectness']['explanation']}
    Suggested Fix (if applicable): {eval_result['englishSentenceCorrectness']['suggestedFix']}

    **Word Usage Evaluation**:

    Correctness: {eval_result['wordUsage']['isCorrect']}
    Explanation: {eval_result['wordUsage']['explanation']}
    Suggested Fix for Word (if applicable): {eval_result['wordUsage']['suggestedFixWord']}
    Suggested Fix for Sentence (if applicable): {eval_result['wordUsage']['suggestedFixSentence']}

    **Word Translation Accuracy Evaluation**:

    Correctness: {eval_result['wordTranslationAccuracy']['isCorrect']}
    Explanation: {eval_result['wordTranslationAccuracy']['explanation']}
    Suggested Fix (if applicable): {eval_result['wordTranslationAccuracy']['suggestedFix']}

    **Sentence Translation Accuracy Evaluation**:

    Correctness: {eval_result['sentenceTranslationAccuracy']['isCorrect']}
    Explanation: {eval_result['sentenceTranslationAccuracy']['explanation']}
    Suggested Fix (if applicable): {eval_result['sentenceTranslationAccuracy']['suggestedFix']}

    ### Instructions:
    Review the Evaluation Feedback:

    Carefully consider all provided explanations and suggested fixes for the word or phrase, sentence, and overall translation accuracy.
    Translate the Word or Phrase:

    Choose a translation that balances accuracy with naturalness. It is acceptable to use a word that deviates slightly from the most direct translation if it results in a more fluent and idiomatic sentence.
    If the word usage is marked incorrect, incorporate the suggested fix or refine it further for better contextual alignment.
    Translate the Finnish Sentence:

    Integrate the translated word or phrase naturally into the sentence.
    If the sentence translation is marked incorrect, incorporate the suggested fixes and adjust for fluency and clarity.
    Address Translation Accuracy Issues:

    If any translation inaccuracies are identified, apply the suggested fixes or clarify the meaning while ensuring the translation sounds natural and idiomatic.
    Leverage Context:

    Use the Russian word or sentence as additional guidance where necessary.
    Produce a Polished Result:

    Ensure the final translation conveys the intended meaning, aligns naturally with the sentence, and incorporates feedback from the evaluation results.

    ### Response structure:

    Respond in JSON format with the following structure:
    {{
        "englishWord": "Translated word in Englsih",
        "englishSentence": "Translated sentence in English"
    }}
    """

    response = client.chat.completions.create(
        model=model,
        response_format={ "type": "json_object" },
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )
    
    response_dict = json.loads(response.choices[0].message.content.strip())
    tr_word = response_dict['englishWord']
    tr_sentence = response_dict['englishSentence']

    card = {}
    card["wordFirstLang"] = tr_word.strip()
    card["sentenceFirstLang"] = tr_sentence.strip()
    card["wordSecondLang"] = finn_word
    card["sentenceSecondLang"] = finn_sentence
    card["id"] = card_id

    return card

In [11]:
def is_phrase_card(card):
    return card['sentenceFirstLang'] == ''

In [12]:
def check_eval(eval_result, is_phrase_card):
    if is_phrase_card:
        return eval_result['translationAccuracy']['isCorrect']
    
    w_tr_accuracy = eval_result['wordTranslationAccuracy']['isCorrect']
    s_tr_accuracy = eval_result['sentenceTranslationAccuracy']['isCorrect']
    s_accuracy = eval_result['englishSentenceCorrectness']['isCorrect']
    w_accuracy = eval_result['wordUsage']['isCorrect']
    
    return all([w_tr_accuracy, s_tr_accuracy, s_accuracy, w_accuracy])

In [13]:
def card_handler(ru_finn_card, num_iter=5):
    is_phrase_card = (ru_finn_card['sentenceFirstLang'] == '')

    eng_finn_card = {}

    if is_phrase_card:
        eng_finn_card = generate_eng_finn_phrase_card(ru_finn_card)
    else:
        eng_finn_card = generate_eng_finn_full_card(ru_finn_card)

    eval_result = {}

    if is_phrase_card:
        eval_result = eval_phrase_card(ru_finn_card, eng_finn_card)
    else:
        eval_result = eval_full_card(ru_finn_card, eng_finn_card)

    for _ in range(num_iter):
        if check_eval(eval_result, is_phrase_card):
            break
        
        if is_phrase_card:
            eng_finn_card = improve_eng_finn_phrase_card(ru_finn_card, eng_finn_card, eval_result)
        else:
            eng_finn_card = improve_eng_finn_full_card(ru_finn_card, eng_finn_card, eval_result)

        if is_phrase_card:
            eval_result = eval_phrase_card(ru_finn_card, eng_finn_card)
        else:
            eval_result = eval_full_card(ru_finn_card, eng_finn_card)
    
    return eng_finn_card, eval_result

In [14]:
def generate_eng_finn_cards_from_file(file_name):
    ru_finn_cards = get_ru_finn_cards_from_file(file_name)

    eng_finn_cards = []
    eval_results = []

    for ru_finn in tqdm(ru_finn_cards):
        while True:
            try:
                eng_finn, eval_res = card_handler(ru_finn)

                eng_finn_cards.append(eng_finn)
                eval_results.append(eval_res)

                break
            except Exception:
                pass

    write_cards_to_file(file_name, eng_finn_cards)
    write_eval_results_to_file(file_name, eval_results)

In [16]:
file_names = [f'sm1_new_kap{i}.json' for i in range(1, 10)]

In [16]:
first_file = file_names[0]
first_file

'sm1_new_kap1.json'

In [15]:
def check_file_accuracy(file_name):
    eng_finn_cards = get_ru_eng_cards_from_file(file_name)
    eval_results = get_eval_results_from_file(file_name)

    assert(len(eng_finn_cards) == len(eval_results))

    results = []
    for eng_finn, eval_res in list(zip(eng_finn_cards, eval_results)):
        is_phrase_card = (eng_finn['sentenceFirstLang'] == '')
        results.append(check_eval(eval_res, is_phrase_card))
    
    return sum(results) / len(results)

In [18]:
for file_name in tqdm(file_names):
    generate_eng_finn_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [55]:
second_chapter = [f'sm2_new_kap{i}.json' for i in range(1, 9)]

In [56]:
for file_name in tqdm(second_chapter):
    generate_eng_finn_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/239 [00:00<?, ?it/s]

0.9790794979079498


  0%|          | 0/195 [00:00<?, ?it/s]

0.9846153846153847


  0%|          | 0/217 [00:00<?, ?it/s]

0.9769585253456221


  0%|          | 0/298 [00:00<?, ?it/s]

0.9932885906040269


  0%|          | 0/233 [00:00<?, ?it/s]

0.9699570815450643


  0%|          | 0/206 [00:00<?, ?it/s]

0.9805825242718447


  0%|          | 0/237 [00:00<?, ?it/s]

0.9873417721518988


  0%|          | 0/194 [00:00<?, ?it/s]

0.9845360824742269


In [57]:
sm2_last_file = 'sm2_new_puhekieli.json'

In [58]:
generate_eng_finn_cards_from_file(sm2_last_file)
print(check_file_accuracy(sm2_last_file))

  0%|          | 0/110 [00:00<?, ?it/s]

0.9818181818181818


In [19]:
third_chapter = [f'sm3_kap{i}.json' for i in range(1, 9)]

In [20]:
third_chapter[0]

'sm3_kap1.json'

In [22]:
for file_name in tqdm(third_chapter):
    generate_ru_eng_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/157 [00:00<?, ?it/s]

0.9872611464968153


  0%|          | 0/208 [00:00<?, ?it/s]

0.9759615384615384


  0%|          | 0/150 [00:00<?, ?it/s]

0.9933333333333333


  0%|          | 0/144 [00:00<?, ?it/s]

0.9791666666666666


  0%|          | 0/158 [00:00<?, ?it/s]

0.9936708860759493


  0%|          | 0/221 [00:00<?, ?it/s]

0.9773755656108597


  0%|          | 0/177 [00:00<?, ?it/s]

0.9943502824858758


  0%|          | 0/240 [00:00<?, ?it/s]

0.9708333333333333


In [166]:
tmp_ru_eng = get_ru_eng_cards_from_file(second_chapter[4])
tmp_evals = get_eval_results_from_file(second_chapter[4])
tmp_ru_finn = get_ru_finn_cards_from_file(second_chapter[4])

In [167]:
len(tmp_ru_finn), len(tmp_evals), len(tmp_ru_eng)

(233, 233, 233)

In [168]:
ids = [card['id'] for card in tmp_ru_eng]
bad = [card for card in tmp_ru_finn if card['id'] not in ids]
bad

[]

In [169]:
bad_cards = []

for ru_finn, eval in list(zip(tmp_ru_finn, tmp_evals)):
    is_phrase_card = (ru_finn['sentenceFirstLang'] == '')

    if not check_eval(eval, is_phrase_card):
        bad_cards.append(ru_finn)

In [170]:
for ru_finn, eval in list(zip(tmp_ru_finn, tmp_evals)):
    is_phrase_card = (ru_finn['sentenceFirstLang'] == '')

    if not check_eval(eval, is_phrase_card):
        card, eval = card_handler(ru_finn)
        print(card)
        print(eval)
        print('===================')

{'wordFirstLang': 'то есть', 'sentenceFirstLang': 'Моё имя Анна, то есть я Анна.', 'wordSecondLang': 'in other words', 'sentenceSecondLang': 'My name is Anna, or in other words, I am Anna.', 'id': 4013}
{'englishSentenceCorrectness': {'isCorrect': True, 'explanation': 'The English sentence is grammatically correct and sounds natural.', 'suggestedFix': None}, 'wordUsage': {'isCorrect': True, 'explanation': "The word 'in other words' is used correctly within the sentence to restate the name.", 'suggestedFixSentence': None, 'suggestedFixWord': None}, 'wordTranslationAccuracy': {'isCorrect': True, 'explanation': "The phrase 'то есть' in Russian and 'eli' in Finnish accurately translate to 'in other words' in English.", 'suggestedFix': None}, 'sentenceTranslationAccuracy': {'isCorrect': True, 'explanation': 'The English translation of the sentence maintains the same meaning as the original Russian and Finnish sentences.', 'suggestedFix': None}, 'id': 4013}
{'wordFirstLang': 'вы не прочитали

In [129]:
lol = "abra kadabra"
print(f'magic is {lol}')

magic is abra kadabra


In [16]:
def regressor(ru_finn_card, eng_finn_cards, n_models=3, model=GPT_4o):
    eng_word, eng_sentence, _, _, card_id = eng_finn_cards[0].values()
    ru_word, ru_sentence, _, _, card_id = ru_finn_card.values()

    tr_words = [card['wordSecondLang'] for card in eng_finn_cards]
    tr_sentences = [card['sentenceSecondLang'] for card in eng_finn_cards]

    models_response = ''
    for i in range(n_models):
        models_response += f"""{i + 1}. `Model {i + 1}` Response: {{ "finnishWord": '{tr_words[i]}', "finnishSentence": '{tr_sentences[i]}'}}\n"""

    system_prompt = '''Act as a translation evaluator and synthesizer. Assess model-generated translations in Finnish for a English word/phrase and its sentence, prioritizing accuracy, fluency, and contextual fit. If there won't be enough context for word/phrase and its sentence then use as a guidance Russian translation for this word/phrase and its sentence. Return the best translations in JSON format with refinements if necessary.'''

    user_prompt = f"""
**Instruction for the Regressor**:  
Evaluate the translations provided by different models for a given English word or phrase and its accompanying sentence with their Russian translations for help with selecting context for translation. Select or synthesize the best Finnish translation for the word and sentence based on:
1. **Accuracy**: Ensure the translation reflects the original English meaning correctly.
2. **Naturalness**: The Finnish translations should be fluid and use appropriate synonyms or phrasing where needed.
3. **Consistency**: The translated sentence should appropriately incorporate the word's translation.

### Input:
Given word or phrase (English): '{eng_word}'
Given word or phrase (Russian): '{ru_word}'  

Given sentence (English): '{eng_sentence}'  
Given sentence (Russian): '{ru_sentence}'

Responses from models:
{models_response}

---

### Task:
Critically evaluate these responses, identify the best translations, and synthesize a single, high-quality translation. If needed, refine the translations to ensure accuracy and naturalness. Do not simply copy; improve where necessary.

### Response structure:
Respond in JSON format with the following structure:
{{
    "finnishWord": "Best translated word in Finnish",
    "finnishSentence": "Best translated sentence in Finnish"
}}

---

### Key Considerations for Evaluation:
- **Accuracy**: Does the translation reflect the original meaning and nuances in English?
- **Fluency**: Is the translation grammatically correct and natural in Finnish?
- **Contextual Fit**: Does the sentence correctly integrate the translation of the word or phrase?
    """

    response = client.chat.completions.create(
        model=model,
        response_format={ "type": "json_object" },
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )
    
    response_dict = json.loads(response.choices[0].message.content.strip())
    tr_word = response_dict['finnishWord']
    tr_sentence = response_dict['finnishSentence']

    card = {}
    card["wordFirstLang"] = eng_word
    card["sentenceFirstLang"] = eng_sentence
    card["wordSecondLang"] = tr_word.strip()
    card["sentenceSecondLang"] = tr_sentence.strip()
    card["id"] = card_id

    return card

In [17]:
def meaxture_of_agents(ru_finn_card, n_models=3):
    is_phrase_card = (ru_finn_card['sentenceFirstLang'] == '')

    eng_finn_cards = []
    for _ in range(n_models):
        eng_finn_card = {}

        if is_phrase_card:
            eng_finn_card = generate_eng_finn_phrase_card(ru_finn_card)
        else:
            eng_finn_card = generate_eng_finn_full_card(ru_finn_card)

        eng_finn_cards.append(eng_finn_card)

    card = regressor(ru_finn_card, eng_finn_cards, n_models)

    return card

    #eval_result = {}

    #if is_phrase_card:
    #    eval_result = eval_phrase_card(ru_finn_card, ru_eng_card)
    #else:
    #    eval_result = eval_full_card(ru_finn_card, ru_eng_card)

In [18]:
BASE_SYSTEM_PROMPT = f'''You are a multilingual assistant who is proficient in English, Russian and Finnish.'''

In [19]:
def moa_and_eval(ru_finn_card):
    ru_eng_card = meaxture_of_agents(ru_finn_card)
    is_phrase_card = (ru_finn_card['sentenceFirstLang'] == '')

    assert(ru_finn_card['id'] == ru_eng_card['id'])

    eval_res = {}
    if is_phrase_card:
        eval_res = eval_phrase_card(ru_finn_card, ru_eng_card)
    else:
        eval_res = eval_full_card(ru_finn_card, ru_eng_card)

    return ru_eng_card, eval_res

In [20]:
system_prompts = [
    "Act as a linguistic expert specializing in Russian, Finnish, and English translations. Evaluate the given English word and sentence translations in conjunction with the Russian context. Based on evaluation feedback, refine the word and sentence translations for accuracy, fluency, and naturalness, ensuring idiomatic Finnish usage. Provide the response in JSON format.",
    "Take on the role of a translator focusing on contextual accuracy. Use the provided English and Russian inputs, along with feedback evaluations, to create an Finnish translation that balances precise meaning with natural phrasing. Address inaccuracies and ensure the translation reads fluent?ly and idiomatically. Respond in JSON format.",
    "Serve as a cultural localization specialist. Translate the provided English word and sentence into Finnish, using the Russian context and feedback to enhance naturalness and idiomatic expression. Focus on making the translation contextually relevant and fluent for an Finnish-speaking audience. Provide your output in JSON format."
]

In [21]:
def meaxture_of_improving_agents(ru_finn_card, eng_finn_card, eval_result, n_models=3):
    is_phrase_card = (ru_finn_card['sentenceFirstLang'] == '')

    improved_cards = []
    for s_p in system_prompts:
        improved_eng_finn_card = {}

        if is_phrase_card:
            improved_eng_finn_card = improve_eng_finn_phrase_card(ru_finn_card, eng_finn_card, eval_result, system_prompt=s_p)
        else:
            improved_eng_finn_card = improve_eng_finn_full_card(ru_finn_card, eng_finn_card, eval_result, system_prompt=s_p)

        improved_cards.append(improved_eng_finn_card)

    card = regressor(ru_finn_card, improved_cards, n_models)

    return card

    #eval_result = {}

    #if is_phrase_card:
    #    eval_result = eval_phrase_card(ru_finn_card, ru_eng_card)
    #else:
    #    eval_result = eval_full_card(ru_finn_card, ru_eng_card)

In [22]:
def card_handler_with_agents(ru_finn_card, num_iter=5, num_agents=3):
    is_phrase_card = (ru_finn_card['sentenceFirstLang'] == '')

    eng_finn_card = {}

    if is_phrase_card:
        eng_finn_card = generate_eng_finn_phrase_card(ru_finn_card)
    else:
        eng_finn_card = generate_eng_finn_full_card(ru_finn_card)

    eval_result = {}

    if is_phrase_card:
        eval_result = eval_phrase_card(ru_finn_card, eng_finn_card)
    else:
        eval_result = eval_full_card(ru_finn_card, eng_finn_card)

    for _ in range(num_iter):
        if check_eval(eval_result, is_phrase_card):
            break
        
        if is_phrase_card:
            eng_finn_card = meaxture_of_improving_agents(ru_finn_card, eng_finn_card, eval_result, num_agents)
        else:
            eng_finn_card = meaxture_of_improving_agents(ru_finn_card, eng_finn_card, eval_result, num_agents)

        if is_phrase_card:
            eval_result = eval_phrase_card(ru_finn_card, eng_finn_card)
        else:
            eval_result = eval_full_card(ru_finn_card, eng_finn_card)
    
    return eng_finn_card, eval_result

In [23]:
import logging

logging.basicConfig(
    filename='errors.txt',
    level=logging.ERROR,
    format='%(message)s',
    filemode='a'
)

def generate_eng_finn_cards_from_file(file_name):
    ru_finn_cards = get_ru_finn_cards_from_file(file_name)

    eng_finn_cards = []
    eval_results = []

    for ru_finn in tqdm(ru_finn_cards):
        while True:
            try:
                eng_finn, eval_res = card_handler_with_agents(ru_finn)
                eng_finn_cards.append(eng_finn)
                eval_results.append(eval_res)
                break
            except Exception as e:
                logging.error(str(e))
                pass

    assert len(eng_finn_cards) == len(eval_results)

    results = []
    for ru_eng, eval_res in zip(eng_finn_cards, eval_results):
        is_phrase_card = (ru_eng['sentenceFirstLang'] == '')
        results.append(check_eval(eval_res, is_phrase_card))
    
    write_cards_to_file(file_name, eng_finn_cards)
    write_eval_results_to_file(file_name, eval_results)

In [24]:
def check_file_accuracy(file_name):
    eng_finn_cards = get_eng_finn_cards_from_file(file_name)
    eval_results = get_eval_results_from_file(file_name)

    assert(len(eng_finn_cards) == len(eval_results))

    results = []
    for eng_finn, eval_res in list(zip(eng_finn_cards, eval_results)):
        is_phrase_card = (eng_finn['sentenceFirstLang'] == '')
        results.append(check_eval(eval_res, is_phrase_card))
    
    return sum(results) / len(results)

In [26]:
file_names = [f'sm1_new_kap{i}.json' for i in range(1, 10)]

In [28]:
def get_bad_card(file_name, id):
    ru_finn_cards = get_ru_finn_cards_from_file(file_name)
    return ru_finn_cards[id]

In [33]:
bad_card = get_bad_card(file_names[0], 41)
bad_card

{'wordFirstLang': 'Конечно',
 'sentenceFirstLang': '',
 'wordSecondLang': 'Kyllä',
 'sentenceSecondLang': '',
 'id': 1041}

In [34]:
card_handler_with_agents(bad_card)

({'wordFirstLang': '{\n  "translation": "Yes"\n}',
  'sentenceFirstLang': '',
  'wordSecondLang': 'Kyllä',
  'sentenceSecondLang': '',
  'id': 1041},
 {'translationAccuracy': {'isCorrect': False,
   'explanation': "The Finnish word 'Kyllä' accurately translates to 'Yes'. However, the Russian word 'Конечно' more accurately translates to 'Of course' rather than just 'Yes'.",
   'suggestedFix': 'Of course'},
  'id': 1041})

In [36]:
lol = []

for file_name in tqdm(file_names):
    lol.append(check_file_accuracy(file_name))

print(sum(lol) / len(lol))

  0%|          | 0/9 [00:00<?, ?it/s]

0.9891912645942335


In [35]:
for file_name in tqdm(file_names):
    generate_eng_finn_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

0.9933774834437086


  0%|          | 0/192 [00:00<?, ?it/s]

0.9947916666666666


  0%|          | 0/238 [00:00<?, ?it/s]

0.9831932773109243


  0%|          | 0/267 [00:00<?, ?it/s]

0.9812734082397003


  0%|          | 0/153 [00:00<?, ?it/s]

0.9934640522875817


  0%|          | 0/239 [00:00<?, ?it/s]

0.9874476987447699


  0%|          | 0/184 [00:00<?, ?it/s]

0.9945652173913043


  0%|          | 0/260 [00:00<?, ?it/s]

0.9923076923076923


  0%|          | 0/226 [00:00<?, ?it/s]

0.9823008849557522


In [23]:
second_chapter = [f'sm2_new_kap{i}.json' for i in range(1, 9)]

In [38]:
for file_name in tqdm(second_chapter):
    generate_eng_finn_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/239 [00:00<?, ?it/s]

0.9790794979079498


  0%|          | 0/195 [00:00<?, ?it/s]

0.9846153846153847


  0%|          | 0/217 [00:00<?, ?it/s]

0.9815668202764977


  0%|          | 0/298 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [25]:
for file_name in tqdm(second_chapter[3:]):
    generate_eng_finn_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/298 [00:00<?, ?it/s]

0.9899328859060402


  0%|          | 0/233 [00:00<?, ?it/s]

0.9785407725321889


  0%|          | 0/206 [00:00<?, ?it/s]

0.9805825242718447


  0%|          | 0/237 [00:00<?, ?it/s]

0.9789029535864979


  0%|          | 0/194 [00:00<?, ?it/s]

0.9948453608247423


In [27]:
lol = []

for file_name in tqdm(second_chapter):
    lol.append(check_file_accuracy(file_name))

print(sum(lol) / len(lol))

  0%|          | 0/8 [00:00<?, ?it/s]

0.9835082749901433


In [28]:
sm2_last_file = 'sm2_new_puhekieli.json'

In [29]:
generate_eng_finn_cards_from_file(sm2_last_file)
print(check_file_accuracy(sm2_last_file))

  0%|          | 0/110 [00:00<?, ?it/s]

0.9363636363636364


In [30]:
third_chapter = [f'sm3_kap{i}.json' for i in range(1, 9)]

In [31]:
for file_name in tqdm(third_chapter):
    generate_eng_finn_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/157 [00:00<?, ?it/s]

0.9808917197452229


  0%|          | 0/208 [00:00<?, ?it/s]

0.9807692307692307


  0%|          | 0/150 [00:00<?, ?it/s]

0.98


  0%|          | 0/144 [00:00<?, ?it/s]

0.9791666666666666


  0%|          | 0/158 [00:00<?, ?it/s]

0.9936708860759493


  0%|          | 0/221 [00:00<?, ?it/s]

0.9864253393665159


  0%|          | 0/177 [00:00<?, ?it/s]

0.9887005649717514


  0%|          | 0/240 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [32]:
for file_name in tqdm(third_chapter[-1:]):
    generate_eng_finn_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/240 [00:00<?, ?it/s]

0.9708333333333333


In [25]:
fourth_chapter = [f'sm4_kap{i}.json' for i in range(1, 6)]

In [26]:
for file_name in tqdm(fourth_chapter):
    generate_eng_finn_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/196 [00:00<?, ?it/s]

0.9744897959183674


  0%|          | 0/155 [00:00<?, ?it/s]

0.9741935483870968


  0%|          | 0/203 [00:00<?, ?it/s]

0.9458128078817734


  0%|          | 0/211 [00:00<?, ?it/s]

0.976303317535545


  0%|          | 0/278 [00:00<?, ?it/s]

0.9676258992805755


In [27]:
lol = []

for file_name in tqdm(fourth_chapter):
    lol.append(check_file_accuracy(file_name))

print(sum(lol) / len(lol))

  0%|          | 0/5 [00:00<?, ?it/s]

0.9676850738006717


In [120]:
bad_cards[0]

{'wordFirstLang': 'мы с удовольствием',
 'sentenceFirstLang': 'Мы были рады вас пригласить.',
 'wordSecondLang': 'mielellämme',
 'sentenceSecondLang': 'Kutsuimme teidät mielellämme.',
 'id': 2272}

In [146]:
moa_and_eval(bad_cards[4])

({'wordFirstLang': 'купаться',
  'sentenceFirstLang': 'Летом я люблю купаться в озере.',
  'wordSecondLang': 'to swim',
  'sentenceSecondLang': 'In the summer, I love to swim in the lake.',
  'id': 1556},
 {'englishSentenceCorrectness': {'isCorrect': True,
   'explanation': "The English sentence 'In the summer, I love to swim in the lake' is grammatically correct and natural. It uses proper syntax and conveys the intended meaning clearly.",
   'suggestedFix': None},
  'wordUsage': {'isCorrect': False,
   'explanation': "The word 'to swim' in English does not accurately convey the meaning of the Russian 'купаться' and Finnish 'kylpeä', which more closely align with 'to bathe' in this context. Both Russian and Finnish words imply the action of bathing rather than swimming.",
   'suggestedFixSentence': 'In the summer, I love to bathe in the lake.',
   'suggestedFixWord': 'to bathe'},
  'wordTranslationAccuracy': {'isCorrect': False,
   'explanation': "The English translation 'to swim' doe

In [156]:
def meaxture_of_improving_agents(ru_finn_card, ru_eng_card, eval_result, n_models=3):
    is_phrase_card = (ru_finn_card['sentenceFirstLang'] == '')

    improved_cards = []
    for _ in range(n_models):
        improved_ru_eng_card = {}

        if is_phrase_card:
            improved_ru_eng_card = improve_ru_eng_phrase_card(ru_finn_card, ru_eng_card, eval_result)
        else:
            improved_ru_eng_card = improve_ru_eng_full_card(ru_finn_card, ru_eng_card, eval_result)

        improved_cards.append(improved_ru_eng_card)

    card = regressor(ru_finn_card, improved_cards, n_models)
    print(improved_cards)
    print('==========')
    print(card)

    return card

    #eval_result = {}

    #if is_phrase_card:
    #    eval_result = eval_phrase_card(ru_finn_card, ru_eng_card)
    #else:
    #    eval_result = eval_full_card(ru_finn_card, ru_eng_card)

In [None]:
def card_handler_with_agents(ru_finn_card, num_iter=5, num_agents=3):
    is_phrase_card = (ru_finn_card['sentenceFirstLang'] == '')

    ru_eng_card = {}

    if is_phrase_card:
        ru_eng_card = generate_ru_eng_phrase_card(ru_finn_card)
    else:
        ru_eng_card = generate_ru_eng_full_card(ru_finn_card)

    eval_result = {}

    if is_phrase_card:
        eval_result = eval_phrase_card(ru_finn_card, ru_eng_card)
    else:
        eval_result = eval_full_card(ru_finn_card, ru_eng_card)

    for _ in range(num_iter):
        if check_eval(eval_result, is_phrase_card):
            break
        
        if is_phrase_card:
            ru_eng_card = meaxture_of_improving_agents(ru_finn_card, ru_eng_card, eval_result, num_agents)
        else:
            ru_eng_card = meaxture_of_improving_agents(ru_finn_card, ru_eng_card, eval_result, num_agents)

        if is_phrase_card:
            eval_result = eval_phrase_card(ru_finn_card, ru_eng_card)
        else:
            eval_result = eval_full_card(ru_finn_card, ru_eng_card)
    
    return ru_eng_card, eval_result

In [157]:
card_handler_with_agents(bad_cards[4])

[{'wordFirstLang': 'купаться', 'sentenceFirstLang': 'Летом я люблю купаться в озере.', 'wordSecondLang': 'to take a dip', 'sentenceSecondLang': 'In the summer, I love taking a dip in the lake.', 'id': 1556}, {'wordFirstLang': 'купаться', 'sentenceFirstLang': 'Летом я люблю купаться в озере.', 'wordSecondLang': 'to take a dip', 'sentenceSecondLang': 'In the summer, I love taking a dip in the lake.', 'id': 1556}, {'wordFirstLang': 'купаться', 'sentenceFirstLang': 'Летом я люблю купаться в озере.', 'wordSecondLang': 'to take a dip', 'sentenceSecondLang': 'In the summer, I love taking a dip in the lake.', 'id': 1556}]
{'wordFirstLang': 'купаться', 'sentenceFirstLang': 'Летом я люблю купаться в озере.', 'wordSecondLang': 'to swim', 'sentenceSecondLang': 'In the summer, I love swimming in the lake.', 'id': 1556}
[{'wordFirstLang': 'купаться', 'sentenceFirstLang': 'Летом я люблю купаться в озере.', 'wordSecondLang': 'to go swimming', 'sentenceSecondLang': 'In the summer, I love going swimming

({'wordFirstLang': 'купаться',
  'sentenceFirstLang': 'Летом я люблю купаться в озере.',
  'wordSecondLang': 'to swim',
  'sentenceSecondLang': 'In the summer, I love to swim in the lake.',
  'id': 1556},
 {'englishSentenceCorrectness': {'isCorrect': True,
   'explanation': "The English sentence 'In the summer, I love to swim in the lake.' is grammatically correct and natural.",
   'suggestedFix': None},
  'wordUsage': {'isCorrect': False,
   'explanation': "The English word 'to swim' doesn't precisely capture the meaning of the Russian 'купаться' or the Finnish 'kylpeä', which can imply bathing or swimming more leisurely. 'To swim' is more about active swimming.",
   'suggestedFixSentence': 'In the summer, I love to bathe in the lake.',
   'suggestedFixWord': 'to bathe'},
  'wordTranslationAccuracy': {'isCorrect': False,
   'explanation': "The translation of the word 'купаться' and 'kylpeä' to 'to swim' is not entirely accurate. A more accurate translation would be 'to bathe'.",
   's