In [1]:
import openai
from openai import AzureOpenAI, AsyncAzureOpenAI
from tqdm.notebook import tqdm
import os
from dotenv import load_dotenv, find_dotenv
import requests
import base64

GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_4_TURBO_PREVIEW = "gpt-4-turbo-preview"
GPT_4 = 'gpt-4'
GPT_4o = 'gpt-4o'

def get_openai_api_key():
    _ = load_dotenv(find_dotenv())

    return os.getenv("OPENAI_API_KEY")

OPENAI_API_KEY = get_openai_api_key()
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
    api_version="2024-07-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
)

In [2]:
source_lang = 'Russian'
secondary_lang = 'Finnish'
target_lang = 'Kazakh'

In [3]:
import json


RU_FI_EVAL_FOLDER = '../../data/russian-english/cards/eval_results/ru_eng_'
def get_ru_finn_eval_results_from_file(file_name):
    data = []
    file_path = RU_FI_EVAL_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

RU_KZ_EVAL_FOLDER = '../../data/russian-kazakh/cards/eval_results/ru_kz_'
def get_ru_kz_eval_results_from_file(file_name):
    data = []
    file_path = RU_KZ_EVAL_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data


RU_FI_FOLDER_WITH_JSON = '../../data/russian-finnish/cards/curated_platform_cards/'
def get_ru_finn_cards_from_file(file_name):
    data = []
    file_path = RU_FI_FOLDER_WITH_JSON + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    res = []
    for card in data:
        if 'isMarkedDeleted' not in card.keys():
            res.append(card)
    return res


RU_KZ_OUTPUT_FOLDER = '../../data/russian-kazakh/cards/test_cards/ru_kz_'
def get_ru_kz_cards_from_file(file_name):
    data = []
    file_path = RU_KZ_OUTPUT_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data


OUTPUT_FOLDER = '../../data/russian-kazakh/cards/test_cards/ru_kz_'
def write_cards_to_file(file_name, cards):
    file_path = OUTPUT_FOLDER + file_name
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(cards, f, ensure_ascii=False, indent=2)
        
        
EVAL_FOLDER = '../../data/russian-kazakh/cards/eval_results/ru_kz_'
def write_eval_results_to_file(file_name, results):
    file_path = EVAL_FOLDER + file_name
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

In [4]:
def generate_phrase_card(source_card, secondary_card, card_id, source_lang, target_lang, secondary_lang, model=GPT_4o):

    source_word, source_sentence, _ = source_card.items()
    secondary_word, secondary_sentence, _ = secondary_card.items()

    system_prompt = f'''You are a multilingual assistant who is proficient in {source_lang}, {secondary_lang} and {target_lang}.'''

    user_prompt = f"""
    Translate the given {source_lang} word or phrase: '{source_word}' into clear and natural {target_lang}, reflecting its meaning and context as the primary focus. Use the {secondary_lang} equivalent: '{secondary_word}' as a supportive reference to help clarify or refine the exact context if needed. The goal is to create a {target_lang} translation that is accurate, fluent, and authentic to native speakers, avoiding overly complex or literal phrasing.

    Please provide the {target_lang} translation of the word or phrase and nothing else.

    Important formatting rules:
    1. Ensure punctuation consistency with the source language:
        - If the source word or phrase ends with a period, the translation should also end with a period unless there is another example in brackets
    2. Maintain consistent capitalization rules between source and target languages:
        - If the source word or phrase ends starts with lower case letter, the translation should also start with lower case letter
    3. Return the translation without any external quotation marks:
        - Do not wrap the result in quotes
    4. Return only the translation without any additions
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )

    tr_word = response.choices[0].message.content.strip()

    card = {}
    card["word"] = tr_word
    card["sentence"] = ""
    card["id"] = card_id

    return card

In [5]:
def generate_full_card(source_card, secondary_card, card_id, source_lang, target_lang, secondary_lang, model=GPT_4o):

    source_word, source_sentence, _ = source_card.items()
    secondary_word, secondary_sentence, _ = secondary_card.items()

    system_prompt = f'''You are a multilingual assistant who is proficient in {source_lang}, {secondary_lang} and {target_lang}.'''

    user_prompt = f"""
    **Translate the given {source_lang} word or phrase along with its {secondary_lang} translation into {target_lang}, and then translate the provided {source_lang} sentence, incorporating the {target_lang} translation of the word or phrase. Use synonyms or related terms where necessary to convey the intended meaning and maintain naturalness in {target_lang}.**  

    Given word or phrase ({source_lang}): '{source_word}'  
    Given word or phrase ({secondary_lang}): '{secondary_word}'  

    Given sentence ({source_lang}): '{source_sentence}'  
    Given sentence ({secondary_lang}): '{secondary_sentence}'  

    ### Response structure:  

    Respond in JSON format with the following structure:
    {{
        "translatedWord": "Translated word in {target_lang}",
        "translatedSentence": "Translated sentence in {target_lang}"
    }}
    ```

    Important formatting rules:
    1. Ensure punctuation consistency with the source language:
        - If the source word or phrase ends with a period, the translation should also end with a period unless there is another example in brackets
    2. Maintain consistent capitalization rules between source and target languages:
        - If the source word or phrase ends starts with lower case letter, the translation should also start with lower case letter
    3. Return the translation without any external quotation marks:
        - Do not wrap the result in quotes
    """

    response = client.chat.completions.create(
        model=model,
        response_format={ "type": "json_object" },
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )
    
    response_dict = json.loads(response.choices[0].message.content.strip())
    tr_word = response_dict['translatedWord']
    tr_sentence = response_dict['translatedSentence']

    card = {}
    card["word"] = tr_word.strip()
    card["sentence"] = tr_sentence.strip()
    card["id"] = card_id

    return card

In [6]:
def eval_phrase_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, model=GPT_4o):

    source_word, source_sentence, _ = source_card.items()
    secondary_word, secondary_sentence, _ = secondary_card.items()
    target_word, target_sentence, _ = target_card.items()

    system_prompt = f'''You are a multilingual assistant who is proficient in {source_lang}, {secondary_lang} and {target_lang}.'''

    user_prompt = f"""
    **As an AI model, your task is to evaluate the correctness and naturalness of {target_lang} translations for given {source_lang} and {secondary_lang} words or phrases. Check if the {target_lang} translation accurately conveys the meaning and context of the {source_lang} and {secondary_lang} versions, and whether it sounds natural to a native speaker. Your evaluation does not need to suggest the best possible translation, only confirm that it is good enough and identify any issues if present.**  

    When suggesting corrections, provide only the final corrected {target_lang} translation. If no correction is needed, set `suggestedFix` to `null`.  

    Here are the words or phrases:  
    - Word or Phrase in {source_lang}: {source_word}  
    - Word or Phrase in {secondary_lang}: {secondary_word}  
    - Word or Phrase in {target_lang}: {target_word}  

    Respond in JSON format using the following structure:
    {{
      "translationAccuracy": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct",
        "suggestedFix": "Suggested correction if there is an issue or null if no correction is needed"
      }}
    }}
    """

    response = client.chat.completions.create(
      model=model,
      response_format={ "type": "json_object" },
      messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
      ]
    )

    res = json.loads(response.choices[0].message.content.strip())
    res['id'] = card_id

    return res

In [7]:
def eval_full_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, model=GPT_4o):

    source_word, source_sentence, _ = source_card.items()
    secondary_word, secondary_sentence, _ = secondary_card.items()
    target_word, target_sentence, _ = target_card.items()

    system_prompt = f'''You are a multilingual assistant who is proficient in {source_lang}, {secondary_lang} and {target_lang}.'''

    user_prompt = f"""
    **Evaluate the correctness of a {target_lang} word and sentence based on their translations from {source_lang} and {secondary_lang}. You will receive a word in {source_lang}, {secondary_lang}, and its translation in {target_lang}, as well as a sentence in {source_lang}, {secondary_lang}, and its translation in {target_lang}. Your task is to assess the quality of the {target_lang} sentence, the usage of the {target_lang} word in the sentence, and the accuracy of the translations from {source_lang} and {secondary_lang} to {target_lang}. For each evaluation point, provide a detailed explanation of your judgment and suggest fixes where applicable, either to the {target_lang} word, the {target_lang} sentence, or both.**  

    Please ensure that the {target_lang} sentence is grammatically correct and natural. Suggest a corrected version if necessary. Verify that the {target_lang} sentence contains the {target_lang} word in some form and suggest using synonyms or related terms if the word is missing. Prioritize naturalness and correctness. Ensure that the translations of both the word and sentence from {source_lang} and {secondary_lang} to {target_lang} are accurate and provide corrections if necessary.  

    Respond in JSON format with the following structure:  
    {{
        "sentenceCorrectness": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct.",
            "suggestedFix": "Suggested corrected sentence if there is an issue, or null if not applicable."
        }},
        "wordUsage": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct.",
            "suggestedFixSentence": "Suggested corrected sentence if the word usage is incorrect, or null if not applicable.",
            "suggestedFixWord": "Suggested corrected word if the word usage is incorrect, or null if not applicable."
        }},
        "wordTranslationAccuracy": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct.",
            "suggestedFix": "Suggested correction for translation issues, or null if not applicable."
        }},
        "sentenceTranslationAccuracy": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct.",
            "suggestedFix": "Suggested correction for translation issues, or null if not applicable."
        }}
    }}

    Here are the provided word and sentence in {source_lang}, {secondary_lang}, and {target_lang}:  

    - Word in {source_lang}: {source_word}  
    - Word in {secondary_lang}: {secondary_word}  
    - Word in {target_lang}: {target_word}  
    - Sentence in {source_lang}: {source_sentence}  
    - Sentence in {secondary_lang}: {secondary_sentence}  
    - Sentence in {target_lang}: {target_sentence}  

    Please adhere to this structure to ensure clear, actionable feedback for each evaluation point.
    """

    response = client.chat.completions.create(
      model=model,
      response_format={ "type": "json_object" },
      messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
      ]
    )

    res = json.loads(response.choices[0].message.content.strip())
    res['id'] = card_id

    return res

In [8]:
BASE_SYSTEM_PROMPT = f'''You are a multilingual assistant who is proficient in {source_lang}, {secondary_lang} and {target_lang}.'''

In [9]:
def improve_phrase_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, eval_result, system_prompt=BASE_SYSTEM_PROMPT, model=GPT_4o):

    source_word, source_sentence, _ = source_card.items()
    secondary_word, secondary_sentence, _ = secondary_card.items()
    target_word, target_sentence, _ = target_card.items()

    _, explanation, suggested_fix = eval_result['translationAccuracy'].values()

    user_prompt = f"""
    **Translate the given {source_lang} word or phrase: '{source_word}' into clear and natural {target_lang}, prioritizing its meaning and how it would sound most authentic and fluent to native speakers. Use the {secondary_lang} equivalent: '{secondary_word}' as a supportive reference if it helps refine the context or meaning. While accuracy is important, favor translations that fit naturally into everyday {target_lang}, even if they are not the most direct equivalents.**  

    Consider the following:  
    - Existing {target_lang} translation: '{target_word}'  
    - Issues identified: '{explanation}'  
    - Suggested improvement: '{suggested_fix}'  

    ### Instructions:  
    Based on the information provided, craft a {target_lang} translation that balances accuracy, naturalness, and context. You may adapt the word or phrase slightly to ensure it resonates well with native speakers and fits its intended use.  

    **Respond with only the final {target_lang} translation without any additional explanations, just word or phrase in {target_lang}.**

    Important formatting rules:
    1. Ensure punctuation consistency with the source language:
        - If the source word or phrase ends with a period, the translation should also end with a period unless there is another example in brackets
    2. Maintain consistent capitalization rules between source and target languages:
        - If the source word or phrase ends starts with lower case letter, the translation should also start with lower case letter
    3. Return the translation without any external quotation marks:
        - Do not wrap the result in quotes
    4. Return only the translation without any additions
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )
    
    tr_word = response.choices[0].message.content.strip()

    card = {}
    card["word"] = tr_word
    card["sentence"] = ""
    card["id"] = card_id

    return card

In [10]:
def improve_full_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, eval_result, system_prompt=BASE_SYSTEM_PROMPT, model=GPT_4o):

    source_word, source_sentence, _ = source_card.items()
    secondary_word, secondary_sentence, _ = secondary_card.items()
    target_word, target_sentence, _ = target_card.items()

    user_prompt = f"""
    **Translate the given {source_lang} word or phrase into {target_lang} and use it within the provided {source_lang} sentence to create a natural and accurate {target_lang} translation. Use the {secondary_lang} word or sentence as additional context if needed. Focus on accurately conveying the meaning of the {source_lang} source while incorporating all feedback and suggestions from the evaluation results.**  

    When choosing the {target_lang} word or phrase, prioritize naturalness and fluency in the sentence over strict accuracy. While the translation should reflect the original meaning, it is acceptable to use a word or phrase that is not the most direct translation but still conveys the intended sense in a way that sounds natural and idiomatic in {target_lang}.  

    ### **Details to guide your translation:**  

    - **Word or phrase in {source_lang}:** '{source_word}'  
    - **Word or phrase in {secondary_lang}:** '{secondary_word}'  
    - **Sentence in {source_lang}:** '{source_sentence}'  
    - **Sentence in {secondary_lang}:** '{secondary_sentence}'  
    - **Existing {target_lang} translation of the word or phrase:** '{target_word}'  
    - **Existing {target_lang} translation of the sentence:** '{target_sentence}'  

    ### **Evaluation Results Summary:**  

    **{target_lang} Sentence Evaluation:**  
    - **Correctness:** {eval_result['sentenceCorrectness']['isCorrect']}  
    - **Explanation:** {eval_result['sentenceCorrectness']['explanation']}  
    - **Suggested Fix (if applicable):** {eval_result['sentenceCorrectness']['suggestedFix']}  

    **Word Usage Evaluation:**  
    - **Correctness:** {eval_result['wordUsage']['isCorrect']}  
    - **Explanation:** {eval_result['wordUsage']['explanation']}  
    - **Suggested Fix for Word (if applicable):** {eval_result['wordUsage']['suggestedFixWord']}  
    - **Suggested Fix for Sentence (if applicable):** {eval_result['wordUsage']['suggestedFixSentence']}  

    **Word Translation Accuracy Evaluation:**  
    - **Correctness:** {eval_result['wordTranslationAccuracy']['isCorrect']}  
    - **Explanation:** {eval_result['wordTranslationAccuracy']['explanation']}  
    - **Suggested Fix (if applicable):** {eval_result['wordTranslationAccuracy']['suggestedFix']}  

    **Sentence Translation Accuracy Evaluation:**  
    - **Correctness:** {eval_result['sentenceTranslationAccuracy']['isCorrect']}  
    - **Explanation:** {eval_result['sentenceTranslationAccuracy']['explanation']}  
    - **Suggested Fix (if applicable):** {eval_result['sentenceTranslationAccuracy']['suggestedFix']}  

    ### **Instructions:**  

    1. **Review the Evaluation Feedback:**  
    - Carefully consider all provided explanations and suggested fixes for the word or phrase, sentence, and overall translation accuracy.  

    2. **Translate the Word or Phrase:**  
    - Choose a translation that balances accuracy with naturalness.  
    - If the word usage is marked incorrect, incorporate the suggested fix or refine it further for better contextual alignment.  

    3. **Translate the {source_lang} Sentence:**  
    - Integrate the translated word or phrase naturally into the sentence.  
    - If the sentence translation is marked incorrect, incorporate the suggested fixes and adjust for fluency and clarity.  

    4. **Address Translation Accuracy Issues:**  
    - If any translation inaccuracies are identified, apply the suggested fixes or clarify the meaning while ensuring the translation sounds natural and idiomatic.  

    5. **Leverage Context:**  
    - Use the {secondary_lang} word or sentence as additional guidance where necessary.  

    6. **Produce a Polished Result:**  
    - Ensure the final translation conveys the intended meaning, aligns naturally with the sentence, and incorporates feedback from the evaluation results.  

    ### **Response structure:**  

    Respond in JSON format with the following structure:  
    {{
        "translatedWord": "Translated word in {target_lang}",
        "translatedSentence": "Translated sentence in {target_lang}"
    }}

    Important formatting rules:
    1. Ensure punctuation consistency with the source language:
        - If the source word or phrase ends with a period, the translation should also end with a period unless there is another example in brackets
    2. Maintain consistent capitalization rules between source and target languages:
        - If the source word or phrase ends starts with lower case letter, the translation should also start with lower case letter
    3. Return the translation without any external quotation marks (do not wrap the result in quotes)
    """

    response = client.chat.completions.create(
        model=model,
        response_format={ "type": "json_object" },
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )
    
    response_dict = json.loads(response.choices[0].message.content.strip())
    tr_word = response_dict['translatedWord']
    tr_sentence = response_dict['translatedSentence']

    card = {}
    card["word"] = tr_word.strip()
    card["sentence"] = tr_sentence.strip()
    card["id"] = card_id

    return card

In [11]:
def is_phrase_card(card):
    return card['sentence'] == ''

In [12]:
def check_eval(eval_result, is_phrase_card_):
    if is_phrase_card_:
        return eval_result['translationAccuracy']['isCorrect']
    
    w_tr_accuracy = eval_result['wordTranslationAccuracy']['isCorrect']
    s_tr_accuracy = eval_result['sentenceTranslationAccuracy']['isCorrect']
    s_accuracy = eval_result['sentenceCorrectness']['isCorrect']
    w_accuracy = eval_result['wordUsage']['isCorrect']
    
    return all([w_tr_accuracy, s_tr_accuracy, s_accuracy, w_accuracy])

In [13]:
def regressor(source_card, secondary_card, target_cards, card_id, source_lang, target_lang, secondary_lang, n_models=3, model=GPT_4o):

    source_word, source_sentence, _ = source_card.items()
    secondary_word, secondary_sentence, _ = secondary_card.items()

    target_words = [card['word'] for card in target_cards]
    target_sentences = [card['sentence'] for card in target_cards]

    models_response = ''
    for i in range(n_models):
        models_response += f"""{i + 1}. `Model {i + 1}` Response: {{ "{target_lang}Word": '{target_words[i]}', "{target_lang}Sentence": '{target_sentences[i]}'}}\n"""

    system_prompt = f'''Act as a translation evaluator and synthesizer. Assess model-generated translations for a {source_lang} word/phrase and its sentence, prioritizing accuracy, fluency, and contextual fit. Return the best translations in JSON format with refinements if necessary.'''

    user_prompt = f"""
**Instruction for the Regressor**:  
Evaluate the translations provided by different models for a given {source_lang} word or phrase and its accompanying sentence. Select or synthesize the best {target_lang} translation for the word and sentence based on:
1. **Accuracy**: Ensure the translation reflects the original {source_lang} meaning correctly.
2. **Naturalness**: The {target_lang} translations should be fluid and use appropriate synonyms or phrasing where needed.
3. **Consistency**: The translated sentence should appropriately incorporate the word's translation.

### Input:
Given word or phrase ({source_lang}): '{source_word}'  
Given word or phrase ({secondary_lang}): '{secondary_word}'  

Given sentence ({source_lang}): '{source_sentence}'  
Given sentence ({secondary_lang}): '{secondary_sentence}'  

Responses from models:
{models_response}

---

### Task:
Critically evaluate these responses, identify the best translations, and synthesize a single, high-quality translation. If needed, refine the translations to ensure accuracy and naturalness. Do not simply copy; improve where necessary.

### Response structure:
Respond in JSON format with the following structure:
{{
    "{target_lang}Word": "Best translated word in {target_lang}",
    "{target_lang}Sentence": "Best translated sentence in {target_lang}"
}}

---

### Key Considerations for Evaluation:
- **Accuracy**: Does the translation reflect the original meaning and nuances in {source_lang}?
- **Fluency**: Is the translation grammatically correct and natural in {target_lang}?
- **Contextual Fit**: Does the sentence correctly integrate the translation of the word or phrase?

Important formatting rules:
1. Ensure punctuation consistency with the source language:
    - If the source word or phrase ends with a period, the translation should also end with a period unless there is another example in brackets
2. Maintain consistent capitalization rules between source and target languages:
    - If the source word or phrase ends starts with lower case letter, the translation should also start with lower case letter
3. Return the translation without any external quotation marks (do not wrap the result in quotes)
    """

    response = client.chat.completions.create(
        model=model,
        response_format={ "type": "json_object" },
        messages=[
        {"role": "system", "content": system_prompt.strip()},
        {"role": "user", "content": user_prompt.strip()}
        ]
    )
    
    response_dict = json.loads(response.choices[0].message.content.strip())
    target_word = response_dict[f'{target_lang}Word']
    target_sentence = response_dict[f'{target_lang}Sentence']

    card = {}
    card["word"] = target_word.strip()
    card["sentence"] = target_sentence.strip()
    card["id"] = card_id

    return card


In [14]:
system_prompts = [
    "Act as a linguistic expert specializing in Russian, Finnish, and Kazakh translations. Evaluate the given Kazakh word and sentence translations in conjunction with the Russian context. Based on evaluation feedback, refine the word and sentence translations for accuracy, fluency, and naturalness, ensuring idiomatic Finnish usage. Provide the response in JSON format.",
    "Take on the role of a translator focusing on contextual accuracy. Use the provided Kazakh and Russian inputs, along with feedback evaluations, to create an Finnish translation that balances precise meaning with natural phrasing. Address inaccuracies and ensure the translation reads fluent?ly and idiomatically. Respond in JSON format.",
    "Serve as a cultural localization specialist. Translate the provided Kazakh word and sentence into Finnish, using the Russian context and feedback to enhance naturalness and idiomatic expression. Focus on making the translation contextually relevant and fluent for an Finnish-speaking audience. Provide your output in JSON format."
]

In [15]:
def meaxture_of_improving_agents(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, eval_result, n_models=3):
    is_phrase_card_ = is_phrase_card(source_card)

    target_cards = []
    for s_p in system_prompts:
        improved_eng_finn_card = {}

        if is_phrase_card_:
            improved_eng_finn_card = improve_phrase_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, eval_result, system_prompt=s_p)
        else:
            improved_eng_finn_card = improve_full_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, eval_result, system_prompt=s_p)

        target_cards.append(improved_eng_finn_card)

    card = regressor(source_card, secondary_card, target_cards, card_id, source_lang, target_lang, secondary_lang, n_models)

    return card

In [16]:
def card_handler_with_agents(source_card, secondary_card, card_id, source_lang, target_lang, secondary_lang, num_iter=5, num_agents=3):
    is_phrase_card_ = is_phrase_card(source_card)

    target_card = {}

    if is_phrase_card_:
        target_card = generate_phrase_card(source_card, secondary_card, card_id, source_lang, target_lang, secondary_lang)
    else:
        target_card = generate_full_card(source_card, secondary_card, card_id, source_lang, target_lang, secondary_lang)

    eval_result = {}

    if is_phrase_card_:
        eval_result = eval_phrase_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang)
    else:
        eval_result = eval_full_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang)

    for _ in range(num_iter):
        if check_eval(eval_result, is_phrase_card_):
            break
        
        if is_phrase_card_:
            target_card = meaxture_of_improving_agents(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, eval_result)
        else:
            target_card = meaxture_of_improving_agents(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang, eval_result)

        if is_phrase_card_:
            eval_result = eval_phrase_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang)
        else:
            eval_result = eval_full_card(source_card, secondary_card, target_card, card_id, source_lang, target_lang, secondary_lang)
    
    return target_card, eval_result

In [17]:
import logging

logging.basicConfig(
    filename='errors.txt',
    level=logging.ERROR,
    format='%(message)s',
    filemode='a'
)

def generate_source_target_cards_with_eval_results(source_cards, secondary_cards, source_lang, target_lang, secondary_lang):

    source_target_cards = []
    eval_results = []

    for source_card, secondary_card in tqdm(list(zip(source_cards, secondary_cards))):
        card_id = source_card['id']

        while True:
            try:
                target_card, eval_res = card_handler_with_agents(source_card, secondary_card, card_id, source_lang, target_lang, secondary_lang)

                card = {}
                card["wordFirstLang"] = source_card['word']
                card["sentenceFirstLang"] = source_card['sentence']
                card["wordSecondLang"] = target_card['word']
                card["sentenceSecondLang"] = target_card['sentence']
                card["id"] = card_id

                source_target_cards.append(card)
                eval_results.append(eval_res)
                break
            except Exception as e:
                logging.error(str(e))
                pass

    assert len(source_target_cards) == len(eval_results)

    return source_target_cards, eval_results

In [18]:
def generate_and_write_cards(file_name):
    ru_finn_cards = get_ru_finn_cards_from_file(file_name)

    source_cards = [{'word': card["wordFirstLang"], 'sentence': card["sentenceFirstLang"], 'id': card['id']} for card in ru_finn_cards]
    secondary_cards = [{'word': card["wordSecondLang"], 'sentence': card["sentenceSecondLang"], 'id': card['id']} for card in ru_finn_cards]

    source_lang, target_lang, secondary_lang = 'Russian', 'Kazakh', 'Finnish'

    source_target_cards, eval_results = generate_source_target_cards_with_eval_results(source_cards, secondary_cards, source_lang, target_lang, secondary_lang)

    res = []
    for card, eval_res in list(zip(source_target_cards, eval_results)):
        res.append(check_eval(eval_res, card['sentenceFirstLang'] == ''))
    
    print(sum(res) / len(res))

    write_cards_to_file(file_name, source_target_cards)
    write_eval_results_to_file(file_name, eval_results)

In [23]:
first_book = [f'sm1_new_kap{i}.json' for i in range(1, 10)]

for file_name in tqdm(first_book):
    generate_and_write_cards(file_name)

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

0.9602649006622517


  0%|          | 0/192 [00:00<?, ?it/s]

0.953125


  0%|          | 0/238 [00:00<?, ?it/s]

0.9411764705882353


  0%|          | 0/267 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
first_book = [f'sm1_new_kap{i}.json' for i in range(1, 10)]

for file_name in tqdm(first_book[3:]):
    generate_and_write_cards(file_name)

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/267 [00:00<?, ?it/s]

0.9363295880149812


  0%|          | 0/153 [00:00<?, ?it/s]

0.954248366013072


  0%|          | 0/239 [00:00<?, ?it/s]

In [19]:
first_book = [f'sm1_new_kap{i}.json' for i in range(1, 10)]

for file_name in tqdm(first_book[5:]):
    generate_and_write_cards(file_name)

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/239 [00:00<?, ?it/s]

0.9581589958158996


  0%|          | 0/184 [00:00<?, ?it/s]

0.9619565217391305


  0%|          | 0/260 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [20]:
first_book = [f'sm1_new_kap{i}.json' for i in range(1, 10)]

for file_name in tqdm(first_book[7:]):
    generate_and_write_cards(file_name)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/260 [00:00<?, ?it/s]

0.926923076923077


  0%|          | 0/226 [00:00<?, ?it/s]

0.9292035398230089


In [23]:
import numpy as np

def get_cards_and_evals_from_files(file_names):
    ru_kz_cards = []
    ru_kz_evals = []

    for file_name in file_names:
        ru_kz_cards.extend(get_ru_kz_cards_from_file(file_name))
        ru_kz_evals.extend(get_ru_kz_eval_results_from_file(file_name))

    return ru_kz_cards, ru_kz_evals

In [24]:
first_book_cards, first_book_evals = get_cards_and_evals_from_files(first_book)

In [25]:
def check_cards_accuracy(source_target_cards, eval_results):
    res = []
    for card, eval_res in list(zip(source_target_cards, eval_results)):
        res.append(check_eval(eval_res, card['sentenceFirstLang'] == ''))
    
    print(sum(res) / len(res))

In [26]:
check_cards_accuracy(first_book_cards, first_book_evals)

0.9450261780104712


In [28]:
list(zip(first_book_cards, first_book_evals))[:10]

[({'wordFirstLang': 'Привет!',
   'sentenceFirstLang': 'Привет, как дела?',
   'wordSecondLang': 'Сәлем!',
   'sentenceSecondLang': 'Сәлем, қалайсың?',
   'id': 1000},
  {'sentenceCorrectness': {'isCorrect': True,
    'explanation': "The Kazakh sentence 'Сәлем, қалайсың?' is grammatically correct and natural. It properly translates the greeting and inquiry, aligning well with both the Russian and Finnish sentences.",
    'suggestedFix': None},
   'wordUsage': {'isCorrect': True,
    'explanation': "The Kazakh word 'Сәлем!' is appropriately used in the sentence 'Сәлем, қалайсың?'. It corresponds well with the greeting 'Привет!' and 'Hei!' in Russian and Finnish respectively.",
    'suggestedFixSentence': None,
    'suggestedFixWord': None},
   'wordTranslationAccuracy': {'isCorrect': True,
    'explanation': "The translation of the word from Russian and Finnish to Kazakh is accurate. 'Привет!' and 'Hei!' both mean 'Hello!', which is correctly translated into Kazakh as 'Сәлем!'.",
    's

In [35]:
import random

first_book_sample = random.choices(list(zip(first_book_cards, first_book_evals)), k=30)

In [36]:
first_book_sample

[({'wordFirstLang': 'сто один',
   'sentenceFirstLang': '',
   'wordSecondLang': 'жүз бір',
   'sentenceSecondLang': '',
   'id': 1095},
  {'translationAccuracy': {'isCorrect': True,
    'explanation': "The Kazakh translation accurately conveys the meaning of the Russian 'сто один' and the Finnish 'satayksi', both of which mean '101'. 'Жүз бір' is the correct and natural way to express the number 101 in Kazakh.",
    'suggestedFix': None},
   'id': 1095}),
 ({'wordFirstLang': 'у нас',
   'sentenceFirstLang': 'У нас закончился кофе.',
   'wordSecondLang': 'бізде',
   'sentenceSecondLang': 'Бізде кофе таусылды.',
   'id': 2933},
  {'sentenceCorrectness': {'isCorrect': True,
    'explanation': "The Kazakh sentence, 'Бізде кофе таусылды.', is grammatically correct and natural. It correctly uses the structure to indicate that they have run out of coffee.",
    'suggestedFix': None},
   'wordUsage': {'isCorrect': True,
    'explanation': "The word 'бізде' is correctly used in the sentence to

In [37]:
second_book = [f'sm2_new_kap{i}.json' for i in range(1, 9)]

for file_name in tqdm(second_book):
    generate_and_write_cards(file_name)

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/239 [00:00<?, ?it/s]

0.9372384937238494


  0%|          | 0/195 [00:00<?, ?it/s]

0.9487179487179487


  0%|          | 0/217 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [19]:
second_book = [f'sm2_new_kap{i}.json' for i in range(1, 9)]

for file_name in tqdm(second_book[2:]):
    generate_and_write_cards(file_name)

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/217 [00:00<?, ?it/s]

0.9354838709677419


  0%|          | 0/298 [00:00<?, ?it/s]

0.9697986577181208


  0%|          | 0/233 [00:00<?, ?it/s]

0.9484978540772532


  0%|          | 0/206 [00:00<?, ?it/s]

0.9368932038834952


  0%|          | 0/237 [00:00<?, ?it/s]

0.9324894514767933


  0%|          | 0/194 [00:00<?, ?it/s]

0.9587628865979382


In [20]:
third_book = [[f'sm3_kap{i}.json' for i in range(1, 9)]]

for file_name in tqdm(third_book):
    generate_and_write_cards(file_name)

  0%|          | 0/1 [00:00<?, ?it/s]

TypeError: can only concatenate str (not "list") to str

In [None]:
file_names = [f'sm1_new_kap{i}.json' for i in range(1, 10)]
file_names.extend([f'sm2_new_kap{i}.json' for i in range(1, 9)])
file_names.append('sm2_new_puhekieli.json')
file_names.extend([f'sm3_kap{i}.json' for i in range(1, 9)])
file_names.extend([f'sm4_kap{i}.json' for i in range(1, 6)])