In [1]:
import openai
from openai import AzureOpenAI, AsyncAzureOpenAI
from tqdm.notebook import tqdm
import os
from dotenv import load_dotenv, find_dotenv
import requests
import base64

# Model identifiers; GPT_4o is the default `model=` argument of the functions below.
# NOTE(review): the client is AzureOpenAI, so `model` is presumably matched against an
# Azure deployment name — confirm that deployments with these names exist.
GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_4_TURBO_PREVIEW = "gpt-4-turbo-preview"
GPT_4 = 'gpt-4'
GPT_4o = 'gpt-4o'

def get_openai_api_key():
    """Load the ``.env`` file located by ``find_dotenv()`` and return ``OPENAI_API_KEY``.

    Side effect: the variables from the ``.env`` file are loaded into the process
    environment, which later ``os.getenv`` calls in this notebook rely on.

    Returns:
        The value of the ``OPENAI_API_KEY`` environment variable, or ``None`` if unset.
    """
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)
    return os.getenv("OPENAI_API_KEY")

# get_openai_api_key() also loads the .env file, so it runs first: values that
# exist only in .env become visible to the os.getenv() lookups below.
OPENAI_API_KEY = get_openai_api_key()
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

# Shared Azure OpenAI client used by every chat-completion call in this notebook.
# The endpoint is read once and reused instead of being looked up a second time.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-07-01-preview",
    azure_endpoint=azure_endpoint,
)

In [2]:
import json


# Path prefix (directory plus the "ru_eng_" file-name prefix) for evaluation-result files.
EVAL_FOLDER = '../../data/russian-english/cards/eval_results/ru_eng_'
def get_eval_results_from_file(file_name):
    """Read the evaluation results stored for ``file_name``.

    Args:
        file_name: File name appended directly to ``EVAL_FOLDER`` (plain string
            concatenation, so the on-disk name is ``ru_eng_<file_name>``).

    Returns:
        The parsed JSON content of the file.
    """
    with open(EVAL_FOLDER + file_name, 'r', encoding='utf-8') as handle:
        return json.load(handle)


# Path prefix (directory plus the "ru_eng_" file-name prefix) for generated Russian-English cards.
RU_ENG_OUTPUT_FOLDER = '../../data/russian-english/cards/test_cards/ru_eng_'
def get_ru_eng_cards_from_file(file_name):
    """Read the generated Russian-English cards stored for ``file_name``.

    Uses ``RU_ENG_OUTPUT_FOLDER`` (declared directly above) rather than
    ``OUTPUT_FOLDER``, which is only defined further down the cell; the two hold
    the same path, so the file that is read is unchanged.

    Args:
        file_name: File name appended directly to ``RU_ENG_OUTPUT_FOLDER``.

    Returns:
        The parsed JSON content of the file.
    """
    file_path = RU_ENG_OUTPUT_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


# Directory holding the curated Russian-Finnish source cards (one JSON file per set).
RU_FINN_FOLDER_WITH_JSON = '../../data/russian-finnish/cards/curated_platform_cards/'
def get_ru_finn_cards_from_file(file_name):
    """Load Russian-Finnish cards from ``file_name``, dropping deleted ones.

    A card is dropped when it has an ``isMarkedDeleted`` key at all; the value
    stored under that key is not inspected.

    Args:
        file_name: File name appended directly to ``RU_FINN_FOLDER_WITH_JSON``.

    Returns:
        List of the remaining card dicts, in file order.
    """
    with open(RU_FINN_FOLDER_WITH_JSON + file_name, 'r', encoding='utf-8') as handle:
        all_cards = json.load(handle)
    return [entry for entry in all_cards if 'isMarkedDeleted' not in entry]


# Path prefix (directory plus the "ru_eng_" file-name prefix) for generated Russian-English cards.
OUTPUT_FOLDER = '../../data/russian-english/cards/test_cards/ru_eng_'
def write_cards_to_file(file_name, cards):
    """Write ``cards`` as pretty-printed UTF-8 JSON to ``OUTPUT_FOLDER + file_name``.

    The target directory is created when missing, so results produced by a long
    generation run are not lost to a ``FileNotFoundError`` at the final step.

    Args:
        file_name: File name appended directly to ``OUTPUT_FOLDER``.
        cards: JSON-serialisable object (a list of card dicts in this notebook).
    """
    file_path = OUTPUT_FOLDER + file_name
    parent_dir = os.path.dirname(file_path)
    if parent_dir:  # empty when the prefix has no directory component
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps Cyrillic text readable in the output file.
        json.dump(cards, f, ensure_ascii=False, indent=2)
        
        
# EVAL_FOLDER is assigned once, above get_eval_results_from_file in this cell;
# the identical second assignment that stood here was redundant and is dropped.
def write_eval_results_to_file(file_name, results):
    """Write ``results`` as pretty-printed UTF-8 JSON to ``EVAL_FOLDER + file_name``.

    The target directory is created when missing, so results produced by a long
    evaluation run are not lost to a ``FileNotFoundError`` at the final step.

    Args:
        file_name: File name appended directly to ``EVAL_FOLDER``.
        results: JSON-serialisable object (a list of evaluation dicts in this notebook).
    """
    file_path = EVAL_FOLDER + file_name
    parent_dir = os.path.dirname(file_path)
    if parent_dir:  # empty when the prefix has no directory component
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps Cyrillic text readable in the output file.
        json.dump(results, f, ensure_ascii=False, indent=2)

In [3]:
def generate_ru_eng_phrase_card(ru_finn_card, model=GPT_4o):
    """Translate the word/phrase of a Russian-Finnish card into English.

    Args:
        ru_finn_card: Dict whose five values are unpacked positionally (insertion
            order) as Russian word, Russian sentence, Finnish word, Finnish
            sentence and card id. The two sentences are not used here.
        model: Model name passed to the chat-completions call.

    Returns:
        Dict with keys ``wordFirstLang``, ``sentenceFirstLang`` (always ``""``),
        ``wordSecondLang``, ``sentenceSecondLang`` (always ``""``) and ``id``.

    Raises:
        ValueError: If ``ru_finn_card`` does not hold exactly five values.
    """
    ru_word, _, finn_word, _, card_id = ru_finn_card.values()

    system_prompt = '''You are a multilingual assistant who is proficient in Russian, Finnish and English.'''

    user_prompt = f"""
    Translate the given Russian word or phrase: '{ru_word}' into clear and natural English, reflecting its meaning and context as the primary focus. Use the Finnish equivalent: '{finn_word}' as a supportive reference to help clarify or refine the exact context if needed. The goal is to create an English translation that is accurate, fluent, and authentic to native speakers, avoiding overly complex or literal phrasing.

    Please provide the English translation of the word or phrase and nothing else.
    """

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
    )

    # The raw reply text is taken as the translation.
    english_word = completion.choices[0].message.content.strip()

    return {
        "wordFirstLang": ru_word,
        "sentenceFirstLang": "",
        "wordSecondLang": english_word,
        "sentenceSecondLang": "",
        "id": card_id,
    }

In [4]:
def generate_ru_eng_full_card(ru_finn_card, model=GPT_4o):
    """Translate both the word and the example sentence of a Russian-Finnish card into English.

    Args:
        ru_finn_card: Dict whose five values are unpacked positionally (insertion
            order) as Russian word, Russian sentence, Finnish word, Finnish
            sentence and card id.
        model: Model name passed to the chat-completions call.

    Returns:
        Dict with keys ``wordFirstLang``, ``sentenceFirstLang``, ``wordSecondLang``,
        ``sentenceSecondLang`` and ``id``; the English strings are stripped.

    Raises:
        ValueError: If ``ru_finn_card`` does not hold exactly five values.
        KeyError: If the model's JSON reply lacks ``englishWord`` or ``englishSentence``.
    """
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()

    system_prompt = '''You are a multilingual assistant who is proficient in Russian, Finnish and English.'''

    user_prompt = f"""
    Translate the given Russian word or phrase along with its Finnish translation into English, and then translate the provided Russian sentence, incorporating the English translation of the word or phrase. Use synonyms or related terms where necessary to convey the intended meaning and maintain naturalness in English.

    Given word or phrase (Russian): '{ru_word}'
    Given word or phrase (Finnish): '{finn_word}'

    Given sentence (Russian): '{ru_sentence}'
    Given sentence (Finnish): '{finn_sentence}'

    ### Response structure:

    Respond in JSON format with the following structure:
    {{
        "englishWord": "Translated word in Englsih",
        "englishSentence": "Translated sentence in English"
    }}
    """

    completion = client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
    )

    reply = json.loads(completion.choices[0].message.content.strip())
    english_word = reply['englishWord']
    english_sentence = reply['englishSentence']

    return {
        "wordFirstLang": ru_word,
        "sentenceFirstLang": ru_sentence,
        "wordSecondLang": english_word.strip(),
        "sentenceSecondLang": english_sentence.strip(),
        "id": card_id,
    }

In [5]:
def eval_phrase_card(ru_finn_card, ru_eng_card, model=GPT_4o):
    """Ask the model to judge the English translation of a word/phrase-only card.

    Args:
        ru_finn_card: Source card; its five values are unpacked positionally as
            Russian word, Russian sentence, Finnish word, Finnish sentence, id.
        ru_eng_card: Translated card; its five values are unpacked positionally,
            the third being the English word and the fifth the id.
        model: Model name passed to the chat-completions call.

    Returns:
        The model's JSON reply parsed into a dict, with the card id added under ``id``.

    Raises:
        ValueError: If either card does not hold exactly five values.
        AssertionError: If the two cards carry different ids.
    """
    ru_word, _, finn_word, _, card_id = ru_finn_card.values()
    _, _, tr_word, _, tr_id = ru_eng_card.values()

    assert card_id == tr_id

    system_prompt = '''You are a multilingual assistant who is proficient in Russian, Finnish and English.'''

    user_prompt = f"""
    As an AI model, your task is to evaluate the correctness and naturalness of English translations for given Russian and Finnish words or phrases. Check if the English translation accurately conveys the meaning and context of the Russian and Finnish versions, and whether it sounds natural to a native speaker. Your evaluation does not need to suggest the best possible translation, only confirm that it is good enough and identify any issues if present.

    When suggesting corrections, provide only the final corrected English translation. If no correction is needed, set `suggestedFix` to `null`.

    Here are the words or phrases:  
    - Word or Phrase in Russian: {ru_word}  
    - Word or Phrase in Finnish: {finn_word}  
    - Word or Phrase in English: {tr_word}  

    Respond in JSON format using the following structure:  
    {{
      "translationAccuracy": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct",
        "suggestedFix": "Suggested correction if there is an issue or null if no correction is needed"
      }}
    }}
    """

    completion = client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
    )

    verdict = json.loads(completion.choices[0].message.content.strip())
    # Tag the verdict with the card id so it can be matched to its card later.
    verdict['id'] = card_id

    return verdict

In [6]:
def eval_full_card(ru_finn_card, ru_eng_card, model=GPT_4o):
    """Ask the model to judge the English word and sentence of a full card.

    The prompt requests four verdict sections: ``englishSentenceCorrectness``,
    ``wordUsage``, ``wordTranslationAccuracy`` and ``sentenceTranslationAccuracy``.

    Args:
        ru_finn_card: Source card; its five values are unpacked positionally as
            Russian word, Russian sentence, Finnish word, Finnish sentence, id.
        ru_eng_card: Translated card; its five values are unpacked positionally,
            the third being the English word, the fourth the English sentence
            and the fifth the id.
        model: Model name passed to the chat-completions call.

    Returns:
        The model's JSON reply parsed into a dict, with the card id added under ``id``.

    Raises:
        ValueError: If either card does not hold exactly five values.
        AssertionError: If the two cards carry different ids.
    """
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()
    _, _, tr_word, tr_sentence, tr_id = ru_eng_card.values()

    assert card_id == tr_id

    system_prompt = '''You are a multilingual assistant who is proficient in Russian, Finnish and English.'''

    user_prompt = f"""
    Evaluate the correctness of an English word and sentence based on their translations from Russian and Finnish. You will receive a word in Russian, Finnish, and its translation in English, as well as a sentence in Russian, Finnish, and its translation in English. Your task is to assess the quality of the English sentence, the usage of the English word in the sentence, and the accuracy of the translations from Russian and Finnish to English. For each evaluation point, provide a detailed explanation of your judgment and suggest fixes where applicable, either to the English word, the English sentence, or both.

    Please ensure that the English sentence is grammatically correct and natural. Suggest a corrected version if necessary. Verify that the English sentence contains the English word in some form and suggest using synonyms or related terms if the word is missing. Prioritize naturalness and correctness. Ensure that the translations of both the word and sentence from Russian and Finnish to English are accurate and provide corrections if necessary.

    Respond in JSON format with the following structure:
    {{
        "englishSentenceCorrectness": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct.",
        "suggestedFix": "Suggested corrected sentence if there is an issue, or null if not applicable."
        }},
        "wordUsage": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct.",
        "suggestedFixSentence": "Suggested corrected sentence if the word usage is incorrect, or null if not applicable.",
        "suggestedFixWord": "Suggested corrected word if the word usage is incorrect, or null if not applicable."
        }},
        "wordTranslationAccuracy": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct.",
        "suggestedFix": "Suggested correction for translation issues, or null if not applicable."
        }},
        "sentenceTranslationAccuracy": {{
        "isCorrect": true/false,
        "explanation": "Detailed explanation if there is an issue or why it's correct.",
        "suggestedFix": "Suggested correction for translation issues, or null if not applicable."
        }}
    }}

    Here are the provided word and sentence in Russian, Finnish, and English:

    - Word in Russian: {ru_word}
    - Word in Finnish: {finn_word}
    - Word in English: {tr_word}
    - Sentence in Russian: {ru_sentence}
    - Sentence in Finnish: {finn_sentence}
    - Sentence in English: {tr_sentence}

    Please adhere to this structure to ensure clear, actionable feedback for each evaluation point.
    """

    completion = client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
    )

    verdict = json.loads(completion.choices[0].message.content.strip())
    # Tag the verdict with the card id so it can be matched to its card later.
    verdict['id'] = card_id

    return verdict

In [7]:
# Default system prompt for the improve_* functions below. The generate_* and
# eval_* functions above embed the same text as a local literal.
BASE_SYSTEM_PROMPT = '''You are a multilingual assistant who is proficient in Russian, Finnish and English.'''

In [8]:
def improve_ru_eng_phrase_card(ru_finn_card, ru_eng_card, eval_result, system_prompt=BASE_SYSTEM_PROMPT, model=GPT_4o):
    """Re-translate a word/phrase card, feeding the evaluator's feedback to the model.

    The feedback is read from ``eval_result['translationAccuracy']`` by key
    (``explanation`` / ``suggestedFix``). The evaluation comes from an LLM's JSON
    reply, so its key order and key count are not guaranteed; positional
    unpacking could silently swap the fields or fail on an extra key.

    Args:
        ru_finn_card: Source card; its five values are unpacked positionally as
            Russian word, Russian sentence, Finnish word, Finnish sentence, id.
        ru_eng_card: Current translation; its five values are unpacked positionally,
            the third being the English word and the fifth the id.
        eval_result: Evaluation dict containing a ``translationAccuracy`` section.
        system_prompt: System message for the chat-completions call.
        model: Model name passed to the chat-completions call.

    Returns:
        Dict with keys ``wordFirstLang``, ``sentenceFirstLang`` (always ``""``),
        ``wordSecondLang``, ``sentenceSecondLang`` (always ``""``) and ``id``.

    Raises:
        ValueError: If either card does not hold exactly five values.
        KeyError: If ``eval_result`` has no ``translationAccuracy`` section.
        AssertionError: If the two cards carry different ids.
    """
    ru_word, _, finn_word, _, card_id = ru_finn_card.values()
    _, _, eng_word, _, tr_id = ru_eng_card.values()

    accuracy = eval_result['translationAccuracy']
    # A missing field renders as 'None' in the prompt, the same text a JSON null produced before.
    explanation = accuracy.get('explanation')
    suggested_fix = accuracy.get('suggestedFix')

    assert card_id == tr_id

    user_prompt = f"""
    Translate the given Russian word or phrase: '{ru_word}' into clear and natural English, prioritizing its meaning and how it would sound most authentic and fluent to native speakers. Use the Finnish equivalent: '{finn_word}' as a supportive reference if it helps refine the context or meaning. While accuracy is important, favor translations that fit naturally into everyday English, even if they are not the most direct equivalents.

    Consider the following:
    Existing English translation: '{eng_word}'
    Issues identified: '{explanation}'
    Suggested improvement: '{suggested_fix}'
    Instructions:
    Based on the information provided, craft an English translation that balances accuracy, naturalness, and context. You may adapt the word or phrase slightly to ensure it resonates well with native speakers and fits its intended use.

    Respond with only the final English translation without any additional explanations.
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
    )

    # The raw reply text is taken as the improved translation.
    tr_word = response.choices[0].message.content.strip()

    return {
        "wordFirstLang": ru_word,
        "sentenceFirstLang": "",
        "wordSecondLang": tr_word,
        "sentenceSecondLang": "",
        "id": card_id,
    }

In [9]:
def improve_ru_eng_full_card(ru_finn_card, ru_eng_card, eval_result, system_prompt=BASE_SYSTEM_PROMPT, model=GPT_4o):
    """Re-translate a full card (word and sentence), feeding the evaluator's feedback to the model.

    Args:
        ru_finn_card: Source card; its five values are unpacked positionally as
            Russian word, Russian sentence, Finnish word, Finnish sentence, id.
        ru_eng_card: Current translation; its five values are unpacked positionally,
            the third being the English word, the fourth the English sentence
            and the fifth the id.
        eval_result: Evaluation dict with the sections ``englishSentenceCorrectness``,
            ``wordUsage``, ``wordTranslationAccuracy`` and ``sentenceTranslationAccuracy``.
        system_prompt: System message for the chat-completions call.
        model: Model name passed to the chat-completions call.

    Returns:
        Dict with keys ``wordFirstLang``, ``sentenceFirstLang``, ``wordSecondLang``,
        ``sentenceSecondLang`` and ``id``; the English strings are stripped.

    Raises:
        ValueError: If either card does not hold exactly five values.
        AssertionError: If the two cards carry different ids.
        KeyError: If an expected evaluation field or reply field is missing.
    """
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()
    _, _, eng_word, eng_sentence, tr_id = ru_eng_card.values()

    assert card_id == tr_id

    user_prompt = f"""
    Translate the given Russian word or phrase into English and use it within the provided Russian sentence to create a natural and accurate English translation. Use the Finnish word or sentence as additional context if needed. Focus on accurately conveying the meaning of the Russian source while incorporating all feedback and suggestions from the evaluation results.

    When choosing the English word or phrase, prioritize naturalness and fluency in the sentence over strict accuracy. While the translation should reflect the original meaning, it is acceptable to use a word or phrase that is not the most direct translation but still conveys the intended sense in a way that sounds natural and idiomatic in English.

    ### Details to guide your translation:

    Russian word or phrase: '{ru_word}'
    Finnish word or phrase: '{finn_word}'
    Russian sentence: '{ru_sentence}'
    Finnish sentence: '{finn_sentence}'
    Existing English translation of the word or phrase: '{eng_word}'
    Existing English translation of the sentence: '{eng_sentence}'

    ### Evaluation Results Summary:

    **English Sentence Evaluation**:

    Correctness: {eval_result['englishSentenceCorrectness']['isCorrect']}
    Explanation: {eval_result['englishSentenceCorrectness']['explanation']}
    Suggested Fix (if applicable): {eval_result['englishSentenceCorrectness']['suggestedFix']}

    **Word Usage Evaluation**:

    Correctness: {eval_result['wordUsage']['isCorrect']}
    Explanation: {eval_result['wordUsage']['explanation']}
    Suggested Fix for Word (if applicable): {eval_result['wordUsage']['suggestedFixWord']}
    Suggested Fix for Sentence (if applicable): {eval_result['wordUsage']['suggestedFixSentence']}

    **Word Translation Accuracy Evaluation**:

    Correctness: {eval_result['wordTranslationAccuracy']['isCorrect']}
    Explanation: {eval_result['wordTranslationAccuracy']['explanation']}
    Suggested Fix (if applicable): {eval_result['wordTranslationAccuracy']['suggestedFix']}

    **Sentence Translation Accuracy Evaluation**:

    Correctness: {eval_result['sentenceTranslationAccuracy']['isCorrect']}
    Explanation: {eval_result['sentenceTranslationAccuracy']['explanation']}
    Suggested Fix (if applicable): {eval_result['sentenceTranslationAccuracy']['suggestedFix']}

    ### Instructions:
    Review the Evaluation Feedback:

    Carefully consider all provided explanations and suggested fixes for the word or phrase, sentence, and overall translation accuracy.
    Translate the Word or Phrase:

    Choose a translation that balances accuracy with naturalness. It is acceptable to use a word that deviates slightly from the most direct translation if it results in a more fluent and idiomatic sentence.
    If the word usage is marked incorrect, incorporate the suggested fix or refine it further for better contextual alignment.
    Translate the Russian Sentence:

    Integrate the translated word or phrase naturally into the sentence.
    If the sentence translation is marked incorrect, incorporate the suggested fixes and adjust for fluency and clarity.
    Address Translation Accuracy Issues:

    If any translation inaccuracies are identified, apply the suggested fixes or clarify the meaning while ensuring the translation sounds natural and idiomatic.
    Leverage Context:

    Use the Finnish word or sentence as additional guidance where necessary.
    Produce a Polished Result:

    Ensure the final translation conveys the intended meaning, aligns naturally with the sentence, and incorporates feedback from the evaluation results.

    ### Response structure:

    Respond in JSON format with the following structure:
    {{
        "englishWord": "Translated word in Englsih",
        "englishSentence": "Translated sentence in English"
    }}
    """

    completion = client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
    )

    reply = json.loads(completion.choices[0].message.content.strip())
    english_word = reply['englishWord']
    english_sentence = reply['englishSentence']

    return {
        "wordFirstLang": ru_word,
        "sentenceFirstLang": ru_sentence,
        "wordSecondLang": english_word.strip(),
        "sentenceSecondLang": english_sentence.strip(),
        "id": card_id,
    }

In [10]:
def is_phrase_card(card):
    """Return True when ``card`` has an empty ``sentenceFirstLang``, i.e. it holds only a word/phrase."""
    first_lang_sentence = card['sentenceFirstLang']
    return first_lang_sentence == ''

In [11]:
def check_eval(eval_result, is_phrase_card):
    """Tell whether an evaluation marks the card as fully correct.

    Args:
        eval_result: Evaluation dict as returned by ``eval_phrase_card`` or ``eval_full_card``.
        is_phrase_card: Truthy for a word/phrase-only card. (Inside this function
            the parameter shadows the module-level helper of the same name.)

    Returns:
        For a phrase card, the ``isCorrect`` value of ``translationAccuracy`` as
        stored; for a full card, ``True`` only if all four sections are correct.
    """
    if is_phrase_card:
        return eval_result['translationAccuracy']['isCorrect']

    full_card_sections = (
        'wordTranslationAccuracy',
        'sentenceTranslationAccuracy',
        'englishSentenceCorrectness',
        'wordUsage',
    )
    # A list, not a generator: every section is looked up before all() runs,
    # so a missing section raises KeyError even when an earlier one is False.
    verdicts = [eval_result[section]['isCorrect'] for section in full_card_sections]
    return all(verdicts)

In [12]:
def card_handler(ru_finn_card, num_iter=5):
    """Generate an English card, then evaluate and improve it until it passes.

    Word/phrase-only cards (empty ``sentenceFirstLang``) go through the
    ``*_phrase_card`` functions, all others through the ``*_full_card`` ones.

    Args:
        ru_finn_card: Source Russian-Finnish card dict.
        num_iter: Maximum number of improve-and-re-evaluate rounds.

    Returns:
        Tuple ``(ru_eng_card, eval_result)`` holding the last card produced and
        its evaluation; the evaluation may still be failing after ``num_iter`` rounds.
    """
    phrase_only = ru_finn_card['sentenceFirstLang'] == ''

    def generate():
        if phrase_only:
            return generate_ru_eng_phrase_card(ru_finn_card)
        return generate_ru_eng_full_card(ru_finn_card)

    def evaluate(candidate):
        if phrase_only:
            return eval_phrase_card(ru_finn_card, candidate)
        return eval_full_card(ru_finn_card, candidate)

    def improve(candidate, feedback):
        if phrase_only:
            return improve_ru_eng_phrase_card(ru_finn_card, candidate, feedback)
        return improve_ru_eng_full_card(ru_finn_card, candidate, feedback)

    ru_eng_card = generate()
    eval_result = evaluate(ru_eng_card)

    for _round in range(num_iter):
        if check_eval(eval_result, phrase_only):
            break
        ru_eng_card = improve(ru_eng_card, eval_result)
        eval_result = evaluate(ru_eng_card)

    return ru_eng_card, eval_result

In [13]:
def generate_ru_eng_cards_from_file(file_name, max_retries=5, retry_delay=1.0):
    """Generate and evaluate English cards for one source file, then persist both.

    Each card is processed with ``card_handler``. A failing card is retried at
    most ``max_retries`` times; after that it is skipped and reported. Retrying
    forever while discarding the exception would hide the cause and never
    terminate on a deterministic failure such as a malformed card.

    NOTE: a later cell defines a function with this same name (built on
    ``card_handler_with_agents``); whichever definition ran last is the one in effect.

    Args:
        file_name: Source file name, also used for both output files.
        max_retries: Attempts per card before it is skipped (at least one is made).
        retry_delay: Base pause in seconds between attempts, multiplied by the
            attempt number; ``0`` disables the pause.

    Returns:
        None. Cards and evaluation results are written via ``write_cards_to_file``
        and ``write_eval_results_to_file``; skipped cards appear in neither file,
        so the two lists stay aligned.
    """
    import time  # local import: only needed for the pause between retries

    ru_finn_cards = get_ru_finn_cards_from_file(file_name)

    ru_eng_cards = []
    eval_results = []
    attempts_allowed = max(1, max_retries)

    for ru_finn in tqdm(ru_finn_cards):
        last_error = None
        for attempt in range(1, attempts_allowed + 1):
            try:
                ru_eng, eval_res = card_handler(ru_finn)
            except Exception as exc:  # broad on purpose: API, JSON and schema errors are all retried
                last_error = exc
                if attempt < attempts_allowed and retry_delay > 0:
                    time.sleep(retry_delay * attempt)
                continue

            ru_eng_cards.append(ru_eng)
            eval_results.append(eval_res)
            break
        else:
            card_id = ru_finn.get('id', '<unknown>') if isinstance(ru_finn, dict) else '<unknown>'
            print(f"[{file_name}] skipping card {card_id!r} after {attempts_allowed} failed attempts: {last_error!r}")

    write_cards_to_file(file_name, ru_eng_cards)
    write_eval_results_to_file(file_name, eval_results)

In [14]:
def regressor(ru_finn_card, ru_eng_cards, n_models=3, model=GPT_4o):
    """Ask the model to pick or synthesise the best translation among several candidates.

    At most ``n_models`` candidates are listed in the prompt. When fewer cards are
    supplied than ``n_models``, all supplied cards are used instead of raising
    ``IndexError``.

    Args:
        ru_finn_card: Source card; its five values are unpacked positionally as
            Russian word, Russian sentence, Finnish word, Finnish sentence, id.
        ru_eng_cards: Candidate English cards, each with ``wordSecondLang`` and
            ``sentenceSecondLang`` keys.
        n_models: Maximum number of candidates (taken from the front of the list) to present.
        model: Model name passed to the chat-completions call.

    Returns:
        Dict with keys ``wordFirstLang``, ``sentenceFirstLang``, ``wordSecondLang``,
        ``sentenceSecondLang`` and ``id``; the English strings are stripped.

    Raises:
        ValueError: If ``ru_finn_card`` does not hold exactly five values.
        KeyError: If a listed candidate or the model's reply lacks an expected key.
    """
    ru_word, ru_sentence, finn_word, finn_sentence, card_id = ru_finn_card.values()

    candidate_lines = []
    for position, candidate in enumerate(ru_eng_cards[:n_models], start=1):
        cand_word = candidate['wordSecondLang']
        cand_sentence = candidate['sentenceSecondLang']
        candidate_lines.append(
            f"""{position}. `Model {position}` Response: {{ "englishWord": '{cand_word}', "englishSentence": '{cand_sentence}'}}\n"""
        )
    models_response = ''.join(candidate_lines)

    system_prompt = '''Act as a translation evaluator and synthesizer. Assess model-generated translations for a Russian word/phrase and its sentence, prioritizing accuracy, fluency, and contextual fit. Return the best translations in JSON format with refinements if necessary.'''

    user_prompt = f"""
**Instruction for the Regressor**:  
Evaluate the translations provided by different models for a given Russian word or phrase and its accompanying sentence. Select or synthesize the best English translation for the word and sentence based on:
1. **Accuracy**: Ensure the translation reflects the original Russian meaning correctly.
2. **Naturalness**: The English translations should be fluid and use appropriate synonyms or phrasing where needed.
3. **Consistency**: The translated sentence should appropriately incorporate the word's translation.

### Input:
Given word or phrase (Russian): '{ru_word}'  
Given word or phrase (Finnish): '{finn_word}'  

Given sentence (Russian): '{ru_sentence}'  
Given sentence (Finnish): '{finn_sentence}'  

Responses from models:
{models_response}

---

### Task:
Critically evaluate these responses, identify the best translations, and synthesize a single, high-quality translation. If needed, refine the translations to ensure accuracy and naturalness. Do not simply copy; improve where necessary.

### Response structure:
Respond in JSON format with the following structure:
{{
    "englishWord": "Best translated word in English",
    "englishSentence": "Best translated sentence in English"
}}

---

### Key Considerations for Evaluation:
- **Accuracy**: Does the translation reflect the original meaning and nuances in Russian?
- **Fluency**: Is the translation grammatically correct and natural in English?
- **Contextual Fit**: Does the sentence correctly integrate the translation of the word or phrase?
    """

    response = client.chat.completions.create(
        model=model,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_prompt.strip()},
        ],
    )

    response_dict = json.loads(response.choices[0].message.content.strip())
    tr_word = response_dict['englishWord']
    tr_sentence = response_dict['englishSentence']

    return {
        "wordFirstLang": ru_word,
        "sentenceFirstLang": ru_sentence,
        "wordSecondLang": tr_word.strip(),
        "sentenceSecondLang": tr_sentence.strip(),
        "id": card_id,
    }

In [15]:
def meaxture_of_agents(ru_finn_card, n_models=3):
    """Draft ``n_models`` independent translations and merge them with ``regressor``.

    Word/phrase-only cards (empty ``sentenceFirstLang``) are drafted with
    ``generate_ru_eng_phrase_card``, all others with ``generate_ru_eng_full_card``.

    Args:
        ru_finn_card: Source Russian-Finnish card dict.
        n_models: Number of drafts to generate and hand to ``regressor``.

    Returns:
        The card returned by ``regressor``.
    """
    phrase_only = ru_finn_card['sentenceFirstLang'] == ''

    def draft():
        if phrase_only:
            return generate_ru_eng_phrase_card(ru_finn_card)
        return generate_ru_eng_full_card(ru_finn_card)

    candidates = [draft() for _ in range(n_models)]

    return regressor(ru_finn_card, candidates, n_models)

In [16]:
def moa_and_eval(ru_finn_card):
    """Translate a card with ``meaxture_of_agents`` and evaluate the result.

    Args:
        ru_finn_card: Source Russian-Finnish card dict with ``sentenceFirstLang`` and ``id`` keys.

    Returns:
        Tuple ``(ru_eng_card, eval_result)``.

    Raises:
        AssertionError: If the translated card's id differs from the source card's id.
    """
    ru_eng_card = meaxture_of_agents(ru_finn_card)
    phrase_only = ru_finn_card['sentenceFirstLang'] == ''

    assert ru_finn_card['id'] == ru_eng_card['id']

    if phrase_only:
        return ru_eng_card, eval_phrase_card(ru_finn_card, ru_eng_card)
    return ru_eng_card, eval_full_card(ru_finn_card, ru_eng_card)

In [17]:
BASE_SYSTEM_PROMPT

'You are a multilingual assistant who is proficient in Russian, Finnish and English.'

In [18]:
# NOTE(review): this list is reassigned by the very next cell, so when the notebook
# runs top to bottom these three prompts are never used. The second entry speaks of
# "marketing material", which matches nothing else in this notebook.
system_prompts = [
    "Act as a linguistic expert specializing in Russian, Finnish, and English translations. Evaluate the given Russian word and sentence translations in conjunction with the Finnish context. Based on evaluation feedback, refine the word and sentence translations for accuracy, fluency, and naturalness, ensuring idiomatic English usage. Provide the response in JSON format.",
    '''You are a cultural consultant with expertise in Finnish culture. Translate this marketing material from Russian to English, adapting it to resonate with local customs and preferences.''',
    '''You are a professional translator specializing in basic sentences and phrases who is proficient in Russian, Finnish and English.'''
]

In [19]:
# One system prompt per "improving agent"; meaxture_of_improving_agents iterates over this list.
# NOTE(review): every prompt asks for JSON output, yet the list is also passed to
# improve_ru_eng_phrase_card, which stores the raw reply text as the translated word
# without parsing it — confirm the phrase path never ends up storing a JSON blob.
system_prompts = [
    "Act as a linguistic expert specializing in Russian, Finnish, and English translations. Evaluate the given Russian word and sentence translations in conjunction with the Finnish context. Based on evaluation feedback, refine the word and sentence translations for accuracy, fluency, and naturalness, ensuring idiomatic English usage. Provide the response in JSON format.",
    "Take on the role of a translator focusing on contextual accuracy. Use the provided Russian and Finnish inputs, along with feedback evaluations, to create an English translation that balances precise meaning with natural phrasing. Address inaccuracies and ensure the translation reads fluently and idiomatically. Respond in JSON format.",
    "Serve as a cultural localization specialist. Translate the provided Russian word and sentence into English, using the Finnish context and feedback to enhance naturalness and idiomatic expression. Focus on making the translation contextually relevant and fluent for an English-speaking audience. Provide your output in JSON format."
]

In [20]:
def meaxture_of_improving_agents(ru_finn_card, ru_eng_card, eval_result, n_models=3):
    """Improve a card with several differently-prompted agents and merge the results.

    One improved card is produced per entry of the module-level ``system_prompts``
    list, limited to the first ``n_models`` entries. ``regressor`` is then told
    how many candidates were actually produced. This keeps the two numbers in
    step: a larger ``n_models`` no longer makes ``regressor`` index past the end
    of the list, and a smaller one no longer pays for improvements that
    ``regressor`` would ignore.

    Args:
        ru_finn_card: Source Russian-Finnish card dict.
        ru_eng_card: Current English card to improve.
        eval_result: Evaluation of ``ru_eng_card`` used as feedback.
        n_models: Maximum number of improving agents to run.

    Returns:
        The card returned by ``regressor``.
    """
    phrase_only = ru_finn_card['sentenceFirstLang'] == ''

    improved_cards = []
    for agent_prompt in system_prompts[:n_models]:
        if phrase_only:
            improved = improve_ru_eng_phrase_card(ru_finn_card, ru_eng_card, eval_result, system_prompt=agent_prompt)
        else:
            improved = improve_ru_eng_full_card(ru_finn_card, ru_eng_card, eval_result, system_prompt=agent_prompt)
        improved_cards.append(improved)

    return regressor(ru_finn_card, improved_cards, len(improved_cards))

In [21]:
def card_handler_with_agents(ru_finn_card, num_iter=5, num_agents=3):
    """Generate an English card, then evaluate and improve it with multiple agents until it passes.

    Same loop as ``card_handler``, except that each improvement round goes through
    ``meaxture_of_improving_agents``. That function selects the phrase or full-card
    path itself, so the improvement step needs no branch here (the original had
    two identical branches).

    Args:
        ru_finn_card: Source Russian-Finnish card dict.
        num_iter: Maximum number of improve-and-re-evaluate rounds.
        num_agents: Passed to ``meaxture_of_improving_agents`` as its ``n_models``.

    Returns:
        Tuple ``(ru_eng_card, eval_result)`` holding the last card produced and
        its evaluation; the evaluation may still be failing after ``num_iter`` rounds.
    """
    is_phrase = (ru_finn_card['sentenceFirstLang'] == '')

    def evaluate(candidate):
        if is_phrase:
            return eval_phrase_card(ru_finn_card, candidate)
        return eval_full_card(ru_finn_card, candidate)

    if is_phrase:
        ru_eng_card = generate_ru_eng_phrase_card(ru_finn_card)
    else:
        ru_eng_card = generate_ru_eng_full_card(ru_finn_card)

    eval_result = evaluate(ru_eng_card)

    for _ in range(num_iter):
        if check_eval(eval_result, is_phrase):
            break

        ru_eng_card = meaxture_of_improving_agents(ru_finn_card, ru_eng_card, eval_result, num_agents)
        eval_result = evaluate(ru_eng_card)

    return ru_eng_card, eval_result

In [25]:
def generate_ru_eng_cards_from_file(file_name, max_retries=5, retry_delay=1.0):
    """Generate and evaluate English cards for one source file using the multi-agent handler.

    Each card is processed with ``card_handler_with_agents``. A failing card is
    retried at most ``max_retries`` times; after that it is skipped and reported.
    Retrying forever while discarding the exception would hide the cause and never
    terminate on a deterministic failure such as a malformed card.

    NOTE: this redefines a function of the same name from an earlier cell (that
    one uses ``card_handler``); once this cell has run, this version is in effect.

    Args:
        file_name: Source file name, also used for both output files.
        max_retries: Attempts per card before it is skipped (at least one is made).
        retry_delay: Base pause in seconds between attempts, multiplied by the
            attempt number; ``0`` disables the pause.

    Returns:
        None. Cards and evaluation results are written via ``write_cards_to_file``
        and ``write_eval_results_to_file``; skipped cards appear in neither file,
        so the two lists stay aligned.
    """
    import time  # local import: only needed for the pause between retries

    ru_finn_cards = get_ru_finn_cards_from_file(file_name)

    ru_eng_cards = []
    eval_results = []
    attempts_allowed = max(1, max_retries)

    for ru_finn in tqdm(ru_finn_cards):
        last_error = None
        for attempt in range(1, attempts_allowed + 1):
            try:
                ru_eng, eval_res = card_handler_with_agents(ru_finn)
            except Exception as exc:  # broad on purpose: API, JSON and schema errors are all retried
                last_error = exc
                if attempt < attempts_allowed and retry_delay > 0:
                    time.sleep(retry_delay * attempt)
                continue

            ru_eng_cards.append(ru_eng)
            eval_results.append(eval_res)
            break
        else:
            card_id = ru_finn.get('id', '<unknown>') if isinstance(ru_finn, dict) else '<unknown>'
            print(f"[{file_name}] skipping card {card_id!r} after {attempts_allowed} failed attempts: {last_error!r}")

    assert len(ru_eng_cards) == len(eval_results)

    # The per-card check_eval pass that used to sit here built a list nobody read,
    # and could raise on a malformed evaluation before anything was written.
    write_cards_to_file(file_name, ru_eng_cards)
    write_eval_results_to_file(file_name, eval_results)

In [26]:
def check_file_accuracy(file_name):
    """Return the share of stored cards whose stored evaluation passes ``check_eval``.

    Cards and evaluation results are paired by position in their files.

    Args:
        file_name: File name shared by the card file and the evaluation file.

    Returns:
        Fraction in ``[0, 1]`` of cards judged correct; ``0.0`` when the file
        holds no cards (instead of raising ``ZeroDivisionError``).

    Raises:
        AssertionError: If the two files hold a different number of entries.
    """
    ru_eng_cards = get_ru_eng_cards_from_file(file_name)
    eval_results = get_eval_results_from_file(file_name)

    assert len(ru_eng_cards) == len(eval_results)

    if not ru_eng_cards:
        return 0.0

    results = [
        check_eval(eval_res, ru_eng['sentenceFirstLang'] == '')
        for ru_eng, eval_res in zip(ru_eng_cards, eval_results)
    ]

    return sum(results) / len(results)

In [27]:
# Input files sm1_new_kap1.json .. sm1_new_kap9.json (range(1, 10) stops at 9).
file_names = [f'sm1_new_kap{i}.json' for i in range(1, 10)]

In [28]:
# For each input file: generate + evaluate + persist the cards, then print the
# share of cards whose stored evaluation passes check_eval.
# This calls whichever generate_ru_eng_cards_from_file definition was executed last.
for file_name in tqdm(file_names):
    generate_ru_eng_cards_from_file(file_name)
    print(check_file_accuracy(file_name))

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

0.9867549668874173


  0%|          | 0/192 [00:00<?, ?it/s]

1.0


  0%|          | 0/238 [00:00<?, ?it/s]

0.9789915966386554


  0%|          | 0/267 [00:00<?, ?it/s]

0.9812734082397003


  0%|          | 0/153 [00:00<?, ?it/s]

0.9934640522875817


  0%|          | 0/239 [00:00<?, ?it/s]

0.9916317991631799


  0%|          | 0/184 [00:00<?, ?it/s]

0.9891304347826086


  0%|          | 0/260 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [54]:
# NOTE(review): `res` is not defined in any cell shown here, and this cell's execution
# count (54) is out of sequence — presumably leftover kernel state; confirm the cell
# survives Restart & Run All or remove it.
print('------', sum(res) / len(res))

------ 0.9851174600819523


In [24]:
# Hand-typed sum of nine values; the next cell divides it by 9 to get a mean.
# NOTE(review): presumably per-file accuracies from an earlier run — they differ from
# the values printed by the loop above; TODO confirm their source or compute them in code.
0.9933774834437086 + 0.9895833333333334 + 0.9915966386554622 + 0.9850187265917603 + 0.9869281045751634 + 0.9748953974895398 + 0.9728260869565217 + 0.9846153846153847 + 0.9778761061946902

8.856717261855565

In [25]:
8.856717261855565 / 9

0.9840796957617294

In [36]:
# NOTE(review): `check_moa` is not defined in any cell shown here (this applies to the
# two following calls as well) — presumably defined in a deleted cell; confirm or restore it.
check_moa(file_names[0])

  0%|          | 0/151 [00:00<?, ?it/s]

1.0

In [37]:
check_moa(file_names[1])

  0%|          | 0/192 [00:00<?, ?it/s]

1.0

In [38]:
check_moa(file_names[2])

  0%|          | 0/238 [00:00<?, ?it/s]

: 