In [29]:
#!/usr/bin/env python
# coding: utf-8
import openai
from openai import AzureOpenAI, AsyncAzureOpenAI
from tqdm.notebook import tqdm
import os
from dotenv import load_dotenv, find_dotenv
import requests
import base64


# Create a ".env" file next to (or above) this notebook containing the line:
# OPENAI_API_KEY="<get_your_key_from_platform.openai.com>"
def get_openai_api_key():
    """Load the discovered ``.env`` file and return the ``OPENAI_API_KEY`` value.

    Returns:
        The value of the ``OPENAI_API_KEY`` environment variable, or ``None``
        when it is not set.
    """
    # The return value of load_dotenv is intentionally ignored.
    load_dotenv(find_dotenv())
    return os.environ.get("OPENAI_API_KEY")

# get_openai_api_key() also calls load_dotenv(), so the Azure settings read
# from the environment below can come from the same ".env" file.
OPENAI_API_KEY = get_openai_api_key()
# client = openai.OpenAI(api_key=OPENAI_API_KEY)  # non-Azure client, kept for reference

# Shared Azure OpenAI client used by every cell that talks to the chat API.
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-07-01-preview",
    azure_endpoint=azure_endpoint,
)

# Names passed as the `model` argument of chat-completion requests.
GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_4_TURBO_PREVIEW = "gpt-4-turbo-preview"
GPT_4 = "gpt-4"
GPT_4o = "gpt-4o"

In [30]:
def _request_translation(system_prompt, user_prompt, model):
    """Send one system+user exchange to the chat API and return the reply text.

    Returns the stripped content of the first choice. When the API returns no
    choices, or a choice without text content, the placeholder
    'No response generated.' is returned instead.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    content = response.choices[0].message.content if response.choices else None
    # `content` can be None (e.g. a filtered reply); calling .strip() on it would crash.
    if content is None:
        return 'No response generated.'
    return content.strip()


def generate_ru_eng_cards(basic_cards, system_prompt, model=GPT_4o):
    """Translate the word and sentence of every card into English.

    For each card two requests are made: the first translates the word, the
    second translates the sentence and asks the model to use the translated
    word in it.

    Args:
        basic_cards: Iterable of dict cards. The first two values of each dict
            are taken as the source word and the source sentence
            (NOTE(review): relies on key order — confirm against the card schema).
        system_prompt: System message sent with every request.
        model: Model name forwarded to the chat-completions call.

    Returns:
        A list of dicts with the keys ``wordFirstLang``, ``sentenceFirstLang``,
        ``wordSecondLang`` and ``sentenceSecondLang``.
    """
    cards = []
    for basic_card in tqdm(basic_cards, desc="Generating ru-eng cards"):
        # Only the first two values are needed; the star-unpack tolerates
        # cards with any number of additional fields.
        word, sentence, *_ = basic_card.values()

        word_prompt = f"""Translate given word or phrase into English.
        ===
        Given word: '{word}'
        ===
        Return only translated word or phrase and nothing else.
        """
        tr_word = _request_translation(system_prompt, word_prompt, model)

        # The closing instruction asks for the translated *sentence*; it used to
        # repeat the word prompt's "word or phrase" wording.
        sentence_prompt = f"""Translate given sentence into English. Make sure it contains this word {tr_word}
        ===
        Given sentence: '{sentence}'
        ===
        Return only translated sentence and nothing else. Make sentence grammaticly correct and idiomatic.
        """
        tr_sentence = _request_translation(system_prompt, sentence_prompt, model)

        cards.append({
            "wordFirstLang": word,
            "sentenceFirstLang": sentence,
            "wordSecondLang": tr_word,
            "sentenceSecondLang": tr_sentence,
        })

    return cards


In [31]:
import json


FOLDER_WITH_JSON = '../../data/russian-finnish/cards/curated_platform_cards/'


def load_cards_from_file(file_name, folder=None):
    """Read a UTF-8 JSON file and return its parsed content.

    Args:
        file_name: Name of the JSON file, relative to ``folder``.
        folder: Directory that contains the file. When omitted, the
            module-level ``FOLDER_WITH_JSON`` is used (looked up at call time).

    Returns:
        The object produced by ``json.load`` for the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if folder is None:
        folder = FOLDER_WITH_JSON
    # os.path.join works whether or not `folder` ends with a path separator.
    file_path = os.path.join(folder, file_name)
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [32]:
# Source cards for the first batch, read from sm1_new_kap1.json.
ru_cards_1 = load_cards_from_file(file_name='sm1_new_kap1.json')

In [33]:
# System message shared by both translation requests made for each card.
system_prompt = (
    "You are the Russian to English translater for beginners. "
    "Translate given (word, sentence) pair from Russian to English"
)
ru_eng_cards_1 = generate_ru_eng_cards(
    basic_cards=ru_cards_1,
    system_prompt=system_prompt,
    model=GPT_4o,
)

Generating ru-eng cards:   0%|          | 0/151 [00:00<?, ?it/s]

In [35]:
# NOTE(review): evaluate_cards is defined in a later cell of this notebook;
# that cell must have been run before this one.
results_1 = evaluate_cards(cards=ru_eng_cards_1, second_lang='English')

  0%|          | 0/151 [00:00<?, ?it/s]

In [38]:
# Number of cards whose translation was judged inaccurate (total minus correct).
len(results_1) - sum(res['translationAccuracy']['isCorrect'] for res in results_1)

24

In [37]:
# Print the first evaluation whose translation was judged inaccurate, then stop.
for res in results_1:
    if res['translationAccuracy']['isCorrect'] != 0:
        continue
    print(res)
    break

{'RussianSentence': {'isCorrect': True, 'explanation': "The sentence 'Здесь много людей.' is grammatically correct and natural in Russian.", 'suggestedFix': None}, 'EnglishSentence': {'isCorrect': False, 'explanation': "The sentence 'There are here many people.' is not grammatically correct. The correct structure is: 'There are many people here.'", 'suggestedFix': 'There are many people here.'}, 'translationAccuracy': {'isCorrect': False, 'explanation': "The translation 'Здесь много людей.' is not accurately translated as 'There are here many people.' but as 'There are many people here.'", 'suggestedFix': 'There are many people here.'}, 'RussianWordUsage': {'isCorrect': True, 'explanation': "The word 'здесь' appears in the sentence 'Здесь много людей.'", 'suggestedFixSentence': None, 'suggestedFixWord': None}, 'EnglishWordUsage': {'isCorrect': False, 'explanation': "The word 'here' should be at the end of the sentence to sound natural.", 'suggestedFix': 'There are many people here.'}}


In [8]:
# Source cards for the second batch, read from sm1_new_kap2.json.
ru_cards_2 = load_cards_from_file(file_name='sm1_new_kap2.json')

In [21]:
# System message shared by both translation requests made for each card.
system_prompt = (
    "You are the Russian to English translater for beginners. "
    "Translate given (word, sentence) pair from Russian to English"
)
ru_eng_cards_2 = generate_ru_eng_cards(
    basic_cards=ru_cards_2,
    system_prompt=system_prompt,
    model=GPT_4o,
)

Generating ru-eng cards:   0%|          | 0/192 [00:00<?, ?it/s]

In [34]:
def evaluate_cards(cards, first_lang='Russian', second_lang='Finnish', model=GPT_4o):
    """Ask the chat model to review every flashcard and collect its JSON verdicts.

    For each card one request is sent that asks the model to check both
    sentences, the translation accuracy and whether each word occurs in its
    sentence. The reply is requested in JSON mode and parsed into a dict.

    Args:
        cards: Iterable of dicts with the keys ``wordFirstLang``,
            ``sentenceFirstLang``, ``wordSecondLang`` and ``sentenceSecondLang``.
        first_lang: Name of the source language, interpolated into the prompt
            and into the requested JSON keys.
        second_lang: Name of the target language, used the same way.
        model: Model name forwarded to the chat-completions call.

    Returns:
        A list with one parsed JSON object (dict) per card, in input order.

    Raises:
        json.JSONDecodeError: If a reply is not valid JSON.
    """
    results = []
    # The system prompt does not depend on the card, so build it once.
    system_prompt = f"\n        You are a language expert proficient in both {first_lang} and {second_lang}."

    for card in tqdm(cards):
        user_prompt = f"""
        You are given a flashcard containing a word and a sentence in {first_lang}, along with its translation and 
        a sentence in {second_lang}. Your task is to evaluate the correctness of the flashcard. Here’s what you need to check:

        1. Whether the sentence in {first_lang} is grammatically correct and natural.
        2. Whether the sentence in {second_lang} is grammatically correct and natural.
        3. Whether the translation from {second_lang} to {first_lang} is accurate.
        4. Whether the word in {first_lang} occurs in some form in the {first_lang} sentence.
        5. Whether the word in {second_lang} occurs in some form in the {second_lang} sentence.

        If both sentence in {first_lang} and {second_lang} are missing it is ok, just check the translation to {second_lang}. 

        Check that the word in {first_lang} appear in the sentence in some form, diregarding the case and form of the word, or the order of the words, 
        if the words in {first_lang} consists of several words. For example if the word is 'в то же время' and in sentence it's 'в одно и то же время', it's ok. Or if 'когда опять' in the words and 'когда моя сестра опять' is in the sentence. 
        If there are several synonyms in {first_lang} word field, then if one of them appears in the sentences in some form it's ok. 
        For example if {first_lang} word is 'выносить, красть' and in sentence 'украсть' is used it's ok.

        If the word doesn't appear in some form, suggest using synonyms or related terms in the 
        translation to ensure the sentence remains natural and accurate. You can change either the sentence translation 
        or the word translation. For example, if the word is "досуг," it can be translated as "досуг" or "свободное время". So if the word "свободное время" 
        is in some form used in the sentence and it sounds natural, suggested fix can be to change the word translation to "досуг, свободное время", 
        keeping the sentence intact




        In the suggestedFix fields don't provide explanations or instructions, just provide the final corrected string.
        If it's better to fix word, not sentence, return null for suggestedFixSentence.


        Provide a detailed evaluation for each point and suggest fixes where necessary.

        Here is the flashcard:

        Word in {first_lang}: {card['wordFirstLang']}
        Sentence in {first_lang}: {card['sentenceFirstLang']}
        Word in {second_lang}: {card['wordSecondLang']}
        Sentence in {second_lang}: {card['sentenceSecondLang']}

        Respond in JSON format with the following structure:

        {{
          "{first_lang}Sentence": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFix": "Suggested correction if there is an issue"
          }},
          "{second_lang}Sentence": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFix": "Suggested correction if there is an issue"
          }},
          "translationAccuracy": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFix": "Suggested correction if there is an issue"
          }},
          "{first_lang}WordUsage": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFixSentence": "Suggested correction to the sentence if there is an issue",
            "suggestedFixWord": "Suggested correction to the word if there is an issue"
          }},
          "{second_lang}WordUsage": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFix": "Suggested correction if there is an issue"
          }}
        }}


        If both sentence in {first_lang} and {second_lang} are missing return only translationAccuracy block.
        """

        response = client.chat.completions.create(
            # Honour the caller's `model` argument (previously hard-coded to "gpt-4o").
            model=model,
            # JSON mode: the reply is expected to be a single JSON object.
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )

        results.append(json.loads(response.choices[0].message.content.strip()))
    return results

In [25]:
# Display the first generated card as a quick sanity check of its fields.
ru_eng_cards_2[0]

{'wordFirstLang': 'парк',
 'sentenceFirstLang': 'Давай встретимся в парке.',
 'wordSecondLang': 'park',
 'sentenceSecondLang': "Let's meet in the park."}

In [26]:
# Evaluate the second batch of generated cards as Russian -> English flashcards.
results_2 = evaluate_cards(cards=ru_eng_cards_2, second_lang='English')

  0%|          | 0/192 [00:00<?, ?it/s]

In [27]:
# Share of cards whose translation was judged accurate.
sum(res['translationAccuracy']['isCorrect'] for res in results_2) / len(results_2)

0.8802083333333334

In [28]:
# Print the first evaluation whose translation was judged inaccurate, then stop.
for res in results_2:
    if res['translationAccuracy']['isCorrect'] != 0:
        continue
    print(res)
    break

{'RussianSentence': {'isCorrect': True, 'explanation': "The sentence in Russian 'Ну, может кого-то другого.' is grammatically correct and natural.", 'suggestedFix': None}, 'EnglishSentence': {'isCorrect': False, 'explanation': "The sentence 'Maybe someone other.' is not grammatically correct and natural in English. The correct phrase should be 'Maybe someone else.'", 'suggestedFix': 'Maybe someone else.'}, 'translationAccuracy': {'isCorrect': False, 'explanation': "The translation of the sentence 'Ну, может кого-то другого.' to 'Maybe someone other.' is incorrect. A more accurate translation would be 'Maybe someone else.'", 'suggestedFix': 'Maybe someone else.'}, 'RussianWordUsage': {'isCorrect': True, 'explanation': "The word 'другой' appears in the sentence in the form 'другого'.", 'suggestedFixSentence': None, 'suggestedFixWord': None}, 'EnglishWordUsage': {'isCorrect': True, 'explanation': "The concept of 'other' appears in the sentence in the context of 'someone else'.", 'suggeste

In [None]:
def 