In [1]:
#!/usr/bin/env python
# coding: utf-8
import openai
from openai import AzureOpenAI, AsyncAzureOpenAI
from tqdm.notebook import tqdm
import os
from dotenv import load_dotenv, find_dotenv
import requests
import base64
import asyncio
from tqdm.asyncio import tqdm_asyncio


# Create a ".env" file and put a line with the key in it:
# OPENAI_API_KEY="<get_your_key_from_platform.openai.com>"
# The Azure client below also reads AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT.
def get_openai_api_key():
    """Load the ``.env`` file located by ``find_dotenv()`` and return the OpenAI key.

    Returns:
        The value of the ``OPENAI_API_KEY`` environment variable, or ``None``
        when that variable is not set.
    """
    dotenv_path = find_dotenv()
    # Loading happens for its side effect on the environment; the return
    # value of load_dotenv is not needed.
    load_dotenv(dotenv_path)
    return os.environ.get("OPENAI_API_KEY")

# This call also loads the .env file (via load_dotenv), so it must run before
# the os.getenv() reads below for values defined there to be visible.
OPENAI_API_KEY = get_openai_api_key()
# The key above is only needed for the public OpenAI API, e.g.
# openai.OpenAI(api_key=OPENAI_API_KEY); the Azure client below does not use it.

azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
client = AsyncAzureOpenAI(
    azure_endpoint=azure_endpoint,
    api_version="2024-07-01-preview",
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

# Model name constants.
GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_4_TURBO_PREVIEW = "gpt-4-turbo-preview"
GPT_4 = "gpt-4"
GPT_4o = "gpt-4o"

In [2]:
async def generate_english_sentences_async(words, system_prompt, model="GPT_4o",
                                           max_retries=None, retry_delay=60):
    """Generate one simple English example sentence per word, all requests concurrently.

    Args:
        words: Iterable of words or phrases; one chat-completion request is sent per item.
        system_prompt: System message sent with every request.
        model: Value passed as ``model=`` to the chat-completions call.
            NOTE(review): the default is the literal string "GPT_4o", not the
            module constant ``GPT_4o`` ('gpt-4o') - confirm that it names an
            existing deployment.
        max_retries: Maximum number of retries per word after a failed request.
            ``None`` (the default) retries indefinitely.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        dict mapping each word to its generated sentence (whitespace-stripped),
        in the order the words were given. A word whose response carries no
        choices or no text maps to 'No response generated.'.

    Raises:
        Exception: the last request error for a word, once ``max_retries`` is
            exhausted (never raised with the default ``max_retries=None``).
    """
    import logging

    logger = logging.getLogger(__name__)
    words = list(words)  # iterated twice: once to build tasks, once to order the result
    word_to_sentence = {}

    async def fetch_sentence(word):
        user_prompt = f"""Generate an idiomatic, simple, useful sentence in English containing this word or phrase:
        
        === 
        '{word}'
        ===
         
        The sentence should be useful and easy to understand for someone who just started learning English. Provide only the English sentence, without any translation or additional information.
        """
        failed_attempts = 0
        while True:
            try:
                response = await client.chat.completions.create(
                    model=model,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ]
                )
                break
            except Exception as exc:
                failed_attempts += 1
                if max_retries is not None and failed_attempts > max_retries:
                    raise
                # Make the failure visible instead of retrying silently; a
                # permanent error would otherwise look like a hung progress bar.
                logger.warning(
                    "Request for %r failed (%s: %s); retrying in %s s",
                    word, type(exc).__name__, exc, retry_delay,
                )
                await asyncio.sleep(retry_delay)

        content = response.choices[0].message.content if response.choices else None
        if content is None:
            # Covers both "no choices" and a choice without text content.
            content = 'No response generated.'
        word_to_sentence[word] = content.strip()

    tasks = [fetch_sentence(word) for word in words]
    await tqdm_asyncio.gather(*tasks, desc="Generating sentence examples")

    # Tasks finish in arbitrary order; rebuild the dict in input order so the
    # output is deterministic across runs.
    return {word: word_to_sentence[word] for word in words}

In [3]:
system_prompt = '''You are generator of simple and usefull sentences.'''
await generate_english_sentences_async(["Apple", "Cat", "Brown", "Red", "Basic", "Train", "Forget", "Class", "School", "Last", "Stop talkng", "Nothing", "Justin"], system_prompt, model=GPT_4o)

Generating sentence examples: 100%|████████████████████████████████████████████████████| 13/13 [00:12<00:00,  1.02it/s]


{'Nothing': 'Nothing lasts forever.',
 'Last': 'This is the last apple in the basket.',
 'Basic': "It's important to learn the basic rules of grammar.",
 'School': 'I go to school every day except Sunday.',
 'Stop talkng': 'Please stop talking and listen.',
 'Red': 'She wore a beautiful red dress to the party.',
 'Class': 'The class starts at 9 AM every day.',
 'Forget': "Don't forget to lock the door.",
 'Apple': 'An apple a day keeps the doctor away.',
 'Brown': 'The coffee table in the living room is brown.',
 'Cat': 'The cat is sleeping on the couch.',
 'Train': 'The train will arrive at the station in ten minutes.',
 'Justin': 'Justin loves to read books in the library.'}

In [4]:
import json


FOLDER_WITH_JSON = '../../data/russian-finnish/cards/curated_platform_cards/'


def load_cards_from_file(file_name, folder=None):
    """Load and return the decoded JSON content of one card file.

    Args:
        file_name: Name of the JSON file inside ``folder``.
        folder: Directory that holds the file. Defaults to ``FOLDER_WITH_JSON``
            (looked up at call time). A trailing path separator is optional.

    Returns:
        Whatever the file's JSON decodes to. Presumably a list of card dicts,
        since the caller extends a list with it - verify against the data files.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    if folder is None:
        folder = FOLDER_WITH_JSON
    # os.path.join works whether or not the folder ends with a separator,
    # unlike plain string concatenation.
    file_path = os.path.join(folder, file_name)
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [5]:
# Card files to load; range(1, 2) currently selects chapter 1 only.
file_names = [f'sm1_new_kap{i}.json' for i in range(1, 2)]

# Concatenate the cards of every listed file into a single list.
first_chapter = []
for file_name in tqdm(file_names):
    first_chapter += load_cards_from_file(file_name)

  0%|          | 0/1 [00:00<?, ?it/s]

In [8]:
import os
import asyncio
import json
from tqdm.asyncio import tqdm_asyncio

# Re-create the async Azure client with the same settings as in the first cell,
# reading the endpoint and key from the environment.
client = AsyncAzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_version="2024-07-01-preview",
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

async def evaluate_cards_async(cards, first_lang='Russian', second_lang='Finnish', model="GPT_4o",
                               max_retries=None, retry_delay=10):
    """Ask the chat model to evaluate every flashcard, all requests concurrently.

    Args:
        cards: Iterable of card dicts. Each must provide the keys
            'wordFirstLang', 'sentenceFirstLang', 'wordSecondLang' and
            'sentenceSecondLang', which are interpolated into the prompt.
        first_lang: Name of the first language, used in the prompt text and
            in the requested JSON keys.
        second_lang: Name of the second language, used the same way.
        model: Value passed as ``model=`` to the chat-completions call.
            NOTE(review): the default is the literal string "GPT_4o", not the
            module constant ``GPT_4o`` ('gpt-4o') - confirm that it names an
            existing deployment.
        max_retries: Maximum number of retries per card after a failed request.
            ``None`` (the default) retries indefinitely.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        list of decoded JSON evaluations, in the same order as ``cards``.
        Cards whose reply cannot be parsed as JSON are left out (a warning is
        logged for each), so the list may be shorter than ``cards``.

    Raises:
        Exception: the last request error for a card, once ``max_retries`` is
            exhausted (never raised with the default ``max_retries=None``).
    """
    import logging

    logger = logging.getLogger(__name__)
    cards = list(cards)
    # Sentinel marking "no parsed reply"; distinct from None so that a reply
    # that decodes to JSON null is still kept.
    not_parsed = object()
    parsed_by_index = [not_parsed] * len(cards)

    def parse_json_reply(text):
        """Decode a model reply as JSON, tolerating a surrounding Markdown code fence."""
        text = text.strip()
        if text.startswith("```"):
            # Drop the opening fence line (``` or ```json) ...
            text = text.split("\n", 1)[1] if "\n" in text else ""
            # ... and the closing fence, if present.
            text = text.rstrip()
            if text.endswith("```"):
                text = text[:-3]
        return json.loads(text)

    async def evaluate_card(index, card):
        system_prompt = f"""
        You are a language expert proficient in both {first_lang} and {second_lang}."""

        user_prompt = f"""
        You are given a flashcard containing a word and a sentence in {first_lang}, along with its translation and 
        a sentence in {second_lang}. Your task is to evaluate the correctness of the flashcard. Here’s what you need to check:

        1. Whether the sentence in {first_lang} is grammatically correct and natural.
        2. Whether the sentence in {second_lang} is grammatically correct and natural.
        3. Whether the translation from {second_lang} to {first_lang} is accurate.
        4. Whether the word in {first_lang} occurs in some form in the {first_lang} sentence.
        5. Whether the word in {second_lang} occurs in some form in the {second_lang} sentence.

        If both sentence in {first_lang} and {second_lang} are missing it is ok, just check the translation to {second_lang}. 

        Check that the word in {first_lang} appear in the sentence in some form, disregarding the case and form of the word, or the order of the words, 
        if the words in {first_lang} consists of several words. For example, if the word is 'в то же время' and in the sentence it's 'в одно и то же время', it's ok. Or if 'когда опять' in the words and 'когда моя сестра опять' is in the sentence. 
        If there are several synonyms in {first_lang} word field, then if one of them appears in the sentences in some form it's ok. 
        For example, if {first_lang} word is 'выносить, красть' and in the sentence 'украсть' is used, it's ok.

        If the word doesn't appear in some form, suggest using synonyms or related terms in the 
        translation to ensure the sentence remains natural and accurate. You can change either the sentence translation 
        or the word translation. For example, if the word is "досуг," it can be translated as "досуг" or "свободное время". So if the word "свободное время" 
        is in some form used in the sentence and it sounds natural, suggested fix can be to change the word translation to "досуг, свободное время", 
        keeping the sentence intact.

        In the suggestedFix fields, don't provide explanations or instructions, just provide the final corrected string.
        If it's better to fix the word, not the sentence, return null for suggestedFixSentence.

        Provide a detailed evaluation for each point and suggest fixes where necessary.

        Here is the flashcard:

        Word in {first_lang}: {card['wordFirstLang']}
        Sentence in {first_lang}: {card['sentenceFirstLang']}
        Word in {second_lang}: {card['wordSecondLang']}
        Sentence in {second_lang}: {card['sentenceSecondLang']}

        Respond in JSON format with the following structure:

        {{
          "{first_lang}Sentence": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFix": "Suggested correction if there is an issue"
          }},
          "{second_lang}Sentence": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFix": "Suggested correction if there is an issue"
          }},
          "translationAccuracy": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFix": "Suggested correction if there is an issue"
          }},
          "{first_lang}WordUsage": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFixSentence": "Suggested correction to the sentence if there is an issue",
            "suggestedFixWord": "Suggested correction to the word if there is an issue"
          }},
          "{second_lang}WordUsage": {{
            "isCorrect": true/false,
            "explanation": "Detailed explanation if there is an issue or why it's correct",
            "suggestedFix": "Suggested correction if there is an issue"
          }}
        }}

        If both sentences in {first_lang} and {second_lang} are missing, return only the translationAccuracy block.
        """

        failed_attempts = 0
        while True:
            try:
                response = await client.chat.completions.create(
                    model=model,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ]
                )
                break
            except Exception as exc:
                failed_attempts += 1
                if max_retries is not None and failed_attempts > max_retries:
                    raise
                # Make the failure visible instead of retrying silently; a
                # permanent error would otherwise look like a hung progress bar.
                logger.warning(
                    "Request for card %d (%r) failed (%s: %s); retrying in %s s",
                    index, card['wordFirstLang'], type(exc).__name__, exc, retry_delay,
                )
                await asyncio.sleep(retry_delay)

        try:
            parsed_by_index[index] = parse_json_reply(response.choices[0].message.content)
        except Exception as exc:
            # Best effort: an unusable reply drops this card from the results,
            # but the drop is reported rather than hidden.
            logger.warning(
                "Dropping evaluation of card %d (%r): reply could not be parsed as JSON (%s: %s)",
                index, card['wordFirstLang'], type(exc).__name__, exc,
            )

    # Create tasks for each card to evaluate them concurrently
    tasks = [evaluate_card(index, card) for index, card in enumerate(cards)]
    await tqdm_asyncio.gather(*tasks, desc="Evaluating flashcards")

    # Tasks finish in arbitrary order; report results in card order.
    return [parsed for parsed in parsed_by_index if parsed is not not_parsed]

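# Example usage from a plain script (inside a notebook, use `await` instead,
# because asyncio.run() cannot be called while an event loop is already running):
# asyncio.run(evaluate_cards_async(cards, first_lang, second_lang, model))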


In [9]:
# Evaluate the first 100 loaded cards using the default languages and model.
results = await evaluate_cards_async(first_chapter[:100])


Evaluating flashcards:   0%|                                                                   | 0/100 [00:00<?, ?it/s][A
Evaluating flashcards:   1%|▌                                                        | 1/100 [00:43<1:11:23, 43.27s/it][A
Evaluating flashcards:   3%|█▊                                                         | 3/100 [00:43<18:12, 11.27s/it][A
Evaluating flashcards:   7%|████▏                                                      | 7/100 [00:45<06:02,  3.90s/it][A
Evaluating flashcards:   8%|████▋                                                      | 8/100 [00:45<05:02,  3.29s/it][A
Evaluating flashcards:   9%|█████▎                                                     | 9/100 [00:46<04:15,  2.81s/it][A
Evaluating flashcards:  10%|█████▊                                                    | 10/100 [01:45<24:02, 16.03s/it][A
Evaluating flashcards:  11%|██████▍                                                   | 11/100 [01:45<17:50, 12.02s/it][A
Evaluating flas

In [20]:
# Number of evaluations returned; cards whose reply could not be parsed as
# JSON are not included, so this can be lower than the number of cards sent.
len(results)

66

In [10]:
# Share of evaluated cards whose translation was judged correct.
translation_flags = [res['translationAccuracy']['isCorrect'] for res in results]
sum(translation_flags) / len(translation_flags)

0.9770114942528736