In [1]:
import openai
from openai import AzureOpenAI, AsyncAzureOpenAI
from tqdm.notebook import tqdm
import os
from dotenv import load_dotenv, find_dotenv
import requests
import base64

# Model name constants (string identifiers only; nothing in this cell calls a model).
GPT_3_5_TURBO = 'gpt-3.5-turbo'
GPT_4_TURBO_PREVIEW = 'gpt-4-turbo-preview'
GPT_4 = "gpt-4"
GPT_4o = "gpt-4o"

def get_openai_api_key():
    """Load the dotenv file located by ``find_dotenv()`` and return the OpenAI key.

    Returns:
        The value of the ``OPENAI_API_KEY`` environment variable, or ``None``
        when that variable is not set.
    """
    dotenv_path = find_dotenv()
    # Side effect: the call below is what loads the dotenv file for this process.
    load_dotenv(dotenv_path)
    return os.getenv("OPENAI_API_KEY")

# get_openai_api_key() calls load_dotenv(), so it must run before the Azure
# settings are read from the environment on the following lines.
OPENAI_API_KEY = get_openai_api_key()
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

# Azure OpenAI client configured entirely from environment variables.
client = AzureOpenAI(
    azure_endpoint=azure_endpoint,
    api_version="2024-07-01-preview",
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)

In [9]:
# Language names for this experiment, kept as plain strings.
source_lang = "Finnish"
secondary_lang = "Russian"
target_lang = "English"

In [3]:
import json


# Dedicated constant: the shared name EVAL_FOLDER is reassigned further down in
# this cell, and a global is looked up when the function is *called*, so using
# EVAL_FOLDER here would silently read from the last-assigned folder instead.
RU_ENG_EVAL_FOLDER = '../../data/russian-english/cards/eval_results/ru_eng_'
def get_ru_finn_eval_results_from_file(file_name):
    """Load the JSON document stored at ``RU_ENG_EVAL_FOLDER + file_name``.

    NOTE(review): the function name says "ru_finn" while the path prefix is
    russian-english / ``ru_eng_`` — confirm which one is intended.

    Args:
        file_name: Suffix appended to the ``ru_eng_`` path prefix
            (plain string concatenation, not a path join).

    Returns:
        The parsed JSON content of the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    file_path = RU_ENG_EVAL_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

EVAL_FOLDER = '../../data/english-finnish/cards/eval_results/eng_finn_'
def get_eng_finn_eval_results_from_file(file_name):
    """Return the parsed JSON stored at ``EVAL_FOLDER + file_name``.

    ``EVAL_FOLDER`` is a path *prefix* (it ends in ``eng_finn_``), so the file
    name is appended by string concatenation. The global is read at call time.
    """
    with open(EVAL_FOLDER + file_name, mode='r', encoding='utf-8') as source:
        parsed = json.load(source)
    return parsed

RU_ENG_OUTPUT_FOLDER = '../../data/russian-english/cards/test_cards/ru_eng_'
def get_ru_eng_cards_from_file(file_name):
    """Load the JSON document stored at ``RU_ENG_OUTPUT_FOLDER + file_name``.

    Reads from ``RU_ENG_OUTPUT_FOLDER`` rather than the generic ``OUTPUT_FOLDER``,
    which this cell assigns to the english-finnish prefix.

    Args:
        file_name: Suffix appended to the ``ru_eng_`` path prefix
            (plain string concatenation, not a path join).

    Returns:
        The parsed JSON content of the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    file_path = RU_ENG_OUTPUT_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


RU_FINN_FOLDER_WITH_JSON = '../../data/russian-finnish/cards/curated_platform_cards/'
def get_ru_finn_cards_from_file(file_name):
    """Read a card list from ``RU_FINN_FOLDER_WITH_JSON + file_name``.

    Only cards that have no ``'isMarkedDeleted'`` key at all are returned; the
    value stored under that key is never inspected.

    Returns:
        A new list with the retained cards, in file order.
    """
    with open(RU_FINN_FOLDER_WITH_JSON + file_name, mode='r', encoding='utf-8') as source:
        loaded_cards = json.load(source)
    return [entry for entry in loaded_cards if 'isMarkedDeleted' not in entry.keys()]


ENG_FINN_OUTPUT_FOLDER = '../../data/english-finnish/cards/test_cards/eng_finn_'
def get_eng_finn_cards_from_file(file_name):
    """Return the parsed JSON stored at ``ENG_FINN_OUTPUT_FOLDER + file_name``.

    The folder constant is a path prefix ending in ``eng_finn_``; the file name
    is appended by string concatenation.
    """
    with open(ENG_FINN_OUTPUT_FOLDER + file_name, mode='r', encoding='utf-8') as source:
        parsed = json.load(source)
    return parsed


OUTPUT_FOLDER = '../../data/english-finnish/cards/test_cards/eng_finn_'
def write_cards_to_file(file_name, cards):
    """Serialise ``cards`` as JSON to ``OUTPUT_FOLDER + file_name``.

    The file is opened in ``'w'`` mode (an existing file is overwritten) and
    written as UTF-8 with non-ASCII characters kept as-is and a 2-space indent.
    """
    destination = OUTPUT_FOLDER + file_name
    with open(destination, mode='w', encoding='utf-8') as sink:
        json.dump(cards, sink, indent=2, ensure_ascii=False)
        
        
EVAL_FOLDER = '../../data/english-finnish/cards/eval_results/eng_finn_'
def write_eval_results_to_file(file_name, results):
    """Serialise ``results`` as JSON to ``EVAL_FOLDER + file_name``.

    The file is opened in ``'w'`` mode (an existing file is overwritten) and
    written as UTF-8 with non-ASCII characters kept as-is and a 2-space indent.
    """
    destination = EVAL_FOLDER + file_name
    with open(destination, mode='w', encoding='utf-8') as sink:
        json.dump(results, sink, indent=2, ensure_ascii=False)

In [4]:
def get_ru_fi_cards():
    """Collect the cards from every listed card file into one flat list.

    Files are read with ``get_ru_finn_cards_from_file`` in this order:
    ``sm1_new_kap1..9``, ``sm2_new_kap1..8``, ``sm2_new_puhekieli``,
    ``sm3_kap1..8``, ``sm4_kap1..5`` (all with a ``.json`` suffix).

    Returns:
        A list with the cards of all files, concatenated in the order above.
    """
    file_names = (
        [f'sm1_new_kap{n}.json' for n in range(1, 10)]
        + [f'sm2_new_kap{n}.json' for n in range(1, 9)]
        + ['sm2_new_puhekieli.json']
        + [f'sm3_kap{n}.json' for n in range(1, 9)]
        + [f'sm4_kap{n}.json' for n in range(1, 6)]
    )

    collected = []
    for name in file_names:
        collected += get_ru_finn_cards_from_file(name)
    return collected

In [5]:
import numpy as np

# Number of cards to draw and the seed that makes the draw repeatable.
SAMPLE_SIZE = 30
SAMPLE_SEED = 42

# A seeded generator selects the same cards on every "Restart & Run All";
# the unseeded global np.random state picked a different sample on each run.
sample_rng = np.random.default_rng(SAMPLE_SEED)
# replace=False: no card is drawn twice (raises ValueError if fewer than
# SAMPLE_SIZE cards are available). The result is a NumPy array of card dicts.
ru_fi_cards_sample = sample_rng.choice(list(get_ru_fi_cards()), size=SAMPLE_SIZE, replace=False)

In [6]:
# Split every sampled card into its two language views; both views keep the
# card id so they can be matched up again.
ru_cards_sample = [
    dict(word=entry["wordFirstLang"], sentence=entry["sentenceFirstLang"], id=entry["id"])
    for entry in ru_fi_cards_sample
]
fi_cards_sample = [
    dict(word=entry["wordSecondLang"], sentence=entry["sentenceSecondLang"], id=entry["id"])
    for entry in ru_fi_cards_sample
]

In [7]:
from unified_card_generation_pipeline import process_cards_batch

# Baseline run with no optional flags. The three language names are passed
# positionally; their parameter names live in unified_card_generation_pipeline
# and are not visible in this notebook.
target_cards, eval_results = process_cards_batch(
    ru_cards_sample,
    fi_cards_sample,
    'Finnish',
    'English',
    'Russian',
)

Processing cards: 100%|██████████| 30/30 [27:02<00:00, 54.10s/it] 


In [16]:
import os
import json
from datetime import datetime

def write_cards_to_tmp_file(cards, evals, output_dir="output"):
    """Dump ``cards`` and ``evals`` as JSON into ``output_dir``.

    Creates ``output_dir`` if needed, then writes ``cards_tmp.json`` followed
    by ``evals_tmp.json``, overwriting existing files. Both are UTF-8 with
    non-ASCII characters kept as-is and a 2-space indent.

    Args:
        cards: JSON-serialisable object written to ``cards_tmp.json``.
        evals: JSON-serialisable object written to ``evals_tmp.json``.
        output_dir: Target directory; defaults to ``"output"``.
    """
    os.makedirs(output_dir, exist_ok=True)

    # (file name, payload) pairs, written in this order.
    outputs = (
        ("cards_tmp.json", cards),
        ("evals_tmp.json", evals),
    )
    for name, payload in outputs:
        with open(os.path.join(output_dir, name), mode='w', encoding='utf-8') as sink:
            json.dump(payload, sink, indent=2, ensure_ascii=False)

In [17]:
# Persist the latest batch to the default "output" directory.
write_cards_to_tmp_file(cards=target_cards, evals=eval_results)

In [15]:
# Bare expression: renders the cards returned by process_cards_batch as the cell output.
target_cards

[{'word': 'take', 'sentence': 'Can I take this book?', 'id': 1261},
 {'word': 'year by year',
  'sentence': 'Year by year, life gets better.',
  'id': 5689},
 {'word': 'apology', 'sentence': 'Can you accept my apology?', 'id': 7272},
 {'word': 'really',
  'sentence': 'Could you really take out the trash?',
  'id': 5756},
 {'word': 'Liisa met her cousin on the street', 'sentence': '', 'id': 7097},
 {'word': 'shared apartment',
  'sentence': "I'm moving to Helsinki and looking for a shared apartment to rent.",
  'id': 4919},
 {'word': 'thermometer',
  'sentence': 'The thermometer now shows ten degrees below zero.',
  'id': 3453},
 {'word': 'case', 'sentence': 'This is a very rare case.', 'id': 7250},
 {'word': 'intact', 'sentence': 'The book is intact.', 'id': 6018},
 {'word': 'agree',
  'sentence': 'He agreed to help me with the task.',
  'id': 5719},
 {'word': 'in that direction',
  'sentence': 'We are going in that direction.',
  'id': 2619},
 {'word': 'person', 'sentence': 'He is a s

In [7]:
from unified_card_generation_pipeline import process_cards_batch

# Same inputs as the baseline call, with use_evaluation_feedback=True.
# NOTE: this rebinds target_cards / eval_results, replacing the baseline results.
target_cards, eval_results = process_cards_batch(
    ru_cards_sample,
    fi_cards_sample,
    'Finnish',
    'English',
    'Russian',
    use_evaluation_feedback=True,
)

Processing cards: 100%|██████████| 30/30 [17:31<00:00, 35.05s/it]


In [8]:
# Analyze evaluation results
# Per-metric pass rate over eval_results, followed by every card that failed
# at least one metric together with the evaluator's explanation.
total_cards = len(eval_results)
correct_counts = {
    'sentenceCorrectness': 0,
    'wordUsage': 0,
    'wordTranslationAccuracy': 0,
    'sentenceTranslationAccuracy': 0
}

# Single pass: tally correct verdicts and collect failures together so the two
# views cannot disagree. A verdict without a truthy 'isCorrect' is treated as a
# failure in BOTH places (previously a verdict missing the key was neither
# counted as correct nor listed as an issue).
cards_with_issues = []
for i, result in enumerate(eval_results):
    issues = []
    for metric in correct_counts:
        verdict = result[metric]
        if verdict.get('isCorrect', False):
            correct_counts[metric] += 1
        else:
            issues.append({
                'metric': metric,
                'explanation': verdict.get('explanation', 'No explanation provided')
            })
    if issues:
        cards_with_issues.append({
            # Fall back to the positional index when the result carries no id.
            'card_id': result.get('id', f'Card {i}'),
            'issues': issues
        })

# Calculate percentages; an empty batch reports 0.0 instead of raising
# ZeroDivisionError.
percentages = {
    metric: (count / total_cards) * 100 if total_cards else 0.0
    for metric, count in correct_counts.items()
}

print("Evaluation Results Analysis:")
print("-" * 50)
print(f"Total cards evaluated: {total_cards}")
print("\nCorrect percentages by metric:")
for metric, percentage in percentages.items():
    print(f"{metric}: {percentage:.1f}%")

if cards_with_issues:
    print("\nCards with issues:")
    print("-" * 50)
    for card in cards_with_issues:
        print(f"\nCard ID: {card['card_id']}")
        for issue in card['issues']:
            print(f"- {issue['metric']}: {issue['explanation']}")


Evaluation Results Analysis:
--------------------------------------------------
Total cards evaluated: 30

Correct percentages by metric:
sentenceCorrectness: 96.7%
wordUsage: 93.3%
wordTranslationAccuracy: 93.3%
sentenceTranslationAccuracy: 96.7%

Cards with issues:
--------------------------------------------------

Card ID: 3455
- sentenceCorrectness: The English sentence 'The sauna has good steam today' is understandable, but not entirely natural. A more common expression would use the word 'heat' or describe the sauna experience more naturally.
- wordUsage: In the context of a sauna, 'steam' does not typically represent the sauna's warmth or experience, which is what is meant in Finnish ('löyly') and Russian ('пар'). 'Heat' or 'steam' in the context of sauna experience is more appropriate.
- wordTranslationAccuracy: 'Steam' is a possible translation, but doesn't capture the specific sauna context of 'löyly' (Finnish) or 'пар' (Russian). Both are referring to the heat and steam exp

In [7]:
from unified_card_generation_pipeline import process_cards_batch

# Same inputs as the earlier runs, with use_moa=True; results go to separate
# moa_* names so they do not replace target_cards / eval_results.
moa_target_cards, moa_eval_results = process_cards_batch(
    ru_cards_sample,
    fi_cards_sample,
    'Finnish',
    'English',
    'Russian',
    use_moa=True,
)

Processing cards: 100%|██████████| 30/30 [23:44<00:00, 47.50s/it]


In [8]:
# Bare expression: renders the cards returned by the use_moa=True run as the cell output.
moa_target_cards

[{'word': 'neighborhood',
  'sentence': "I'm going for a walk in the neighborhood to explore new places.",
  'id': 7023},
 {'word': 'Time unit such as an hour, day, week...',
  'sentence': '',
  'id': 3025},
 {'word': 'No way',
  'sentence': "No! Absolutely not! It can't be! No way!",
  'id': 5751},
 {'word': 'remember',
  'sentence': 'Remember to take your keys with you.',
  'id': 5421},
 {'word': 'selection',
  'sentence': 'The store has a wide selection of fruits and vegetables.',
  'id': 6842},
 {'word': 'Those girls are Finnish.', 'sentence': '', 'id': 4681},
 {'word': 'use', 'sentence': 'Can I use your computer?', 'id': 4368},
 {'word': 'restaurants',
  'sentence': 'They are coming out of the restaurants.',
  'id': 4495},
 {'word': 'to have a crush', 'sentence': 'I have a crush on you.', 'id': 4182},
 {'word': 'but', 'sentence': 'I want to go, but I am tired.', 'id': 1155},
 {'word': 'to end', 'sentence': 'The movie is going to end soon.', 'id': 1184},
 {'word': 'their', 'sentenc

In [9]:
# Analyze evaluation results
# Per-metric pass rate over moa_eval_results, followed by every card that
# failed at least one metric together with the evaluator's explanation.
total_cards = len(moa_eval_results)
correct_counts = {
    'sentenceCorrectness': 0,
    'wordUsage': 0,
    'wordTranslationAccuracy': 0,
    'sentenceTranslationAccuracy': 0
}

# Single pass: tally correct verdicts and collect failures together so the two
# views cannot disagree. A verdict without a truthy 'isCorrect' is treated as a
# failure in BOTH places (previously a verdict missing the key was neither
# counted as correct nor listed as an issue).
cards_with_issues = []
for i, result in enumerate(moa_eval_results):
    issues = []
    for metric in correct_counts:
        verdict = result[metric]
        if verdict.get('isCorrect', False):
            correct_counts[metric] += 1
        else:
            issues.append({
                'metric': metric,
                'explanation': verdict.get('explanation', 'No explanation provided')
            })
    if issues:
        cards_with_issues.append({
            # Fall back to the positional index when the result carries no id.
            'card_id': result.get('id', f'Card {i}'),
            'issues': issues
        })

# Calculate percentages; an empty batch reports 0.0 instead of raising
# ZeroDivisionError.
percentages = {
    metric: (count / total_cards) * 100 if total_cards else 0.0
    for metric, count in correct_counts.items()
}

print("Evaluation Results Analysis:")
print("-" * 50)
print(f"Total cards evaluated: {total_cards}")
print("\nCorrect percentages by metric:")
for metric, percentage in percentages.items():
    print(f"{metric}: {percentage:.1f}%")

if cards_with_issues:
    print("\nCards with issues:")
    print("-" * 50)
    for card in cards_with_issues:
        print(f"\nCard ID: {card['card_id']}")
        for issue in card['issues']:
            print(f"- {issue['metric']}: {issue['explanation']}")


Evaluation Results Analysis:
--------------------------------------------------
Total cards evaluated: 30

Correct percentages by metric:
sentenceCorrectness: 100.0%
wordUsage: 100.0%
wordTranslationAccuracy: 86.7%
sentenceTranslationAccuracy: 96.7%

Cards with issues:
--------------------------------------------------

Card ID: 5751
- sentenceTranslationAccuracy: The sentences in Finnish and Russian do not match exactly in words, but their meanings are conveyed accurately when translated to English. However, the Finnish sentence 'Нет! Никак нет! Не может быть! Ни в коем случае!' and the Russian sentence 'Eikä! Eipäs! Eihän! Ei todellakaan!' both translate well to 'No! Absolutely not! It can't be! No way!' Still, there is a small contextual difference in how disbelief is expressed in the original languages which might be nuanced in English.

Card ID: 1089
- wordTranslationAccuracy: The Finnish translation 'neljäkymmentä' means 'forty', and the Russian translation 'сорок' also means 'fo