In [36]:
import openai
from openai import AzureOpenAI, AsyncAzureOpenAI
from tqdm.notebook import tqdm
import os
from dotenv import load_dotenv, find_dotenv
import requests
import base64

# Model identifier strings, one module-level constant per model.
GPT_3_5_TURBO = "gpt-3.5-turbo"
GPT_4_TURBO_PREVIEW = "gpt-4-turbo-preview"
GPT_4 = "gpt-4"
GPT_4o = "gpt-4o"  # mixed-case constant name kept so existing references still resolve

def get_openai_api_key():
    """Load the .env file located by ``find_dotenv()`` and return ``OPENAI_API_KEY``.

    Returns:
        The value of the ``OPENAI_API_KEY`` environment variable, or ``None``
        when it is not set.
    """
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)  # return value intentionally ignored
    return os.getenv("OPENAI_API_KEY")

# get_openai_api_key() calls load_dotenv() first, so the os.getenv lookups
# below run after the .env file (if one was found) has been loaded.
OPENAI_API_KEY = get_openai_api_key()
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")

# Azure client; the endpoint read above is reused instead of being looked up twice.
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-07-01-preview",
    azure_endpoint=azure_endpoint,
)

In [37]:
import json


# Russian-English evaluation results. Stored under its own name because
# EVAL_FOLDER is assigned again further down this cell (English-Finnish prefix),
# which would otherwise silently replace this value before any reader runs.
RU_ENG_EVAL_FOLDER = '../../data/russian-english/cards/eval_results/ru_eng_'
def get_eval_results_from_file(file_name, folder=None):
    """Load evaluation results from a JSON file.

    Args:
        file_name: File name appended verbatim to the folder prefix.
        folder: Optional path prefix (for example ``RU_ENG_EVAL_FOLDER``).
            When omitted, the module-level ``EVAL_FOLDER`` is looked up at
            call time, which matches the previous behaviour of this function.

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If the resulting path does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    prefix = EVAL_FOLDER if folder is None else folder
    with open(prefix + file_name, 'r', encoding='utf-8') as file:
        return json.load(file)


RU_ENG_OUTPUT_FOLDER = '../../data/russian-english/cards/test_cards/ru_eng_'
def get_ru_eng_cards_from_file(file_name):
    """Load Russian-English cards from a JSON file.

    Args:
        file_name: File name appended verbatim to ``RU_ENG_OUTPUT_FOLDER``.

    Returns:
        The parsed JSON content of the file.

    Raises:
        FileNotFoundError: If the resulting path does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Build the path from the Russian-English prefix declared above; the
    # generic OUTPUT_FOLDER points at the English-Finnish cards.
    file_path = RU_ENG_OUTPUT_FOLDER + file_name
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


RU_FINN_FOLDER_WITH_JSON = '../../data/russian-finnish/cards/curated_platform_cards/'
def get_ru_finn_cards_from_file(file_name):
    """Load Russian-Finnish cards, leaving out those carrying a deletion marker.

    Args:
        file_name: File name appended verbatim to ``RU_FINN_FOLDER_WITH_JSON``.

    Returns:
        A list of the cards that have no ``'isMarkedDeleted'`` key, in file order.
    """
    with open(RU_FINN_FOLDER_WITH_JSON + file_name, 'r', encoding='utf-8') as source:
        all_cards = json.load(source)

    # Only the presence of the key matters; its value is never inspected.
    return [card for card in all_cards if 'isMarkedDeleted' not in card.keys()]


ENG_FINN_OUTPUT_FOLDER = '../../data/english-finnish/cards/test_cards/eng_finn_'
def get_eng_finn_cards_from_file(file_name):
    """Load English-Finnish cards from a JSON file.

    Args:
        file_name: File name appended verbatim to ``ENG_FINN_OUTPUT_FOLDER``.

    Returns:
        The parsed JSON content of the file.
    """
    source_path = ENG_FINN_OUTPUT_FOLDER + file_name
    with open(source_path, 'r', encoding='utf-8') as source:
        cards = json.load(source)
    return cards


OUTPUT_FOLDER = '../../data/english-finnish/cards/test_cards/eng_finn_'
def write_cards_to_file(file_name, cards):
    """Serialise ``cards`` as JSON into a file under ``OUTPUT_FOLDER``.

    Args:
        file_name: File name appended verbatim to ``OUTPUT_FOLDER``.
        cards: JSON-serialisable object to store.

    The file is written as UTF-8 with non-ASCII characters kept as-is and a
    two-space indent; an existing file at that path is overwritten.
    """
    destination = OUTPUT_FOLDER + file_name
    with open(destination, 'w', encoding='utf-8') as sink:
        json.dump(cards, sink, indent=2, ensure_ascii=False)
        
        
EVAL_FOLDER = '../../data/english-finnish/cards/eval_results/eng_finn_'
def write_eval_results_to_file(file_name, results):
    """Serialise ``results`` as JSON into a file under ``EVAL_FOLDER``.

    Args:
        file_name: File name appended verbatim to ``EVAL_FOLDER``.
        results: JSON-serialisable object to store.

    The file is written as UTF-8 with non-ASCII characters kept as-is and a
    two-space indent; an existing file at that path is overwritten.
    """
    destination = EVAL_FOLDER + file_name
    with open(destination, 'w', encoding='utf-8') as sink:
        json.dump(results, sink, indent=2, ensure_ascii=False)

In [38]:
def check_eval(eval_result, is_phrase_card):
    """Tell whether an evaluation result counts as passing.

    Args:
        eval_result: Mapping of check name to a mapping holding an
            ``'isCorrect'`` entry.
        is_phrase_card: When truthy, only ``'translationAccuracy'`` is
            consulted; otherwise the four word/sentence checks are.

    Returns:
        For phrase cards, the ``'isCorrect'`` value of ``'translationAccuracy'``
        unchanged. Otherwise ``True`` only if every one of the four checks is
        truthy.

    Raises:
        KeyError: If a consulted check or its ``'isCorrect'`` entry is missing.
    """
    if not is_phrase_card:
        required_checks = (
            'wordTranslationAccuracy',
            'sentenceTranslationAccuracy',
            'englishSentenceCorrectness',
            'wordUsage',
        )
        # Read every verdict before combining them, so a missing check raises
        # even when an earlier one already failed.
        verdicts = [eval_result[check]['isCorrect'] for check in required_checks]
        return all(verdicts)

    return eval_result['translationAccuracy']['isCorrect']

In [39]:
def get_bad_cards_from_files(file_names):
    """Collect the cards whose evaluation did not pass.

    For each file name the English-Finnish cards and the evaluation results
    are loaded and paired by position. A card is treated as a phrase card when
    its ``'sentenceFirstLang'`` is the empty string.

    Args:
        file_names: Iterable of file names handed to both loaders.

    Returns:
        A list of the failing card dicts, each extended in place with an
        ``'is_phrase_card'`` entry and all entries of its evaluation result.

    Raises:
        AssertionError: If a file's cards and evaluation results differ in length.
    """
    failing_cards = []
    for name in file_names:
        cards = get_eng_finn_cards_from_file(name)
        evaluations = get_eval_results_from_file(name)

        assert len(cards) == len(evaluations)

        for card, evaluation in zip(cards, evaluations):
            phrase_only = card['sentenceFirstLang'] == ''
            if check_eval(evaluation, phrase_only):
                continue
            # The loaded card dict itself is extended; evaluation keys win on clashes.
            card['is_phrase_card'] = phrase_only
            card.update(evaluation)
            failing_cards.append(card)

    return failing_cards

In [27]:
# Card files of the sm1, sm2 and sm3 series, in that order.
# Not referenced by the later cells shown in this notebook.
file_names = [
    *(f'sm1_new_kap{chapter}.json' for chapter in range(1, 10)),
    *(f'sm2_new_kap{chapter}.json' for chapter in range(1, 9)),
    'sm2_new_puhekieli.json',
    *(f'sm3_kap{chapter}.json' for chapter in range(1, 9)),
]

In [45]:
# File names sm4_kap1.json through sm4_kap5.json (the range end, 6, is exclusive).
# NOTE(review): despite the variable name, this lists five chapter files of the sm4 series.
fourth_chapter = [f'sm4_kap{i}.json' for i in range(1, 6)]

In [46]:
# Cards from the sm4 files whose evaluation did not pass, merged with their evaluation entries.
bad_cards = get_bad_cards_from_files(fourth_chapter)

In [47]:
import pandas as pd

# One row per failing card; columns come from the keys of the merged card dicts.
bad_cards_df = pd.DataFrame(bad_cards)

In [48]:
# Display the failing cards as this cell's output.
bad_cards_df

Unnamed: 0,wordFirstLang,sentenceFirstLang,wordSecondLang,sentenceSecondLang,id,is_phrase_card,englishSentenceCorrectness,wordUsage,wordTranslationAccuracy,sentenceTranslationAccuracy,translationAccuracy
0,evening outing,Shall we go out together this evening?,iltameno,Lähdetäänkö yhdessä iltamenoihin tänä iltana?,6375,False,"{'isCorrect': True, 'explanation': 'The Englis...","{'isCorrect': False, 'explanation': 'The Engli...","{'isCorrect': True, 'explanation': 'The word '...","{'isCorrect': False, 'explanation': 'The Engli...",
1,diversely,He can sing diversely.,monipuolisesti,Hän osaa laulaa monipuolisesti.,6427,False,"{'isCorrect': False, 'explanation': 'The sente...","{'isCorrect': False, 'explanation': 'Although ...","{'isCorrect': True, 'explanation': 'The word '...","{'isCorrect': True, 'explanation': 'The Englis...",
2,weekday,The weekdays usually start on Monday.,arki,Arkipäivät alkavat yleensä maanantaina.,6440,False,"{'isCorrect': True, 'explanation': 'The Englis...","{'isCorrect': True, 'explanation': 'The word '...","{'isCorrect': False, 'explanation': 'The word ...","{'isCorrect': False, 'explanation': 'The sente...",
3,packed suitcase,"We're getting ready for the trip, so the packe...",pakattu matkalaukku,"Valmistaudumme matkalle, joten pakattu matkala...",6530,False,"{'isCorrect': False, 'explanation': 'The Engli...","{'isCorrect': True, 'explanation': 'The word '...","{'isCorrect': False, 'explanation': 'The Finni...","{'isCorrect': False, 'explanation': 'The trans...",
4,eating,I am having lunch in a restaurant that's busy.,syömässä,Syön lounasta vilkkaassa ravintolassa.,6535,False,"{'isCorrect': False, 'explanation': 'The Engli...","{'isCorrect': False, 'explanation': 'The Engli...","{'isCorrect': True, 'explanation': 'The transl...","{'isCorrect': False, 'explanation': 'The Engli...",
5,family's desired,This trip is exactly what the family's desired...,perheen toivoma,"Tämä matka on juuri se, mitä perheen toivoma k...",6589,False,"{'isCorrect': False, 'explanation': 'The sente...","{'isCorrect': False, 'explanation': 'The phras...","{'isCorrect': False, 'explanation': 'The trans...","{'isCorrect': False, 'explanation': 'The Engli...",
6,day club,My child attends the day club every weekday.,päiväkerho,Lapseni käy päiväkerhossa joka arkipäivä.,6605,False,"{'isCorrect': False, 'explanation': 'The term ...","{'isCorrect': False, 'explanation': ''Day club...","{'isCorrect': False, 'explanation': 'The Engli...","{'isCorrect': False, 'explanation': 'The Engli...",
7,to get out of,I can get out of the meeting by 3:00 PM.,vapautua,Voin vapautua kokouksesta klo 15 mennessä.,6634,False,"{'isCorrect': True, 'explanation': 'The Englis...","{'isCorrect': True, 'explanation': 'The word '...","{'isCorrect': False, 'explanation': 'The trans...","{'isCorrect': False, 'explanation': 'The Engli...",
8,functional,This device works really well.,toimiva,Tämä laite toimii todella hyvin.,6662,False,"{'isCorrect': False, 'explanation': 'The trans...","{'isCorrect': False, 'explanation': 'The Engli...","{'isCorrect': True, 'explanation': 'The transl...","{'isCorrect': False, 'explanation': 'The trans...",
9,raspberry-flavored,Raspberry-flavored jam is really delicious.,vadelmanmakuinen,Vadelmanmakuinen hillo on todella herkullista.,6721,False,"{'isCorrect': True, 'explanation': 'The Englis...","{'isCorrect': False, 'explanation': 'The Engli...","{'isCorrect': False, 'explanation': 'The word ...","{'isCorrect': False, 'explanation': 'The origi...",


In [49]:
# Export the failing cards to an Excel workbook; the relative path resolves
# against the kernel's current working directory.
bad_cards_df.to_excel('fourth_book_bad_cards.xlsx')