# Realizar traducciones a todos los idiomas en 5 trials y medir sus tiempos de ejecución para ambos modelos.

In [None]:
import sys
import os
import time
import pandas as pd
import json
from tqdm import tqdm  # Importa tqdm para la barra de progreso

# Add the parent of the current working directory to the import path so that
# `modules.translation.api` can be resolved by the import below.
# NOTE(review): this presumes the notebook is run from a directory that sits
# directly below the project root — confirm.
notebook_dir = os.getcwd()
sys.path.append(os.path.abspath(os.path.join(notebook_dir, '../')))
from modules.translation.api import translate_text, translate_text_gemini

# Languages to benchmark, as (language name, language code) pairs.
# Every ordered pair of distinct entries is used as a (source, target)
# combination by the benchmark loop further down.
languages = [
    ('English', 'en'),
    ('Spanish', 'es'),
    ('Chinese', 'zh'),
    ('Arabic', 'ar'),
    ('Russian', 'ru'),
    ('German', 'de'),
    ('French', 'fr')
]

# Sample texts grouped by text type ('word', 'sentence', 'paragraph'),
# five samples per type. All samples are written in English.
text_samples = {
    'word': ['Hello', 'World', 'Peace', 'Dream', 'Sky'],
    'sentence': [
        'This is a test sentence.',
        'How are you doing today?',
        'The weather is great.',
        'I love learning new languages.',
        'Artificial Intelligence is fascinating.'
    ],
    'paragraph': [
        'This is a test paragraph with multiple sentences. It should help us understand how translation times vary with text length.',
        'Here is another example of a paragraph. It includes several details about a specific topic, which varies the complexity of the translation.',
        'The quick brown fox jumps over the lazy dog. This sentence contains every letter in the English alphabet.',
        'Translation models can vary significantly in their performance. It’s important to test them thoroughly.',
        'Efficient translation can transform communication in a globalized world, allowing for more seamless interactions across cultures.'
    ]
}

def extract_gemini_output(gemini_translation):
    """Extract the translated text from a Gemini translation response.

    The response is expected to be a JSON string holding an object with a
    'translated_text' key. The value under that key may be:

    * a JSON-encoded string holding a dict with an 'output' key,
    * a JSON-encoded string holding a list of such dicts (their outputs are
      joined with single spaces),
    * an already-decoded dict or list of the same shape, or
    * plain, non-JSON text, which is returned unchanged.

    Args:
        gemini_translation: Raw response string returned by the Gemini call.

    Returns:
        The extracted translation, or '' when the response cannot be parsed
        or carries no 'translated_text' key.
    """
    try:
        gemini_json = json.loads(gemini_translation)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError covers non-string responses such as None.
        print("JSON Decode Error:", e)
        return ''

    if not isinstance(gemini_json, dict) or 'translated_text' not in gemini_json:
        print("No 'translated_text' key in response:", gemini_json)
        return ''

    raw = gemini_json['translated_text']
    payload = raw
    if isinstance(raw, str):
        try:
            # The payload is usually JSON encoded a second time.
            payload = json.loads(raw)
        except json.JSONDecodeError:
            # Not JSON at all: treat the string itself as the translation
            # instead of calling .get() on a str.
            return raw

    if isinstance(payload, list):
        # Collect 'output' from every dict in the list, skipping other items.
        outputs = [
            str(item['output'])
            for item in payload
            if isinstance(item, dict) and 'output' in item
        ]
        return ' '.join(outputs)
    if isinstance(payload, dict):
        return payload.get('output', '')
    if isinstance(payload, str):
        return payload
    # The text decoded to a JSON scalar (e.g. "123" or "true"): the original
    # text is the best available translation. Anything else yields ''.
    return raw if isinstance(raw, str) else ''

def _extract_hf_output(hf_translation):
    """Return 'translated_text' from the Hugging Face JSON response, or '' if it cannot be parsed."""
    try:
        parsed = json.loads(hf_translation)
    except (json.JSONDecodeError, TypeError) as e:
        print("HF JSON Decode Error:", e)
        return ''
    if not isinstance(parsed, dict):
        return ''
    return parsed.get('translated_text', '')


def measure_translation_times(text, source_lang, target_lang, source_lang_code, target_lang_code, trials=5):
    """Translate `text` repeatedly with both models and time every call.

    Each trial calls `translate_text` (Hugging Face) and then
    `translate_text_gemini`, timing each call separately.

    Args:
        text: Text to translate.
        source_lang: Source language name.
        target_lang: Target language name.
        source_lang_code: Source language code, passed to `translate_text`.
        target_lang_code: Target language code, passed to `translate_text`.
        trials: Number of repetitions; defaults to 5.

    Returns:
        A tuple `(hf_times, gemini_times, hf_translations,
        gemini_translations)` of four lists, each of length `trials`. Times
        are elapsed seconds; translations are the extracted texts ('' when a
        response could not be parsed).
    """
    hf_times, gemini_times = [], []
    hf_translations, gemini_translations = [], []
    for _ in range(trials):
        # perf_counter is monotonic and high resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        hf_translation = translate_text(text, source_lang, target_lang, source_lang_code, target_lang_code)
        hf_times.append(time.perf_counter() - start_time)
        # Parsing happens after the timer stops so it does not skew the result.
        hf_translations.append(_extract_hf_output(hf_translation))

        start_time = time.perf_counter()
        gemini_translation = translate_text_gemini(text, source_lang, target_lang)
        gemini_times.append(time.perf_counter() - start_time)
        gemini_translations.append(extract_gemini_output(gemini_translation))

    return hf_times, gemini_times, hf_translations, gemini_translations

# Run the benchmark over every ordered pair of distinct languages and every
# sample text, collecting one result row per trial, then export the rows to CSV.
# NOTE(review): the sample texts are all English but are submitted with every
# source language — confirm this is intended.
results = []

# Trials recorded per (language pair, text); matches the default repeat count
# of measure_translation_times.
trials_per_text = 5

# Count the samples of every text type rather than assuming each type has as
# many entries as 'word'.
total = (
    len(languages) * (len(languages) - 1)
    * sum(len(texts) for texts in text_samples.values())
    * trials_per_text
)

# Create the output directory up front so a missing folder fails immediately
# instead of after all translations have been run.
output_path = 'data/translation_performance_results.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# The context manager closes the progress bar even if a translation raises.
with tqdm(total=total, desc="Translating", unit="trans") as progress:
    for source_lang, source_code in languages:
        for target_lang, target_code in languages:
            if source_code == target_code:
                continue  # Skip translating a language into itself.
            for sample_type, texts in text_samples.items():
                for text in texts:
                    hf_times, gemini_times, hf_translations, gemini_translations = measure_translation_times(
                        text, source_lang, target_lang, source_code, target_code
                    )
                    trial_rows = zip(hf_translations, hf_times, gemini_translations, gemini_times)
                    for trial, (hf_text, hf_time, gemini_text, gemini_time) in enumerate(trial_rows, start=1):
                        results.append({
                            'Source Language': source_lang,
                            'Source ISO Code': source_code,
                            'Target Language': target_lang,
                            'Target ISO Code': target_code,
                            'Text Type': sample_type,
                            'Text': text,
                            'Trial': trial,
                            'HF Translated Text': hf_text,
                            'HF Translation Time (s)': hf_time,
                            'Gemini Translated Text': gemini_text,
                            'Gemini Translation Time (s)': gemini_time
                        })
                        progress.update(1)

df = pd.DataFrame(results)
df.to_csv(output_path, index=False)