In [38]:
import sys
import os
import time
import pandas as pd
from datetime import datetime
import json

# Notebooks have no __file__, so anchor on the kernel's working directory and
# expose its parent directory to the import system.
notebook_dir = os.getcwd()
sys.path.append(
    os.path.abspath(os.path.join(notebook_dir, os.pardir))
)

# Now you can import your functions
from modules.translation.api import translate_text, translate_text_gemini

In [39]:
# Languages under test, as (display name, ISO code) pairs in benchmark order
languages = list({
    'English': 'en',
    'Spanish': 'es',
    'Chinese': 'zh',
    'Arabic': 'ar',
    'Russian': 'ru',
    'German': 'de',
    'French': 'fr',
}.items())

# Sample inputs of increasing length, keyed by the kind of text they represent
text_samples = dict(
    # A single word
    word='Hello',
    # A simple sentence
    sentence='This is a test sentence.',
    # A paragraph
    paragraph=(
        'This is a test paragraph with multiple sentences. '
        'It should help us understand how translation times vary with text length.'
    ),
)

In [57]:
def extract_gemini_output(gemini_translation):
    """Return the value stored under the ``'output'`` key of a Gemini response.

    Args:
        gemini_translation: The raw response. Normally a JSON string; an
            already-decoded ``dict`` is accepted as well. A JSON string wrapped
            in a Markdown code fence (```` ```json ... ``` ````) is unwrapped
            before decoding.

    Returns:
        The ``'output'`` value, or ``''`` when the response is missing, is not
        valid JSON, is not a JSON object, or has no ``'output'`` key.
    """
    payload = gemini_translation

    if isinstance(payload, str):
        payload = payload.strip()
        if payload.startswith('```'):
            # Drop the opening fence line (which may carry a language tag such
            # as "json") and the closing fence, keeping only the JSON body.
            _, _, payload = payload.partition('\n')
            payload = payload.rstrip()
            if payload.endswith('```'):
                payload = payload[:-3]

    if not isinstance(payload, dict):
        try:
            payload = json.loads(payload)
        except (TypeError, ValueError):
            # TypeError: None or another non-JSON input type.
            # ValueError: malformed JSON (json.JSONDecodeError subclasses it).
            return ''

    # Valid JSON that is not an object (list, string, number) has no keys.
    if not isinstance(payload, dict):
        return ''
    return payload.get('output', '')

In [58]:
def measure_translation_times(text, source_lang, target_lang, source_lang_code, target_lang_code):
    """Time one translation through each backend and collect both outputs.

    Args:
        text: The text to translate.
        source_lang: Source language name, passed to both backends.
        target_lang: Target language name, passed to both backends.
        source_lang_code: Source language code, passed to ``translate_text`` only.
        target_lang_code: Target language code, passed to ``translate_text`` only.

    Returns:
        A tuple ``(hf_time, gemini_time, hf_translated_text,
        gemini_translated_text)``. Times are elapsed seconds. A translated text
        is ``''`` when the corresponding response could not be decoded.
    """
    # perf_counter is monotonic and high-resolution, so measured intervals are
    # not distorted by system clock adjustments the way time.time() can be.
    # NOTE(review): the captured logs suggest translate_text may itself fall
    # back to Gemini when a model fails to load, which would be counted as
    # Hugging Face time here - confirm against modules.translation.api.
    start_time = time.perf_counter()
    hf_translation = translate_text(text, source_lang, target_lang, source_lang_code, target_lang_code)
    hf_time = time.perf_counter() - start_time

    # One undecodable response should not abort a whole benchmark run; degrade
    # to an empty string, as the Gemini path below does.
    try:
        hf_translated_text = json.loads(hf_translation).get('translated_text', '')
    except (TypeError, ValueError, AttributeError):
        hf_translated_text = ''

    start_time = time.perf_counter()
    gemini_translation = translate_text_gemini(text, source_lang, target_lang)
    gemini_time = time.perf_counter() - start_time

    gemini_translated_text = extract_gemini_output(gemini_translation)

    return hf_time, gemini_time, hf_translated_text, gemini_translated_text

In [59]:
# Every ordered (source, target) combination, excluding a language paired with itself
language_pairs = [
    (source, target)
    for source in languages
    for target in languages
    if source[1] != target[1]
]

# NOTE(review): the same English sample text is submitted whatever the source
# language is, so rows with a non-English source do not exercise a real
# translation from that language.
results = []
for (source_lang, source_code), (target_lang, target_code) in language_pairs:
    for sample_type, sample_text in text_samples.items():
        measurement = measure_translation_times(
            sample_text, source_lang, target_lang, source_code, target_code
        )
        hf_time, gemini_time, hf_translated_text, gemini_translated_text = measurement

        row = {
            'Source Language': source_lang,
            'Source ISO Code': source_code,
            'Target Language': target_lang,
            'Target ISO Code': target_code,
            'Text Type': sample_type,
            'Original Text': sample_text,
            'HF Translated Text': hf_translated_text,
            'HF Translation Time (s)': hf_time,
            'Gemini Translated Text': gemini_translated_text,
            'Gemini Translation Time (s)': gemini_time,
        }
        results.append(row)

# One DataFrame row per (source, target, text type) measurement
df = pd.DataFrame(results)

Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Failed to load model Helsinki-NLP/opus-mt-es-zh: Helsinki-NLP/opus-mt-es-zh is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having pe

Using Gemini API for translation.
Failed to load model Helsinki-NLP/opus-mt-ar-zh: Helsinki-NLP/opus-mt-ar-zh is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
Falling back to Gemini API for translation.
Using Gemini API for translation.
Failed to load model Helsinki-NLP/opus-mt-ar-zh: Helsinki-NLP/opus-mt-ar-zh is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
Falling back to Gemini API for translation.
Using Gemini API for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.
Using Hugging Face for translation.


In [60]:
# Write the measurements to a timestamped CSV file in the working directory
run_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
csv_filename = 'translation_results_' + run_timestamp + '.csv'
df.to_csv(csv_filename, index=False)

# Leave the DataFrame as the last expression so Jupyter renders it as a table
df

Unnamed: 0,Source Language,Source ISO Code,Target Language,Target ISO Code,Text Type,Original Text,HF Translated Text,HF Translation Time (s),Gemini Translated Text,Gemini Translation Time (s)
0,English,en,Spanish,es,word,Hello,Hola.,1.358536,,1.237987
1,English,en,Spanish,es,sentence,This is a test sentence.,Esta es una frase de prueba.,0.286579,,0.898474
2,English,en,Spanish,es,paragraph,This is a test paragraph with multiple sentenc...,Este es un párrafo de prueba con múltiples ora...,0.803361,,1.082571
3,English,en,Chinese,zh,word,Hello,你好 你好 你好 你好 你好 你好 你好 你好 你好,13.740644,,1.316380
4,English,en,Chinese,zh,sentence,This is a test sentence.,这是一个试验判决。,0.228767,,1.308424
...,...,...,...,...,...,...,...,...,...,...
121,French,fr,Russian,ru,sentence,This is a test sentence.,This is a test sentence.,0.408469,,1.093402
122,French,fr,Russian,ru,paragraph,This is a test paragraph with multiple sentenc...,This is a test paragraph with multiple sentenc...,1.301146,,1.175362
123,French,fr,German,de,word,Hello,Hallo,1.445488,,1.187081
124,French,fr,German,de,sentence,This is a test sentence.,This is a Test Urteil.,0.418718,,1.343587


In [61]:
import plotly.express as px

# Options shared by the three bar charts in this cell
text_type_bar_opts = dict(x='Text Type', barmode='group')

# Chart 1: Hugging Face translation times by text type and target language
fig1 = px.bar(
    df,
    y='HF Translation Time (s)',
    color='Target Language',
    title='Hugging Face Translation Times by Text Type and Target Language',
    labels={'HF Translation Time (s)': 'Translation Time (s)', 'Text Type': 'Text Type'},
    **text_type_bar_opts
)
fig1.show()

# Chart 2: Gemini translation times by text type and target language
fig2 = px.bar(
    df,
    y='Gemini Translation Time (s)',
    color='Target Language',
    title='Gemini Translation Times by Text Type and Target Language',
    labels={'Gemini Translation Time (s)': 'Translation Time (s)', 'Text Type': 'Text Type'},
    **text_type_bar_opts
)
fig2.show()

# Chart 3: Hugging Face versus Gemini translation times.
# Reshape to long form: one row per measurement, with the backend in 'Model'.
df_melted = df.melt(
    id_vars=['Source Language', 'Target Language', 'Text Type'],
    value_vars=['HF Translation Time (s)', 'Gemini Translation Time (s)'],
    var_name='Model',
    value_name='Translation Time (s)',
)

fig3 = px.bar(
    df_melted,
    y='Translation Time (s)',
    color='Model',
    title='Comparison of Hugging Face and Gemini Translation Times',
    labels={'Translation Time (s)': 'Translation Time (s)', 'Text Type': 'Text Type'},
    **text_type_bar_opts
)
fig3.show()

In [62]:
# Options shared by both charts: one bar group per source language, coloured by target
source_bar_opts = dict(x='Source Language', color='Target Language', barmode='group')

# Chart 4: translation time from each language to the others (Hugging Face)
fig4 = px.bar(
    df,
    y='HF Translation Time (s)',
    title='Hugging Face Translation Times by Source and Target Language',
    labels={'HF Translation Time (s)': 'Translation Time (s)', 'Source Language': 'Source Language'},
    **source_bar_opts
)
fig4.show()

# Chart 5: translation time from each language to the others (Gemini)
fig5 = px.bar(
    df,
    y='Gemini Translation Time (s)',
    title='Gemini Translation Times by Source and Target Language',
    labels={'Gemini Translation Time (s)': 'Translation Time (s)', 'Source Language': 'Source Language'},
    **source_bar_opts
)
fig5.show()

In [63]:
import plotly.express as px

# Keep only the single-word measurements
is_word = df['Text Type'].eq('word')
df_word = df[is_word]

# Grouping shared by both word-level charts
word_bar_opts = dict(x='Source Language', color='Target Language', barmode='group')

# Hugging Face translation times for words
fig_word_hf = px.bar(
    df_word,
    y='HF Translation Time (s)',
    title='Hugging Face Translation Times for Words by Source and Target Language',
    labels={'HF Translation Time (s)': 'Translation Time (s)', 'Source Language': 'Source Language'},
    **word_bar_opts
)
fig_word_hf.show()

# Gemini translation times for words
fig_word_gemini = px.bar(
    df_word,
    y='Gemini Translation Time (s)',
    title='Gemini Translation Times for Words by Source and Target Language',
    labels={'Gemini Translation Time (s)': 'Translation Time (s)', 'Source Language': 'Source Language'},
    **word_bar_opts
)
fig_word_gemini.show()

In [64]:
import plotly.express as px

# Keep only the single-sentence measurements
is_sentence = df['Text Type'].eq('sentence')
df_sentence = df[is_sentence]

# Grouping shared by both sentence-level charts
sentence_bar_opts = dict(x='Source Language', color='Target Language', barmode='group')

# Hugging Face translation times for sentences
fig_sentence_hf = px.bar(
    df_sentence,
    y='HF Translation Time (s)',
    title='Hugging Face Translation Times for Sentences by Source and Target Language',
    labels={'HF Translation Time (s)': 'Translation Time (s)', 'Source Language': 'Source Language'},
    **sentence_bar_opts
)
fig_sentence_hf.show()

# Gemini translation times for sentences
fig_sentence_gemini = px.bar(
    df_sentence,
    y='Gemini Translation Time (s)',
    title='Gemini Translation Times for Sentences by Source and Target Language',
    labels={'Gemini Translation Time (s)': 'Translation Time (s)', 'Source Language': 'Source Language'},
    **sentence_bar_opts
)
fig_sentence_gemini.show()

In [65]:
import plotly.express as px

# Keep only the paragraph measurements
is_paragraph = df['Text Type'].eq('paragraph')
df_paragraph = df[is_paragraph]

# Grouping shared by both paragraph-level charts
paragraph_bar_opts = dict(x='Source Language', color='Target Language', barmode='group')

# Hugging Face translation times for paragraphs
fig_paragraph_hf = px.bar(
    df_paragraph,
    y='HF Translation Time (s)',
    title='Hugging Face Translation Times for Paragraphs by Source and Target Language',
    labels={'HF Translation Time (s)': 'Translation Time (s)', 'Source Language': 'Source Language'},
    **paragraph_bar_opts
)
fig_paragraph_hf.show()

# Gemini translation times for paragraphs
fig_paragraph_gemini = px.bar(
    df_paragraph,
    y='Gemini Translation Time (s)',
    title='Gemini Translation Times for Paragraphs by Source and Target Language',
    labels={'Gemini Translation Time (s)': 'Translation Time (s)', 'Source Language': 'Source Language'},
    **paragraph_bar_opts
)
fig_paragraph_gemini.show()

In [66]:
import plotly.express as px

# Bubble charts, one per backend: bubble size is the translation time for each
# (source, target) language pair, coloured by text type.
bubble_hover = {
    'Source Language': True,
    'Target Language': True,
    'HF Translation Time (s)': True,
    'Gemini Translation Time (s)': True,
}

# Hugging Face bubbles
fig1 = px.scatter(
    df,
    x='Source Language',
    y='Target Language',
    size='HF Translation Time (s)',
    color='Text Type',
    hover_data=dict(bubble_hover),
    title='Hugging Face Translation Times by Source and Target Language with Text Type',
    labels={'HF Translation Time (s)': 'HF Translation Time (s)', 'Source Language': 'Source Language', 'Target Language': 'Target Language'},
)

# Marker area is scaled by sizeref, which is derived from the largest value
hf_sizeref = 2.0 * max(df['HF Translation Time (s)']) / (100.0 ** 2)
fig1.update_traces(marker={'sizemode': 'area', 'sizeref': hf_sizeref, 'line_width': 2})
fig1.show()

# Gemini bubbles
fig2 = px.scatter(
    df,
    x='Source Language',
    y='Target Language',
    size='Gemini Translation Time (s)',
    color='Text Type',
    hover_data=dict(bubble_hover),
    title='Gemini Translation Times by Source and Target Language with Text Type',
    labels={'Gemini Translation Time (s)': 'Gemini Translation Time (s)', 'Source Language': 'Source Language', 'Target Language': 'Target Language'},
)

gemini_sizeref = 2.0 * max(df['Gemini Translation Time (s)']) / (100.0 ** 2)
fig2.update_traces(marker={'sizemode': 'area', 'sizeref': gemini_sizeref, 'line_width': 2})
fig2.show()

In [67]:
import plotly.express as px

# Stack both backends' timings into long form for an overall comparison:
# one row per measurement, with the backend recorded in the 'Model' column.
df_melted_model = df.melt(
    id_vars=['Source Language', 'Target Language', 'Text Type'],
    value_vars=['HF Translation Time (s)', 'Gemini Translation Time (s)'],
    var_name='Model',
    value_name='Translation Time (s)',
)

# Bubble chart comparing both models overall
fig3 = px.scatter(
    df_melted_model,
    x='Text Type',
    y='Source Language',
    size='Translation Time (s)',
    color='Model',
    hover_data={'Source Language': True, 'Target Language': True, 'Translation Time (s)': True},
    title='Comparison of Translation Times Between Hugging Face and Gemini',
    labels={'Translation Time (s)': 'Translation Time (s)', 'Text Type': 'Text Type', 'Source Language': 'Source Language'},
)

# Marker area is scaled by sizeref, which is derived from the largest value
overall_sizeref = 2.0 * max(df_melted_model['Translation Time (s)']) / (100.0 ** 2)
fig3.update_traces(marker={'sizemode': 'area', 'sizeref': overall_sizeref, 'line_width': 2})
fig3.show()

In [68]:
import plotly.express as px

# Mean translation time for each (text type, model) combination
df_avg_time = (
    df_melted_model
    .groupby(['Text Type', 'Model'])['Translation Time (s)']
    .mean()
    .reset_index()
)

# Bubble chart of each model's average time per text type
fig4 = px.scatter(
    df_avg_time,
    x='Text Type',
    y='Model',
    size='Translation Time (s)',
    color='Model',
    hover_data={'Text Type': True, 'Translation Time (s)': True},
    title='Average Translation Time by Model and Text Type',
    labels={'Translation Time (s)': 'Average Translation Time (s)', 'Text Type': 'Text Type', 'Model': 'Model'},
)

# Marker area is scaled by sizeref, which is derived from the largest average
avg_sizeref = 2.0 * max(df_avg_time['Translation Time (s)']) / (100.0 ** 2)
fig4.update_traces(marker={'sizemode': 'area', 'sizeref': avg_sizeref, 'line_width': 2})
fig4.show()