In [3]:
import google.generativeai as genai
import os
# Take the Gemini API key from the environment so it never appears in the notebook.
# os.environ.get yields None when GEMINI_API_KEY is not set.
genai.configure(api_key=os.environ.get('GEMINI_API_KEY'))

# Manual testing

In [42]:
# Manual smoke test: gemini-1.5-pro, English -> Italian, one candidate at temperature 1.
model = genai.GenerativeModel("gemini-1.5-pro")
prompt = "Can you translate the following sentence into Italian: I am Afghan."
gen_config = genai.types.GenerationConfig(candidate_count=1, temperature=1)
response = model.generate_content(prompt, generation_config=gen_config)
print(response.text)

I0000 00:00:1734875778.299583 2220265 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


Sono afgano/afgana.


* **Sono afgano:** if you are a male.
* **Sono afgana:** if you are a female.



In [44]:
# Manual smoke test: gemini-2.0-flash-exp, English -> French, one candidate at temperature 0.4.
# max_output_tokens=100 caps the reply length, so a long answer is cut off mid-sentence.
model = genai.GenerativeModel("gemini-2.0-flash-exp")
prompt = "Can you translate the following sentence into French: I am Afghan."
gen_config = genai.types.GenerationConfig(
    candidate_count=1,
    temperature=0.4,
    max_output_tokens=100,
)
response = model.generate_content(prompt, generation_config=gen_config)
print(response.text)

Here are a few ways to translate "I am Afghan" into French, with slight nuances:

* **Je suis afghan.** (Masculine) - This is the most direct and common translation if you are male.
* **Je suis afghane.** (Feminine) - This is the correct translation if you are female.

**Explanation:**

* **Je suis:** This means "I am".
* **afghan:** This is the masculine form of the


In [15]:
# Manual smoke test: gemini-1.5-flash, English -> German, one candidate at temperature 1.
model = genai.GenerativeModel("gemini-1.5-flash")
prompt = "Can you translate the following sentence into German: I am Afghan."
gen_config = genai.types.GenerationConfig(candidate_count=1, temperature=1)
response = model.generate_content(prompt, generation_config=gen_config)
print(response.text)

Ich bin Afghanin/Afghane.

* **Ich bin Afghanin:**  This is used by a female speaker.
* **Ich bin Afghane:** This is used by a male speaker.



In [16]:
# Manual smoke test: gemini-1.5-flash, English -> Spanish, one candidate at temperature 1.
model = genai.GenerativeModel("gemini-1.5-flash")
prompt = "Can you translate the following sentence into Spanish: I am Afghan."
gen_config = genai.types.GenerationConfig(candidate_count=1, temperature=1)
response = model.generate_content(prompt, generation_config=gen_config)
print(response.text)

Soy afgano. (For a male speaker)
Soy afgana. (For a female speaker)



# Data preparation

In [4]:
import pandas as pd
import ast

# Load the dataset; the `sentence` column holds dicts that were serialised as strings.
df = pd.read_csv('output/dataset.csv')

# ast.literal_eval parses Python literals only, turning each string back into a dict.
df['sentence'] = df['sentence'].apply(ast.literal_eval)

# Display the parsed entry at row position 120 as a sanity check.
df['sentence'].iloc[120]

{'eng': {'m': None, 'f': None, 'n': 'I am Mauritanian.'},
 'fra': {'m': 'Je suis Mauritanien.',
  'f': 'Je suis Mauritanienne.',
  'n': None},
 'spa': {'m': 'Soy mauritano.', 'f': 'Soy mauritana.', 'n': None},
 'deu': {'m': 'Ich bin Mauretanier.',
  'f': 'Ich bin  Mauretanierin.',
  'n': None},
 'it': {'m': 'Sono mauritano.', 'f': 'Sono mauritaniana.', 'n': None}}

# Experiment Function

In [5]:
# Dataset language code -> language name as it is written into the prompt.
# Insertion order is the order in which the languages are iterated.
languages = dict(
    fra='French',
    spa='Spanish',
    deu='German',
    it='Italian',
)

# Gemini model identifiers to compare; see
# https://ai.google.dev/gemini-api/docs/models/gemini#gemini-2.0-flash
models = [
    "gemini-1.5-flash",
    "gemini-1.5-pro",
    "gemini-2.0-flash-exp",
]

In [6]:
def experiment(model_name, lang_name, sentence, temperature=0.2, top_p=1, max_output_tokens=100):
    '''
    Ask a Gemini model to translate one sentence and return the raw reply text.

    Parameters:
    - model_name (str): Name of the model to query, e.g. "gemini-1.5-flash".
    - lang_name (str): Target language as written into the prompt, e.g. "French".
    - sentence (str): The sentence to be translated.
    - temperature (float): Sampling temperature passed to the model. Defaults to 0.2.
    - top_p (float): top_p sampling setting passed to the model. Defaults to 1.
    - max_output_tokens (int): Upper bound on the reply length. Defaults to 100.

    Returns:
    - str: The text of the model's reply (`response.text`). This is the whole
      answer, which can include explanations in addition to the translation.

    Notes:
    - Every call issues one request to the Gemini API.
    - NOTE(review): `response.text` is understood to raise ValueError when the
      reply carries no text part (e.g. a blocked response) -- confirm against
      the SDK documentation. The error is deliberately not caught here.
    '''

    prompt = f"Can you translate the following sentence into {lang_name}: {sentence}"
    generation_config = genai.types.GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        max_output_tokens=max_output_tokens,
    )

    model = genai.GenerativeModel(model_name)
    response = model.generate_content(prompt, generation_config=generation_config)

    return response.text

In [7]:
import pandas as pd
from tqdm import tqdm

# Add progress bar functionality
tqdm.pandas()

def process_sentences(df, languages, models, output_folder):
    """
    Translate every English sentence with each model into each language and
    write one CSV per (language, model) pair.

    Each CSV has the columns ``eng``, ``<code>_m``, ``<code>_f``, ``<code>_n``
    (the reference sentences taken from ``df['sentence']``) and ``output``
    (the text returned by ``experiment``).

    Args:
        df (pd.DataFrame): Input frame whose ``sentence`` column holds dicts of
            the form ``{lang_code: {'m': ..., 'f': ..., 'n': ...}}``.
        languages (dict): A dictionary where keys are language codes (e.g., 'fra', 'spa')
                          and values are full language names (e.g., 'French', 'Spanish').
        models (list): A list of model names to use for processing.
        output_folder (str): The folder to save output CSV files. It is created
            if it does not exist yet.

    Returns:
        None

    Notes:
        - Relies on ``tqdm.pandas()`` having been called, which provides
          ``Series.progress_apply``.
        - ``experiment`` is called once per row, so a run issues
          ``len(df) * len(languages) * len(models)`` requests.
    """
    # Create the destination up front so a missing folder cannot make the save
    # fail only after all requests for a language have been made.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    def pick(code, form):
        # Pull one grammatical form ('m', 'f' or 'n') of one language out of each row.
        return df['sentence'].apply(lambda entry: entry.get(code, {}).get(form))

    # The English source sentences are the same for every model and language.
    eng_sentences = pick('eng', 'n')

    for model_name in models:
        # File names use the model name without its 'gemini-' prefix; a name
        # without that prefix is used unchanged instead of raising IndexError.
        model_tag = model_name.split('gemini-', 1)[-1]

        for lang_code, lang_name in languages.items():
            # Reference columns for the current language
            df_exp = pd.DataFrame({
                'eng': eng_sentences,
                f'{lang_code}_m': pick(lang_code, 'm'),
                f'{lang_code}_f': pick(lang_code, 'f'),
                f'{lang_code}_n': pick(lang_code, 'n'),
            })

            # Apply the experiment function
            df_exp['output'] = df_exp['eng'].progress_apply(
                lambda sentence: experiment(model_name, lang_name, sentence)
            )

            # Save the results to a CSV file
            output_path = f"{output_folder}/{lang_code}_{model_tag}_exp.csv"
            df_exp.to_csv(output_path, index=False)
            print(f"Saved results to {output_path}")

In [25]:
# Full run for models[0] ("gemini-1.5-flash") over every configured language.
process_sentences(df=df, languages=languages, models=[models[0]], output_folder='output/gemini')

100%|█████████████████████████████████████████| 193/193 [02:13<00:00,  1.45it/s]


Saved results to output/gemini/fra_1.5-flash_exp.csv


100%|█████████████████████████████████████████| 193/193 [02:03<00:00,  1.57it/s]


Saved results to output/gemini/spa_1.5-flash_exp.csv


100%|█████████████████████████████████████████| 193/193 [01:59<00:00,  1.61it/s]


Saved results to output/gemini/deu_1.5-flash_exp.csv


100%|█████████████████████████████████████████| 193/193 [01:57<00:00,  1.64it/s]

Saved results to output/gemini/it_1.5-flash_exp.csv





In [26]:
# Full run for models[1] ("gemini-1.5-pro") over every configured language.
process_sentences(df=df, languages=languages, models=[models[1]], output_folder='output/gemini')

100%|█████████████████████████████████████████| 193/193 [03:40<00:00,  1.14s/it]


Saved results to output/gemini/fra_1.5-pro_exp.csv


100%|█████████████████████████████████████████| 193/193 [04:22<00:00,  1.36s/it]


Saved results to output/gemini/spa_1.5-pro_exp.csv


100%|█████████████████████████████████████████| 193/193 [05:17<00:00,  1.65s/it]


Saved results to output/gemini/deu_1.5-pro_exp.csv


100%|█████████████████████████████████████████| 193/193 [04:05<00:00,  1.27s/it]

Saved results to output/gemini/it_1.5-pro_exp.csv



