In [1]:
from get_prompt import (
    generate_commentary_translation_prompt,
    generate_combined_commentary_prompt,
    generate_plain_translation_prompt,
    generate_sanskrit_translation_prompt,
    generate_standardized_translation_prompt,
    generate_word_by_word_translation_prompt
 )

In [None]:
import multiprocessing
import os
import time

import google.generativeai as genai
import pandas as pd
from tqdm import tqdm

# Configure Gemini API.
# The key is read from the GOOGLE_API_KEY environment variable so that a real
# credential never has to be saved inside the notebook; the placeholder string
# is only used when that variable is not set.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "YOUR_GEMINI_API_KEY"))


In [3]:
 # Map content types to respective prompt functions
# Dispatch table: lower-cased "content type" value -> prompt generator.
# Built from ordered (key, function) pairs; lookups are done by build_prompt.
prompt_functions = dict(
    [
        ("4 commentary translation english", generate_commentary_translation_prompt),
        ("combined commentary english", generate_combined_commentary_prompt),
        ("plain english", generate_plain_translation_prompt),
        ("sanskrit translation", generate_sanskrit_translation_prompt),
        ("standardised translation", generate_standardized_translation_prompt),
        ("word by word translation", generate_word_by_word_translation_prompt),
    ]
)

In [4]:
import pandas as pd

# Default target language
DEFAULT_TARGET_LANGUAGE = "English"

def build_prompt(row, target_language=DEFAULT_TARGET_LANGUAGE):
    """
    Calls the appropriate prompt generator function based on content type.

    The row's "content type" value is stripped and lower-cased, then looked up
    in ``prompt_functions``. Unknown, missing, or non-string values (for
    example the float NaN that pandas produces for a blank CSV cell) fall back
    to ``generate_plain_translation_prompt``.

    Args:
        row (pd.Series): A row from the DataFrame containing content. Any
            mapping with a ``.get`` method works.
        target_language (str): The target language for translation (default: "English").

    Returns:
        str: The generated prompt (whatever the selected generator returns).
    """
    raw_type = row.get("content type", "")

    # Only strings can be normalized; NaN/None would raise on .strip(), so
    # treat them as "no content type" and let the default generator handle it.
    content_type = raw_type.strip().lower() if isinstance(raw_type, str) else ""

    # Select the appropriate function or default to plain English translation
    prompt_function = prompt_functions.get(content_type, generate_plain_translation_prompt)

    # Generate and return the prompt
    return prompt_function(row, target_language)


In [5]:
# Function to send API request with retries
def run_gemini_task(task_prompt: str, model_name="gemini-pro", max_retries=5, wait_time=5) -> str:
    """
    Sends an API request to Gemini with retry logic.

    Every attempt counts toward ``max_retries``, whether it failed with an
    exception or simply came back with no text, so the function always
    terminates after at most ``max_retries`` API calls.

    Args:
        task_prompt (str): The input prompt defining the task.
        model_name (str): Gemini model to use (default: "gemini-pro").
        max_retries (int): Maximum number of attempts before giving up.
        wait_time (int): Seconds to wait between attempts.

    Returns:
        str: The AI-generated response text, or a fixed error message string
        when no attempt produced text. Exceptions from the API are caught and
        reported via ``print``; they are not re-raised.
    """
    for attempt in range(1, max_retries + 1):
        try:
            model = genai.GenerativeModel(model_name)
            response = model.generate_content(task_prompt)

            if response and response.text:
                return response.text  # Successful response

            failure = "No response received"
        except Exception as e:
            # Best-effort by design: any API/client error is treated as a
            # retryable failure. Capture the text here because `e` is unbound
            # once the except block ends.
            failure = f"Error - {e}"

        if attempt < max_retries:
            print(f"Attempt {attempt}: {failure}. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)  # Back off before the next attempt
        else:
            print(f"Attempt {attempt}: {failure}. No retries left.")

    return "Error: Maximum retries reached. Unable to get a response from Gemini."



In [6]:


# Function to process the DataFrame in parallel
def process_dataframe(df, target_language="English", num_workers=5):
    """
    Builds one prompt per row and sends the prompts to Gemini concurrently.

    A thread pool is used instead of a process pool: the work is network-bound,
    and worker processes cannot reliably import functions that were defined
    interactively in a notebook, nor do freshly spawned processes see the
    ``genai.configure`` call made in this kernel.

    Note: ``df`` is modified in place (columns "Prompt" and "Translated Text"
    are added or overwritten) and the same object is returned.

    Args:
        df (pd.DataFrame): DataFrame containing source texts and commentaries.
        target_language (str): The target language for translation.
        num_workers (int): Number of concurrent worker threads.

    Returns:
        pd.DataFrame: The input DataFrame with "Prompt" and "Translated Text"
        columns filled in, in row order.
    """
    from multiprocessing.pool import ThreadPool

    # Create prompts for each row. An explicit loop (rather than
    # df.apply(axis=1)) always yields a flat list, including for an empty frame.
    prompts = [build_prompt(row, target_language) for _, row in df.iterrows()]
    df["Prompt"] = prompts

    # imap preserves input order, so results line up with the rows
    with ThreadPool(num_workers) as pool:
        results = list(tqdm(pool.imap(run_gemini_task, prompts), total=len(df), desc="Processing Rows"))

    # Store results in DataFrame
    df["Translated Text"] = results

    return df



In [7]:
import pandas as pd

# Load the source texts to translate; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer="buddhist_text_translation.csv")


In [None]:

# Run batch translation: one Gemini request per row of df, five workers at a time
translated_df = process_dataframe(
    df=df,
    target_language="English",
    num_workers=5,
)
