# 🌐 Translation Notebook: Förderdatenbank Data
This notebook translates German funding data into English using the DeepL API.

In [None]:
# %%
import pandas as pd
import deepl
import time
from tqdm import tqdm
from dotenv import load_dotenv
import os

In [None]:
# %%
# Pull variables from the .env file into the process environment, then read
# the DeepL key from there.
load_dotenv()
DEEPL_API_KEY = os.environ.get("DEEPL_API_KEY")

# Stop immediately when the key is absent or empty; nothing below can run
# without it.
if DEEPL_API_KEY is None or DEEPL_API_KEY == "":
    raise ValueError("DeepL API key not found. Please create a .env file with DEEPL_API_KEY=your-key")

In [None]:
# Create the shared DeepL client used by every translation call below
translator = deepl.Translator(auth_key=DEEPL_API_KEY)

In [20]:
# %%
# Read the scraped German funding records into a DataFrame
df = pd.read_csv(filepath_or_buffer="data/funding-foerderdatenbank-data.csv")



In [22]:
# %%
# Names of the DataFrame columns whose values are sent through the translator
columns_to_translate = [
    "name",
    "description",
    "domain",
    "eligibility",
    "location",
]



In [23]:
# %%
# Enhanced translation function with rate limiting
def translate_text(text):
    """Translate a single German string to US English with DeepL.

    Values that are not strings (including ``None`` and NaN) and strings that
    are empty or whitespace-only are returned unchanged without calling the
    API. Each successful request is followed by a 0.5 s pause to throttle the
    request rate. A failed request is retried, for up to five attempts in
    total; if all of them fail the original text is returned.

    Args:
        text: The cell value to translate.

    Returns:
        The translated text, or ``text`` itself when it was skipped or every
        attempt failed.
    """
    # Every missing-value marker (None, NaN, pd.NA) is a non-string, so the
    # isinstance test alone covers them and also tolerates list/array input.
    if not isinstance(text, str) or not text.strip():
        return text

    for attempt in range(5):
        try:
            result = translator.translate_text(text, source_lang="DE", target_lang="EN-US")
        except deepl.TooManyRequestsException:
            # Match on the exception type rather than on the message wording.
            wait_time = min(2 ** attempt, 60)  # Exponential backoff, capped at 60 s
            print(f"⏳ Rate limit hit. Waiting {wait_time}s...")
            time.sleep(wait_time)
        except deepl.DeepLException as e:
            # Any other API error: report it and retry after a fixed pause.
            print(f"❌ API Error: {text[:30]}... | {e}")
            time.sleep(5)
        else:
            time.sleep(0.5)  # Delay between requests
            return result.text
    return text  # Fallback to original text after exhausting all attempts

In [24]:
# %%
# Translate each configured column in place, with a progress bar per column
for col in columns_to_translate:
    print(f"🔤 Translating column: {col}")

    # The column is deliberately NOT cast to str: that would turn NaN into the
    # literal text "nan", which translate_text would then send to the API
    # instead of skipping it as a missing value.

    # Values are translated one at a time; a failure on one value keeps its
    # original content and does not abort the rest of the column.
    translated = []
    for text in tqdm(df[col]):
        try:
            translated.append(translate_text(text))
        except Exception as e:
            print(f"⚠️ Translation failed: {e}")
            translated.append(text)  # Keep original on failure

    df[col] = translated


🔤 Translating column: name


100%|██████████| 34/34 [00:21<00:00,  1.58it/s]


🔤 Translating column: description


100%|██████████| 34/34 [00:24<00:00,  1.41it/s]


🔤 Translating column: domain


100%|██████████| 34/34 [00:20<00:00,  1.64it/s]


🔤 Translating column: eligibility


100%|██████████| 34/34 [00:20<00:00,  1.67it/s]


🔤 Translating column: location


100%|██████████| 34/34 [00:19<00:00,  1.70it/s]


In [26]:
# %%
# Write the translated DataFrame to a new CSV (row index omitted) and confirm
output_path = "data/funding-foerderdatenbank-english.csv"
df.to_csv(output_path, index=False)
print(f"✅ Translated data saved to: {output_path}")


✅ Translated data saved to: data/funding-foerderdatenbank-english.csv
