## Translate English to Danish (en → da) using parallel processing

In [None]:
import pandas as pd
import os
import argostranslate.package
import argostranslate.translate
import time
from concurrent.futures import ThreadPoolExecutor

# All relative paths below (input and output CSV) resolve against this folder.
os.chdir('/folder')

inputfile_name = "./input.csv"
# dtype=str makes pandas read every column as text instead of inferring types.
df = pd.read_csv(inputfile_name, dtype=str)

# Language codes of the translation direction (source -> target).
from_code = "en"
to_code = "da"

# Refresh the package index, then look up the package for this language pair.
argostranslate.package.update_package_index()
available_packages = argostranslate.package.get_available_packages()

package_to_install = next(
    (
        package
        for package in available_packages
        if package.from_code == from_code and package.to_code == to_code
    ),
    None,
)
if package_to_install is None:
    # Without the default above, a missing package would surface as a bare
    # StopIteration with no hint about which language pair was requested.
    raise LookupError(
        f"No Argos Translate package available for {from_code} -> {to_code}"
    )

# Download the matching package and install it from the downloaded path.
argostranslate.package.install_from_path(package_to_install.download())

def translate_batch(texts):
    """Translate each entry of *texts* from ``from_code`` to ``to_code``.

    Entries that pandas considers missing (``pd.notna`` is false) are not
    sent to the translator; ``None`` is returned in their position, so the
    result always has the same length and order as the input.
    """
    results = []
    for entry in texts:
        if pd.notna(entry):
            results.append(
                argostranslate.translate.translate(entry, from_code, to_code)
            )
        else:
            results.append(None)
    return results

# Upper bound on how many leading rows of each column get translated.
max_rows = 100000
start_time = time.time()

with ThreadPoolExecutor() as executor:
    # Submit every column BEFORE waiting on any result. Calling .result()
    # right after submit() blocks until that column is finished, which runs
    # the columns one after another and defeats the thread pool.
    # NOTE(review): this assumes argostranslate.translate.translate may be
    # called from several threads at once -- confirm for the installed version.
    pending = []
    for col in df.columns[1:11]:
        print(f"Translating column: {col}")
        texts_to_translate = df[col].head(max_rows).tolist()
        pending.append((col, executor.submit(translate_batch, texts_to_translate)))

    # Collect results in submission order; the DataFrame is only written from
    # this (main) thread, the workers operate on plain Python lists.
    for col, future in pending:
        # .loc slicing is label-based and inclusive, so with the default
        # 0..n-1 index produced by read_csv this targets the first max_rows rows.
        df.loc[:max_rows-1, col] = future.result()


output_file = "output.csv"
df.to_csv(output_file, index=False)

end_time = time.time()
print(f"Translation complete! File saved as '{output_file}'.")
print(f"Total time taken for translation: {end_time - start_time:.2f} seconds.")


In [None]:
import pandas as pd
import os

# Relative paths below resolve against this folder.
os.chdir('/folder')

inputfile_name = "./input.csv"
# Read every column as text.
df = pd.read_csv(inputfile_name, dtype=str)

# Names of the extra columns: word_1 ... word_<max_new_columns>.
max_new_columns = 30
new_columns = [f"word_{n}" for n in range(1, max_new_columns + 1)]

# Work on a copy of the input and pre-create the word columns as empty.
df_expanded = df.copy()
for column_name in new_columns:
    df_expanded[column_name] = None

# Columns 2 through 11 (positions 1..10) hold the tags to be split.
tag_columns = df.columns[1:11]

for row_label, record in df.iterrows():
    # Gather the words of every multi-word tag in this row, in column order.
    collected = []
    for tag_column in tag_columns:
        value = record[tag_column]
        if pd.notna(value):
            pieces = value.split()
            # Tags consisting of a single word are deliberately left out.
            if len(pieces) > 1:
                collected.extend(pieces)

    # zip stops at the shorter sequence, so words beyond max_new_columns
    # are dropped and unused word columns stay empty.
    for target_column, word in zip(new_columns, collected):
        df_expanded.at[row_label, target_column] = word

df_expanded.to_csv("output.csv", index=False, sep=";")

print("Processing complete")
