# IMPORT

In [1]:
import os
import re
import time
import json
import asyncio
import numpy as np
import pandas as pd

from tqdm import tqdm
from googletrans import Translator

# FUNCTION

In [2]:
async def gg_translate(sentence, from_lang="vi", to_lang="en"):
    """Translate ``sentence`` through a short-lived googletrans ``Translator``.

    Args:
        sentence: Text handed to ``Translator.translate``.
        from_lang: Source language code, passed as ``src``.
        to_lang: Target language code, passed as ``dest``.

    Returns:
        Whatever ``Translator.translate`` resolves to (the raw result object,
        not just the translated string).
    """
    # A fresh Translator is opened per call and closed when the block exits.
    async with Translator() as client:
        return await client.translate(sentence, src=from_lang, dest=to_lang)

async def run_gg_translate(sentence, from_lang="vi", to_lang="en"):
    """Translate ``sentence`` and return only the translated text.

    Thin wrapper over ``gg_translate`` that unwraps the ``.text`` attribute
    of the result object.

    Args:
        sentence: Text to translate.
        from_lang: Source language code forwarded to ``gg_translate``.
        to_lang: Target language code forwarded to ``gg_translate``.

    Returns:
        The ``text`` attribute of the translation result.
    """
    outcome = await gg_translate(sentence, from_lang=from_lang, to_lang=to_lang)
    return outcome.text

In [3]:
async def row_translation(row_sentences, from_lang="vi", to_lang="en"):
    """Translate every sentence of one row concurrently.

    Args:
        row_sentences: Iterable of sentences (one per column to translate).
        from_lang: Source language code forwarded to ``run_gg_translate``.
        to_lang: Target language code forwarded to ``run_gg_translate``.

    Returns:
        A list of translated texts in the same order as ``row_sentences``.

    Raises:
        Any exception raised by an individual translation propagates out of
        ``asyncio.gather`` (no ``return_exceptions``), failing the whole row.
    """
    # Forward the language pair: previously the arguments were accepted but
    # ignored, so every call silently used the vi -> en defaults.
    tasks = [
        run_gg_translate(sen, from_lang=from_lang, to_lang=to_lang)
        for sen in row_sentences
    ]
    translated_sentences = await asyncio.gather(*tasks)
    return translated_sentences

# LOAD DATA

In [4]:
# Absolute, machine-specific location of the input CSV.
df_path = r"F:\UNIVERSITY\Project\Sentiment-Analysis-Airflow\Financial-Sentiment-Analysis\projects\data\gather_all_contents.csv"
# Load the whole file into memory as the working DataFrame.
df = pd.read_csv(filepath_or_buffer=df_path)

# TRANSLATE

In [5]:
columns = df.columns
# Keep only the columns whose name contains "relative_sen"; these are the
# ones selected for translation below.
need_translated_columns = list(
    filter(lambda name: "relative_sen" in name, columns)
)

In [6]:
# Sub-frame restricted to the columns selected for translation, plus its raw values.
need_translated_df = df[need_translated_columns]
need_translated_values = need_translated_df.values

# Rebind to the sub-frame's own column index so later lookups use the same labels.
need_translated_columns = need_translated_df.columns
# Output column names mirror the inputs with a "translated_" prefix.
translated_columns = [
    f"translated_{source_name}" for source_name in need_translated_columns
]
# One independent, initially empty list per output column.
translated_data = {}
translated_data.update((out_name, []) for out_name in translated_columns)

In [None]:
save_path = r"F:\UNIVERSITY\Project\Sentiment-Analysis-Airflow\Financial-Sentiment-Analysis\projects\data\save_translation\{id}.npy"
attempts = 3

for i, rows in tqdm(df.iterrows()):
    row_id = rows["Unnamed: 0"]
    row_save_path = save_path.format(id=row_id)
    if os.path.isfile(row_save_path):
        continue
    rows_values = rows[need_translated_columns].values
    translated_data = {col_name: None for col_name in translated_columns}
    
    #-- Translation
    for attempt in range(attempts):
        try:
            translated_rows = await row_translation(rows_values)
            for i, col_name in enumerate(need_translated_columns):
                trans_col_name = f"translated_{col_name}"
                translated_data[trans_col_name] = translated_rows[i]
                np.save(row_save_path, translated_data)
            break
        except Exception as e:
            print(f"ID: {row_id} - Try: {attempt}")
            print(e)
            time.sleep(2)

9982it [00:02, 3535.48it/s]
